2 * Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1998-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: lex.c,v 1.66.2.6.2.8 2004/08/28 06:25:21 marka Exp $ */
26 #include <isc/buffer.h>
31 #include <isc/parseint.h>
32 #include <isc/print.h>
33 #include <isc/stdio.h>
34 #include <isc/string.h>
/*
 * Per-input-source state kept on the lexer's 'sources' list.
 * NOTE(review): only some members are visible in this listing; code later in
 * the file also uses 'result', 'at_eof', 'input', 'name', 'line' and
 * 'ignored', whose declarations are not shown here.
 */
37 typedef struct inputsource {
/* Set from new_source()'s is_file argument; selects the stdio read path. */
39 isc_boolean_t is_file;
/* For a file source, tells isc_lex_close() to fclose() the input. */
40 isc_boolean_t need_close;
/* Characters read so far for the current token; enables token unget. */
42 isc_buffer_t * pushback;
/* Copy of 'line' taken by isc_lex_gettoken(), restored by isc_lex_ungettoken(). */
47 unsigned long saved_line;
/* Linkage used by the lexer's 'sources' list. */
48 ISC_LINK(struct inputsource) link;
/*
 * Magic value stored in a lexer's 'magic' member by isc_lex_create(), and the
 * validity test that the public entry points apply via REQUIRE(VALID_LEX()).
 */
51 #define LEX_MAGIC ISC_MAGIC('L', 'e', 'x', '!')
52 #define VALID_LEX(l) ISC_MAGIC_VALID(l, LEX_MAGIC)
/*
 * Members of the lexer object.
 * NOTE(review): the struct header and the first members (magic, mctx,
 * max_token, data are used elsewhere in this file) are not visible here.
 */
/* Bit mask of ISC_LEXCOMMENT_* styles; see isc_lex_setcomments(). */
60 unsigned int comments;
/* Initialised to ISC_TRUE; tested before comment detection in isc_lex_gettoken(). */
61 isc_boolean_t comment_ok;
/* True while at the start of a line; gates the initial-whitespace token. */
62 isc_boolean_t last_was_eol;
/* Parenthesis nesting state used with ISC_LEXOPT_DNSMULTILINE. */
63 unsigned int paren_count;
/* Copy of paren_count taken by isc_lex_gettoken(), restored on unget. */
64 unsigned int saved_paren_count;
/* Table indexed by character; non-zero entries are returned as special tokens. */
65 isc_lexspecials_t specials;
/* List of input sources; new sources are prepended and HEAD() is the current one. */
66 LIST(struct inputsource) sources;
/*
 * Enlarge the lexer's token buffer.
 *
 * A new buffer of (2 * max_token + 1) bytes is obtained, the old contents
 * (max_token + 1 bytes) are copied over, and the caller's cursors '*currp'
 * and '*prevp' are rebased so they point at the same offsets in the new
 * buffer.  '*remainingp' is increased by lex->max_token.
 *
 * Returns ISC_R_NOMEMORY or ISC_R_SUCCESS.
 *
 * NOTE(review): several lines of this function are missing from this listing
 * (the declaration of 'new', the conditions guarding the early return and the
 * '*prevp' rebase, and whatever updates lex->data / lex->max_token); confirm
 * against the complete file before relying on the details.
 */
69 static inline isc_result_t
70 grow_data(isc_lex_t *lex, size_t *remainingp, char **currp, char **prevp) {
73 new = isc_mem_get(lex->mctx, lex->max_token * 2 + 1);
75 return (ISC_R_NOMEMORY);
/* Copy the whole old buffer, including its extra trailing byte. */
76 memcpy(new, lex->data, lex->max_token + 1);
/* Rebase the cursors: same offset, new base address. */
77 *currp = new + (*currp - lex->data);
79 *prevp = new + (*prevp - lex->data);
/* Release the old buffer with the size it was allocated with. */
80 isc_mem_put(lex->mctx, lex->data, lex->max_token + 1);
82 *remainingp += lex->max_token;
84 return (ISC_R_SUCCESS);
/*
 * Create a lexer.
 *
 * Requires: 'lexp' is non-NULL and '*lexp' is NULL; 'max_token' is > 0.
 *
 * Allocates the lexer object and a token buffer of (max_token + 1) bytes
 * from 'mctx', then initialises the lexer state: comments allowed
 * (comment_ok), at start of line (last_was_eol), no open parentheses, no
 * special characters, and an empty source list.
 *
 * Returns ISC_R_NOMEMORY if an allocation fails, otherwise ISC_R_SUCCESS.
 *
 * NOTE(review): the return-type line and some statements (for example the
 * one that hands the new lexer back through 'lexp') are not visible here.
 */
88 isc_lex_create(isc_mem_t *mctx, size_t max_token, isc_lex_t **lexp) {
95 REQUIRE(lexp != NULL && *lexp == NULL);
96 REQUIRE(max_token > 0U);
98 lex = isc_mem_get(mctx, sizeof(*lex));
100 return (ISC_R_NOMEMORY);
101 lex->data = isc_mem_get(mctx, max_token + 1);
102 if (lex->data == NULL) {
/* Undo the first allocation so nothing leaks on failure. */
103 isc_mem_put(mctx, lex, sizeof(*lex));
104 return (ISC_R_NOMEMORY);
107 lex->max_token = max_token;
109 lex->comment_ok = ISC_TRUE;
110 lex->last_was_eol = ISC_TRUE;
111 lex->paren_count = 0;
112 lex->saved_paren_count = 0;
/* NOTE(review): 256 is hard-coded here; presumably sizeof(isc_lexspecials_t) - confirm. */
113 memset(lex->specials, 0, 256);
114 INIT_LIST(lex->sources);
/* Stamp the magic last so VALID_LEX() only passes on an initialised lexer. */
115 lex->magic = LEX_MAGIC;
119 return (ISC_R_SUCCESS);
/*
 * Destroy a lexer.
 *
 * Requires: 'lexp' is non-NULL and refers to a valid lexer.
 *
 * Every source still on the source list is closed with isc_lex_close(); a
 * failure there is treated as fatal via RUNTIME_CHECK.  The token buffer
 * (if any) and the lexer object are then returned to the memory context.
 *
 * NOTE(review): the lines that load 'lex' from '*lexp' and that reset the
 * magic / '*lexp' are not visible in this listing.
 */
123 isc_lex_destroy(isc_lex_t **lexp) {
130 REQUIRE(lexp != NULL);
132 REQUIRE(VALID_LEX(lex));
134 while (!EMPTY(lex->sources))
135 RUNTIME_CHECK(isc_lex_close(lex) == ISC_R_SUCCESS);
136 if (lex->data != NULL)
/* Size must match the current buffer allocation (max_token + 1). */
137 isc_mem_put(lex->mctx, lex->data, lex->max_token + 1);
139 isc_mem_put(lex->mctx, lex, sizeof(*lex));
/*
 * Accessor: returns lex->comments, the comment-style mask previously stored
 * by isc_lex_setcomments().  'lex' must be a valid lexer.
 */
145 isc_lex_getcomments(isc_lex_t *lex) {
147 * Return the current lexer commenting styles.
150 REQUIRE(VALID_LEX(lex));
152 return (lex->comments);
/*
 * Mutator: stores 'comments' in lex->comments.  isc_lex_gettoken() tests
 * this mask against ISC_LEXCOMMENT_* flags when deciding whether a character
 * starts a comment.  'lex' must be a valid lexer.
 */
156 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments) {
158 * Set allowed lexer commenting styles.
161 REQUIRE(VALID_LEX(lex));
163 lex->comments = comments;
/*
 * Copies the lexer's special-character table into the caller's 'specials'.
 * 'lex' must be a valid lexer.
 * NOTE(review): the copy length is a literal 256; presumably this equals
 * sizeof(isc_lexspecials_t) - confirm against the type's declaration.
 */
167 isc_lex_getspecials(isc_lex_t *lex, isc_lexspecials_t specials) {
169 * Put the current list of specials into 'specials'.
172 REQUIRE(VALID_LEX(lex));
174 memcpy(specials, lex->specials, 256);
/*
 * Replaces the lexer's special-character table with a copy of 'specials'.
 * isc_lex_gettoken() indexes this table by character value.
 * 'lex' must be a valid lexer.
 * NOTE(review): the copy length is a literal 256; presumably this equals
 * sizeof(isc_lexspecials_t) - confirm against the type's declaration.
 */
178 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials) {
180 * The characters in 'specials' are returned as tokens. Along with
181 * whitespace, they delimit strings and numbers.
184 REQUIRE(VALID_LEX(lex));
186 memcpy(lex->specials, specials, 256);
/*
 * Create an input source and make it the current one.
 *
 *   is_file     - recorded in the source; chooses the stdio read path.
 *   need_close  - recorded in the source; isc_lex_close() fclose()s a file
 *                 source only when this is set.
 *   input       - the FILE * or isc_buffer_t * to read from (stored as-is).
 *   name        - duplicated into the source with isc_mem_strdup().
 *
 * Allocates the source, its name copy and its pushback buffer; each failure
 * path releases what was already allocated.  On success the source is
 * prepended to lex->sources.
 *
 * Returns ISC_R_NOMEMORY on allocation failure, ISC_R_SUCCESS otherwise.
 *
 * NOTE(review): some lines are missing from this listing, including the
 * pushback buffer's initial size and the return after the buffer-allocation
 * failure cleanup.
 */
189 static inline isc_result_t
190 new_source(isc_lex_t *lex, isc_boolean_t is_file, isc_boolean_t need_close,
191 void *input, const char *name)
196 source = isc_mem_get(lex->mctx, sizeof(*source));
198 return (ISC_R_NOMEMORY);
199 source->result = ISC_R_SUCCESS;
200 source->is_file = is_file;
201 source->need_close = need_close;
202 source->at_eof = ISC_FALSE;
203 source->input = input;
204 source->name = isc_mem_strdup(lex->mctx, name);
205 if (source->name == NULL) {
206 isc_mem_put(lex->mctx, source, sizeof(*source));
207 return (ISC_R_NOMEMORY);
/* Must be NULL before isc_buffer_allocate() fills it in. */
209 source->pushback = NULL;
210 result = isc_buffer_allocate(lex->mctx, &source->pushback,
212 if (result != ISC_R_SUCCESS) {
/* Unwind in reverse order of allocation. */
213 isc_mem_free(lex->mctx, source->name);
214 isc_mem_put(lex->mctx, source, sizeof(*source));
/* Prepending makes this source the one HEAD(lex->sources) returns. */
219 ISC_LIST_INITANDPREPEND(lex->sources, source, link);
221 return (ISC_R_SUCCESS);
/*
 * Opens 'filename' for reading with isc_stdio_open() and registers the
 * stream as a file source that the lexer owns (need_close is ISC_TRUE, so
 * isc_lex_close() will fclose() it).  The source is named after 'filename'.
 *
 * If new_source() fails, the freshly opened stream is closed here so it does
 * not leak.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably 'result' is returned in every case - confirm.
 */
225 isc_lex_openfile(isc_lex_t *lex, const char *filename) {
230 * Open 'filename' and make it the current input source for 'lex'.
233 REQUIRE(VALID_LEX(lex));
235 result = isc_stdio_open(filename, "r", &stream);
236 if (result != ISC_R_SUCCESS)
239 result = new_source(lex, ISC_TRUE, ISC_TRUE, stream, filename);
240 if (result != ISC_R_SUCCESS)
241 (void)fclose(stream);
/*
 * Registers the caller's already-open 'stream' as a file source.  need_close
 * is ISC_FALSE, so isc_lex_close() will not fclose() it; the stream remains
 * the caller's responsibility.  The source name is synthesised as
 * "stream-<pointer>".
 *
 * Returns whatever new_source() returns.
 *
 * NOTE(review): the declaration of the 'name' buffer is not visible here.
 */
246 isc_lex_openstream(isc_lex_t *lex, FILE *stream) {
250 * Make 'stream' the current input source for 'lex'.
253 REQUIRE(VALID_LEX(lex));
255 snprintf(name, sizeof(name), "stream-%p", stream);
257 return (new_source(lex, ISC_TRUE, ISC_FALSE, stream, name));
/*
 * Registers the caller's 'buffer' as a non-file source (is_file and
 * need_close are both ISC_FALSE).  The source name is synthesised as
 * "buffer-<pointer>".
 *
 * Returns whatever new_source() returns.
 *
 * NOTE(review): the declaration of the 'name' buffer is not visible here.
 */
261 isc_lex_openbuffer(isc_lex_t *lex, isc_buffer_t *buffer) {
265 * Make 'buffer' the current input source for 'lex'.
268 REQUIRE(VALID_LEX(lex));
270 snprintf(name, sizeof(name), "buffer-%p", buffer);
272 return (new_source(lex, ISC_FALSE, ISC_FALSE, buffer, name));
/*
 * Removes the current source (the head of lex->sources) and frees it.
 *
 * A file source is fclose()d only when its need_close flag is set; the
 * fclose() result is deliberately ignored.  The source's name, pushback
 * buffer and the source object itself are then released.
 *
 * Returns ISC_R_NOMORE or ISC_R_SUCCESS.
 *
 * NOTE(review): the condition guarding the ISC_R_NOMORE return is not
 * visible in this listing; presumably it is taken when there is no source.
 */
276 isc_lex_close(isc_lex_t *lex) {
280 * Close the most recently opened object (i.e. file or buffer).
283 REQUIRE(VALID_LEX(lex));
285 source = HEAD(lex->sources);
287 return (ISC_R_NOMORE);
289 ISC_LIST_UNLINK(lex->sources, source, link);
290 if (source->is_file) {
291 if (source->need_close)
292 (void)fclose((FILE *)(source->input));
294 isc_mem_free(lex->mctx, source->name);
295 isc_buffer_free(&source->pushback);
296 isc_mem_put(lex->mctx, source, sizeof(*source));
298 return (ISC_R_SUCCESS);
/*
 * Enumerators of the tokenizer's state machine (type 'lexstate', used by
 * isc_lex_gettoken()).
 * NOTE(review): only two enumerators are visible in this listing; the
 * function also uses lexstate_start, _crlf, _string, _number, _ccomment,
 * _eatline and _qstring.
 */
/* Entered after a '/' that may begin a C or C++ style comment. */
306 lexstate_maybecomment,
/* Entered from lexstate_ccomment when the comment may be about to end. */
308 lexstate_ccommentend,
/* Convenience mask: the initial-whitespace and end-of-line token options combined. */
313 #define IWSEOL (ISC_LEXOPT_INITIALWS | ISC_LEXOPT_EOL)
/*
 * Step the source's pushback buffer back by one character so the character
 * will be read again, and clear the source's at_eof flag.
 *
 * Requires that at least one character has been consumed
 * (source->pushback->current > 0).
 *
 * NOTE(review): lines are missing from this listing, so how 'c' is used
 * (for example an EOF special case) cannot be seen here.
 */
316 pushback(inputsource *source, int c) {
317 REQUIRE(source->pushback->current > 0);
319 source->at_eof = ISC_FALSE;
322 source->pushback->current--;
/*
 * Append character 'c' to the source's pushback buffer, growing the buffer
 * first if it has no space available.
 *
 * Growth allocates a replacement buffer of twice the old length, copies the
 * used region into it, carries over the read position ('current'), frees the
 * old buffer and installs the new one.
 *
 * Returns ISC_R_SUCCESS after the byte is stored.
 *
 * NOTE(review): the return taken when the replacement allocation fails, and
 * the local declarations, are not visible in this listing.
 */
328 pushandgrow(isc_lex_t *lex, inputsource *source, int c) {
329 if (isc_buffer_availablelength(source->pushback) == 0) {
330 isc_buffer_t *tbuf = NULL;
335 oldlen = isc_buffer_length(source->pushback);
336 result = isc_buffer_allocate(lex->mctx, &tbuf, oldlen * 2);
337 if (result != ISC_R_SUCCESS)
339 isc_buffer_usedregion(source->pushback, &used);
340 result = isc_buffer_copyregion(tbuf, &used);
/* The new buffer is twice the old size, so the copy is expected to fit. */
341 INSIST(result == ISC_R_SUCCESS);
/* Preserve the read position across the swap. */
342 tbuf->current = source->pushback->current;
343 isc_buffer_free(&source->pushback);
344 source->pushback = tbuf;
346 isc_buffer_putuint8(source->pushback, (isc_uint8_t)c);
347 return (ISC_R_SUCCESS);
/*
 * Read the next token from the current source into '*tokenp'.
 *
 * 'options' is a mask of ISC_LEXOPT_* flags that selects which token kinds
 * are recognised (EOL, EOF, initial whitespace, numbers, quoted strings,
 * DNS multi-line parentheses, escapes, ...).
 *
 * Characters are fetched from the source's pushback buffer; when that is
 * exhausted a character is read from the underlying FILE or isc_buffer_t and
 * appended to the pushback buffer.  A state machine ('state') classifies
 * characters; token text is accumulated in lex->data, which is enlarged with
 * grow_data() when 'remaining' reaches zero.
 *
 * Results visible in this listing include ISC_R_SUCCESS, ISC_R_NOMORE,
 * ISC_R_UNBALANCED, ISC_R_IOERROR, ISC_R_UNEXPECTEDEND,
 * ISC_R_UNBALANCEDQUOTES, and a previously recorded source->result.
 *
 * NOTE(review): many lines of this function are missing from this listing
 * (loop and switch headers, several conditions, 'goto'/'break' statements
 * and closing braces).  The comments below describe only what the visible
 * lines show.
 */
351 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp) {
354 isc_boolean_t done = ISC_FALSE;
355 isc_boolean_t no_comments = ISC_FALSE;
356 isc_boolean_t escaped = ISC_FALSE;
357 lexstate state = lexstate_start;
358 lexstate saved_state = lexstate_start;
359 isc_buffer_t *buffer;
363 isc_uint32_t as_ulong;
364 unsigned int saved_options;
368 * Get the next token.
371 REQUIRE(VALID_LEX(lex));
372 source = HEAD(lex->sources);
373 REQUIRE(tokenp != NULL);
/* Snapshot state that isc_lex_ungettoken() restores. */
375 lex->saved_paren_count = lex->paren_count;
/*
 * NOTE(review): 'source' is dereferenced on the next line before the
 * 'source == NULL' test that follows it; with an empty source list this is a
 * NULL pointer dereference.  The snapshot should be taken after the NULL
 * check.
 */
376 source->saved_line = source->line;
378 if (source == NULL) {
379 if ((options & ISC_LEXOPT_NOMORE) != 0) {
380 tokenp->type = isc_tokentype_nomore;
381 return (ISC_R_SUCCESS);
383 return (ISC_R_NOMORE);
/* A sticky error recorded on the source is reported again. */
386 if (source->result != ISC_R_SUCCESS)
387 return (source->result);
/* Nothing left in the pushback buffer (rest of this condition is not visible). */
389 if (isc_buffer_remaininglength(source->pushback) == 0 &&
392 if ((options & ISC_LEXOPT_DNSMULTILINE) != 0 &&
393 lex->paren_count != 0) {
394 lex->paren_count = 0;
395 return (ISC_R_UNBALANCED);
397 if ((options & ISC_LEXOPT_EOF) != 0) {
398 tokenp->type = isc_tokentype_eof;
399 return (ISC_R_SUCCESS);
404 isc_buffer_compact(source->pushback);
/* Keep the caller's options; 'options' may be adjusted below. */
406 saved_options = options;
407 if ((options & ISC_LEXOPT_DNSMULTILINE) != 0 && lex->paren_count > 0)
414 remaining = lex->max_token;
416 #ifdef HAVE_FLOCKFILE
418 flockfile(source->input);
/* Refill: pull one character from the underlying input when pushback is empty. */
422 if (isc_buffer_remaininglength(source->pushback) == 0) {
423 if (source->is_file) {
424 stream = source->input;
426 #if defined(HAVE_FLOCKFILE) && defined(HAVE_GETCUNLOCKED)
427 c = getc_unlocked(stream);
/* A stream error is recorded on the source so later calls see it too. */
432 if (ferror(stream)) {
433 source->result = ISC_R_IOERROR;
434 result = source->result;
437 source->at_eof = ISC_TRUE;
440 buffer = source->input;
/* Buffer source: no unread bytes left means end of input. */
442 if (buffer->current == buffer->used) {
444 source->at_eof = ISC_TRUE;
446 c = *((char *)buffer->base +
/* Remember the character so the token can be ungot or re-read later. */
452 source->result = pushandgrow(lex, source, c);
453 if (source->result != ISC_R_SUCCESS) {
454 result = source->result;
460 if (!source->at_eof) {
461 if (state == lexstate_start)
462 /* Token has not started yet. */
464 isc_buffer_consumedlength(source->pushback);
465 c = isc_buffer_getuint8(source->pushback);
/* Comment detection, skipped while inside a quoted string or another comment. */
473 if (lex->comment_ok && !no_comments) {
474 if (!escaped && c == ';' &&
475 ((lex->comments & ISC_LEXCOMMENT_DNSMASTERFILE)
478 state = lexstate_eatline;
479 no_comments = ISC_TRUE;
481 } else if (c == '/' &&
484 ISC_LEXCOMMENT_CPLUSPLUS)) != 0) {
486 state = lexstate_maybecomment;
487 no_comments = ISC_TRUE;
489 } else if (c == '#' &&
490 ((lex->comments & ISC_LEXCOMMENT_SHELL)
493 state = lexstate_eatline;
494 no_comments = ISC_TRUE;
500 /* INSIST(c == EOF || (c >= 0 && c <= 255)); */
504 lex->last_was_eol = ISC_FALSE;
505 if ((options & ISC_LEXOPT_DNSMULTILINE) != 0 &&
506 lex->paren_count != 0) {
507 lex->paren_count = 0;
508 result = ISC_R_UNBALANCED;
511 if ((options & ISC_LEXOPT_EOF) == 0) {
515 tokenp->type = isc_tokentype_eof;
517 } else if (c == ' ' || c == '\t') {
/* Whitespace at the start of a line can be reported as its own token. */
518 if (lex->last_was_eol &&
519 (options & ISC_LEXOPT_INITIALWS)
521 lex->last_was_eol = ISC_FALSE;
522 tokenp->type = isc_tokentype_initialws;
523 tokenp->value.as_char = c;
526 } else if (c == '\n') {
527 if ((options & ISC_LEXOPT_EOL) != 0) {
528 tokenp->type = isc_tokentype_eol;
531 lex->last_was_eol = ISC_TRUE;
532 } else if (c == '\r') {
533 if ((options & ISC_LEXOPT_EOL) != 0)
534 state = lexstate_crlf;
535 } else if (c == '"' &&
536 (options & ISC_LEXOPT_QSTRING) != 0) {
537 lex->last_was_eol = ISC_FALSE;
/* Comment characters inside a quoted string are literal. */
538 no_comments = ISC_TRUE;
539 state = lexstate_qstring;
540 } else if (lex->specials[c]) {
541 lex->last_was_eol = ISC_FALSE;
/* Parentheses drive multi-line handling when DNSMULTILINE is set. */
542 if ((c == '(' || c == ')') &&
543 (options & ISC_LEXOPT_DNSMULTILINE) != 0) {
545 if (lex->paren_count == 0)
549 if (lex->paren_count == 0) {
550 result = ISC_R_UNBALANCED;
554 if (lex->paren_count == 0)
560 tokenp->type = isc_tokentype_special;
561 tokenp->value.as_char = c;
563 } else if (isdigit((unsigned char)c) &&
564 (options & ISC_LEXOPT_NUMBER) != 0) {
565 lex->last_was_eol = ISC_FALSE;
566 state = lexstate_number;
569 lex->last_was_eol = ISC_FALSE;
570 state = lexstate_string;
577 tokenp->type = isc_tokentype_eol;
579 lex->last_was_eol = ISC_TRUE;
581 case lexstate_number:
582 if (c == EOF || !isdigit((unsigned char)c)) {
583 if (c == ' ' || c == '\t' || c == '\r' ||
584 c == '\n' || c == EOF ||
587 if ((options & ISC_LEXOPT_CNUMBER) != 0)
593 result = isc_parse_uint32(&as_ulong,
596 if (result == ISC_R_SUCCESS) {
598 isc_tokentype_number;
599 tokenp->value.as_ulong =
/* Text that fails to parse as a number is returned as a string token. */
601 } else if (result == ISC_R_BADNUMBER) {
605 isc_tokentype_string;
606 v = &(tokenp->value);
607 v->as_textregion.base =
609 v->as_textregion.length =
616 } else if (!(options & ISC_LEXOPT_CNUMBER) ||
617 ((c != 'x' && c != 'X') ||
618 (curr != &lex->data[1]) ||
619 (lex->data[0] != '0'))) {
620 /* Above test supports hex numbers */
621 state = lexstate_string;
/* Out of room in lex->data: enlarge it before storing the character. */
624 if (remaining == 0U) {
625 result = grow_data(lex, &remaining,
627 if (result != ISC_R_SUCCESS)
630 INSIST(remaining > 0U);
635 case lexstate_string:
637 (c == ' ' || c == '\t' || lex->specials[c])) ||
638 c == '\r' || c == '\n' || c == EOF) {
640 if (source->result != ISC_R_SUCCESS) {
641 result = source->result;
644 tokenp->type = isc_tokentype_string;
645 tokenp->value.as_textregion.base = lex->data;
/* Token length = buffer capacity minus the space still unused. */
646 tokenp->value.as_textregion.length =
647 lex->max_token - remaining;
/* A backslash escapes the next character only if it is not itself escaped. */
651 if ((options & ISC_LEXOPT_ESCAPE) != 0)
652 escaped = (!escaped && c == '\\') ?
653 ISC_TRUE : ISC_FALSE;
654 if (remaining == 0U) {
655 result = grow_data(lex, &remaining,
657 if (result != ISC_R_SUCCESS)
660 INSIST(remaining > 0U);
665 case lexstate_maybecomment:
667 (lex->comments & ISC_LEXCOMMENT_C) != 0) {
668 state = lexstate_ccomment;
670 } else if (c == '/' &&
671 (lex->comments & ISC_LEXCOMMENT_CPLUSPLUS) != 0) {
672 state = lexstate_eatline;
677 no_comments = ISC_FALSE;
680 case lexstate_ccomment:
682 result = ISC_R_UNEXPECTEDEND;
686 state = lexstate_ccommentend;
688 case lexstate_ccommentend:
690 result = ISC_R_UNEXPECTEDEND;
695 * C-style comments become a single space.
696 * We do this to ensure that a comment will
697 * act as a delimiter for strings and
701 no_comments = ISC_FALSE;
705 state = lexstate_ccomment;
707 case lexstate_eatline:
709 result = ISC_R_UNEXPECTEDEND;
713 no_comments = ISC_FALSE;
718 case lexstate_qstring:
720 result = ISC_R_UNEXPECTEDEND;
727 * Overwrite the preceding backslash.
729 INSIST(prev != NULL);
732 tokenp->type = isc_tokentype_qstring;
733 tokenp->value.as_textregion.base =
735 tokenp->value.as_textregion.length =
736 lex->max_token - remaining;
737 no_comments = ISC_FALSE;
/* An unescaped newline ends a quoted string with an error unless multi-line is allowed. */
741 if (c == '\n' && !escaped &&
742 (options & ISC_LEXOPT_QSTRINGMULTILINE) == 0) {
744 result = ISC_R_UNBALANCEDQUOTES;
747 if (c == '\\' && !escaped)
751 if (remaining == 0U) {
752 result = grow_data(lex, &remaining,
754 if (result != ISC_R_SUCCESS)
757 INSIST(remaining > 0U);
765 FATAL_ERROR(__FILE__, __LINE__,
766 isc_msgcat_get(isc_msgcat, ISC_MSGSET_LEX,
767 ISC_MSG_UNEXPECTEDSTATE,
768 "Unexpected state %d"),
770 /* Does not return. */
775 result = ISC_R_SUCCESS;
/* Release the stdio lock taken with flockfile() earlier in this function. */
777 #ifdef HAVE_FLOCKFILE
779 funlockfile(source->input);
/*
 * Convenience wrapper around isc_lex_gettoken() for DNS master-file style
 * input.
 *
 * The base options are EOL | EOF | DNSMULTILINE | ESCAPE; QSTRING is added
 * when 'expect' is isc_tokentype_qstring and NUMBER when it is
 * isc_tokentype_number.
 *
 * Outcome, as far as the visible lines show:
 *   - If isc_lex_gettoken() returns ISC_R_RANGE the token is ungot first.
 *   - With 'eol' true, an EOL or EOF token is accepted (ISC_R_SUCCESS).
 *   - A plain string is accepted when a quoted string was expected.
 *   - Any other type mismatch ungets the token and returns
 *     ISC_R_UNEXPECTEDEND (token was EOL/EOF), ISC_R_BADNUMBER (a number was
 *     expected) or ISC_R_UNEXPECTEDTOKEN.
 *
 * NOTE(review): the statement executed when 'result != ISC_R_SUCCESS' is not
 * visible here; presumably it returns 'result' - confirm.
 */
785 isc_lex_getmastertoken(isc_lex_t *lex, isc_token_t *token,
786 isc_tokentype_t expect, isc_boolean_t eol)
788 unsigned int options = ISC_LEXOPT_EOL | ISC_LEXOPT_EOF |
789 ISC_LEXOPT_DNSMULTILINE | ISC_LEXOPT_ESCAPE;
792 if (expect == isc_tokentype_qstring)
793 options |= ISC_LEXOPT_QSTRING;
794 else if (expect == isc_tokentype_number)
795 options |= ISC_LEXOPT_NUMBER;
796 result = isc_lex_gettoken(lex, options, token);
797 if (result == ISC_R_RANGE)
798 isc_lex_ungettoken(lex, token);
799 if (result != ISC_R_SUCCESS)
802 if (eol && ((token->type == isc_tokentype_eol) ||
803 (token->type == isc_tokentype_eof)))
804 return (ISC_R_SUCCESS);
/* An unquoted string satisfies a request for a quoted string. */
805 if (token->type == isc_tokentype_string &&
806 expect == isc_tokentype_qstring)
807 return (ISC_R_SUCCESS);
808 if (token->type != expect) {
/* Put the token back so the caller can re-read it with other expectations. */
809 isc_lex_ungettoken(lex, token);
810 if (token->type == isc_tokentype_eol ||
811 token->type == isc_tokentype_eof)
812 return (ISC_R_UNEXPECTEDEND);
813 if (expect == isc_tokentype_number)
814 return (ISC_R_BADNUMBER);
815 return (ISC_R_UNEXPECTEDTOKEN);
817 return (ISC_R_SUCCESS);
/*
 * Undo the most recent isc_lex_gettoken() on the current source.
 *
 * Requires: a valid lexer with a current source, a non-NULL 'tokenp', and
 * either some consumed characters in the pushback buffer or an EOF token.
 *
 * Restores the parenthesis count and line number saved by
 * isc_lex_gettoken(), clears at_eof, and calls isc_buffer_first() on the
 * pushback buffer (presumably rewinding it so the token's characters are
 * read again - confirm against the isc_buffer API).
 */
821 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp) {
824 * Unget the current token.
827 REQUIRE(VALID_LEX(lex));
828 source = HEAD(lex->sources);
829 REQUIRE(source != NULL);
830 REQUIRE(tokenp != NULL);
831 REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 ||
832 tokenp->type == isc_tokentype_eof);
836 isc_buffer_first(source->pushback);
837 lex->paren_count = lex->saved_paren_count;
838 source->line = source->saved_line;
839 source->at_eof = ISC_FALSE;
/*
 * Describe, in region 'r', the raw text of the most recently read token as
 * it sits in the current source's pushback buffer.
 *
 * Requires: a valid lexer with a current source, a non-NULL 'tokenp', and
 * either some consumed characters in the pushback buffer or an EOF token.
 *
 * r->base is computed from the pushback buffer's base and r->length from its
 * consumed length; source->ignored must not exceed the consumed length.
 *
 * NOTE(review): the second operands of both computations are not visible in
 * this listing; presumably each is source->ignored (the count of characters
 * skipped before the token started) - confirm.  'r' points into the
 * pushback buffer, so it is only meaningful while that buffer is unchanged.
 */
843 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r)
847 REQUIRE(VALID_LEX(lex));
848 source = HEAD(lex->sources);
849 REQUIRE(source != NULL);
850 REQUIRE(tokenp != NULL);
851 REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 ||
852 tokenp->type == isc_tokentype_eof);
856 INSIST(source->ignored <= isc_buffer_consumedlength(source->pushback));
857 r->base = (unsigned char *)isc_buffer_base(source->pushback) +
859 r->length = isc_buffer_consumedlength(source->pushback) -
/*
 * Returns the name of the current source (the head of lex->sources).  The
 * returned pointer is the source's own copy, not a duplicate.
 *
 * NOTE(review): lines between the HEAD() lookup and the return are missing
 * from this listing; presumably they handle the no-source case - confirm.
 */
865 isc_lex_getsourcename(isc_lex_t *lex) {
868 REQUIRE(VALID_LEX(lex));
869 source = HEAD(lex->sources);
874 return (source->name);
/*
 * Returns the current line number of the current source (the head of
 * lex->sources).
 *
 * NOTE(review): lines between the HEAD() lookup and the return are missing
 * from this listing; presumably they handle the no-source case - confirm.
 */
878 isc_lex_getsourceline(isc_lex_t *lex) {
881 REQUIRE(VALID_LEX(lex));
882 source = HEAD(lex->sources);
887 return (source->line);
/*
 * Replace the current source's name with a copy of 'name'.
 *
 * The new copy is made before the old name is freed, so the source keeps its
 * old name if the duplication fails.
 *
 * Returns ISC_R_NOTFOUND, ISC_R_NOMEMORY or ISC_R_SUCCESS.
 *
 * NOTE(review): the conditions guarding the two early returns are not
 * visible in this listing; presumably ISC_R_NOTFOUND is for "no current
 * source" and ISC_R_NOMEMORY for a failed isc_mem_strdup() - confirm.
 */
892 isc_lex_setsourcename(isc_lex_t *lex, const char *name) {
896 REQUIRE(VALID_LEX(lex));
897 source = HEAD(lex->sources);
900 return(ISC_R_NOTFOUND);
901 newname = isc_mem_strdup(lex->mctx, name);
903 return (ISC_R_NOMEMORY);
904 isc_mem_free(lex->mctx, source->name);
905 source->name = newname;
906 return (ISC_R_SUCCESS);
/*
 * Returns the is_file flag of the current source (the head of lex->sources),
 * i.e. whether it was registered through the file/stream path rather than
 * as a buffer.
 *
 * NOTE(review): lines between the HEAD() lookup and the return are missing
 * from this listing; presumably they handle the no-source case - confirm.
 */
910 isc_lex_isfile(isc_lex_t *lex) {
913 REQUIRE(VALID_LEX(lex));
915 source = HEAD(lex->sources);
920 return (source->is_file);