2 * Copyright (C) 2004, 2006 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1998-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: lex.c,v 1.66.2.6.2.10 2006/01/04 23:50:21 marka Exp $ */
26 #include <isc/buffer.h>
31 #include <isc/parseint.h>
32 #include <isc/print.h>
33 #include <isc/stdio.h>
34 #include <isc/string.h>
/*
 * Per-source bookkeeping for one lexer input (a stdio stream or an
 * isc_buffer_t).  Only some members are visible in this listing; the
 * notes below cover those.
 */
37 typedef struct inputsource {
/* ISC_TRUE when new_source() was called for a stdio stream, ISC_FALSE for a buffer. */
39 isc_boolean_t is_file;
/* When set (and is_file is set), isc_lex_close() calls fclose() on the input. */
40 isc_boolean_t need_close;
/* Buffer of characters already read; pushandgrow() appends to it. */
42 isc_buffer_t * pushback;
/* Copy of 'line' taken at the start of isc_lex_gettoken(); restored by isc_lex_ungettoken(). */
47 unsigned long saved_line;
/* Linkage on the owning lexer's 'sources' list. */
48 ISC_LINK(struct inputsource) link;
/*
 * Magic value stored in a lexer by isc_lex_create(), and the validity test
 * used in the REQUIRE() at the top of the public entry points.
 */
51 #define LEX_MAGIC ISC_MAGIC('L', 'e', 'x', '!')
52 #define VALID_LEX(l) ISC_MAGIC_VALID(l, LEX_MAGIC)
/*
 * Members of the lexer object.  The struct header and its leading members
 * (magic, mctx, max_token, data are used elsewhere in this file) are not
 * visible in this listing.
 */
/* Bitmask of permitted comment styles (tested against ISC_LEXCOMMENT_*). */
60 unsigned int comments;
/* Initialised to ISC_TRUE; isc_lex_gettoken() only looks for comments while set. */
61 isc_boolean_t comment_ok;
/* Tracks whether the last thing seen was an end of line (used for initial-whitespace tokens). */
62 isc_boolean_t last_was_eol;
/* Parenthesis counter consulted when ISC_LEXOPT_DNSMULTILINE is in effect. */
63 unsigned int paren_count;
/* Snapshot of paren_count taken by isc_lex_gettoken(); restored by isc_lex_ungettoken(). */
64 unsigned int saved_paren_count;
/* Table indexed by character value; non-zero entries are "special" characters. */
65 isc_lexspecials_t specials;
/* Input sources; the head of the list is the current source. */
66 LIST(struct inputsource) sources;
/*
 * Enlarge the lexer's token buffer.
 *
 * Allocates a replacement buffer of lex->max_token * 2 + 1 bytes, copies
 * the existing lex->max_token + 1 bytes into it, rebases *currp and *prevp
 * so they point at the same offsets in the new buffer, and releases the
 * old buffer.  *remainingp is increased by lex->max_token.
 *
 * Returns ISC_R_SUCCESS, or ISC_R_NOMEMORY (presumably when the allocation
 * fails; the guarding test is not visible in this listing).
 *
 * NOTE(review): the lines that declare 'new', guard the *prevp rebase, and
 * presumably update lex->data and lex->max_token are absent from this
 * listing; confirm against the full file.
 */
69 static inline isc_result_t
70 grow_data(isc_lex_t *lex, size_t *remainingp, char **currp, char **prevp) {
73 new = isc_mem_get(lex->mctx, lex->max_token * 2 + 1);
75 return (ISC_R_NOMEMORY);
/* Copy the whole old buffer, including the extra byte beyond max_token. */
76 memcpy(new, lex->data, lex->max_token + 1);
/* Rebase the caller's pointers by their offset from the old buffer start. */
77 *currp = new + (*currp - lex->data);
79 *prevp = new + (*prevp - lex->data);
80 isc_mem_put(lex->mctx, lex->data, lex->max_token + 1);
82 *remainingp += lex->max_token;
84 return (ISC_R_SUCCESS);
/*
 * Create a lexer.
 *
 * Requires: 'lexp' is non-NULL and '*lexp' is NULL; 'max_token' > 0.
 *
 * Allocates the lexer object and a token buffer of max_token + 1 bytes
 * from 'mctx'.  If the token buffer cannot be allocated the lexer object
 * is released again.
 *
 * Returns ISC_R_SUCCESS or ISC_R_NOMEMORY.
 */
88 isc_lex_create(isc_mem_t *mctx, size_t max_token, isc_lex_t **lexp) {
95 REQUIRE(lexp != NULL && *lexp == NULL);
96 REQUIRE(max_token > 0U);
98 lex = isc_mem_get(mctx, sizeof(*lex));
100 return (ISC_R_NOMEMORY);
101 lex->data = isc_mem_get(mctx, max_token + 1);
102 if (lex->data == NULL) {
/* Undo the first allocation so nothing leaks on failure. */
103 isc_mem_put(mctx, lex, sizeof(*lex));
104 return (ISC_R_NOMEMORY);
107 lex->max_token = max_token;
/* Initial state: comments allowed, positioned as if just after an EOL, no open parentheses. */
109 lex->comment_ok = ISC_TRUE;
110 lex->last_was_eol = ISC_TRUE;
111 lex->paren_count = 0;
112 lex->saved_paren_count = 0;
/* No special characters until isc_lex_setspecials() is called. */
113 memset(lex->specials, 0, 256);
114 INIT_LIST(lex->sources);
115 lex->magic = LEX_MAGIC;
119 return (ISC_R_SUCCESS);
/*
 * Destroy a lexer.
 *
 * Requires: 'lexp' is non-NULL and refers to a valid lexer.
 *
 * Closes every remaining input source, then releases the token buffer
 * and the lexer object itself.
 */
123 isc_lex_destroy(isc_lex_t **lexp) {
130 REQUIRE(lexp != NULL);
132 REQUIRE(VALID_LEX(lex));
/* isc_lex_close() removes the head source each time; failure here is treated as fatal. */
134 while (!EMPTY(lex->sources))
135 RUNTIME_CHECK(isc_lex_close(lex) == ISC_R_SUCCESS);
/* The size must match the current max_token + 1 allocation. */
136 if (lex->data != NULL)
137 isc_mem_put(lex->mctx, lex->data, lex->max_token + 1);
139 isc_mem_put(lex->mctx, lex, sizeof(*lex));
/* Accessor: returns lex->comments unchanged.  Requires a valid lexer. */
145 isc_lex_getcomments(isc_lex_t *lex) {
147 * Return the current lexer commenting styles.
150 REQUIRE(VALID_LEX(lex));
152 return (lex->comments);
/* Mutator: stores 'comments' in lex->comments.  Requires a valid lexer. */
156 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments) {
158 * Set allowed lexer commenting styles.
161 REQUIRE(VALID_LEX(lex));
163 lex->comments = comments;
/*
 * Copies the lexer's 256-byte specials table into the caller-supplied
 * 'specials'.  Requires a valid lexer.
 */
167 isc_lex_getspecials(isc_lex_t *lex, isc_lexspecials_t specials) {
169 * Put the current list of specials into 'specials'.
172 REQUIRE(VALID_LEX(lex));
174 memcpy(specials, lex->specials, 256);
/*
 * Replaces the lexer's 256-byte specials table with a copy of 'specials'.
 * Requires a valid lexer.
 */
178 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials) {
180 * The characters in 'specials' are returned as tokens. Along with
181 * whitespace, they delimit strings and numbers.
184 REQUIRE(VALID_LEX(lex));
186 memcpy(lex->specials, specials, 256);
/*
 * Allocate an inputsource for 'input' and make it the current source by
 * prepending it to lex->sources.
 *
 *   is_file     ISC_TRUE if 'input' is a stdio stream, ISC_FALSE if it is
 *               an isc_buffer_t (see the callers in this file).
 *   need_close  whether isc_lex_close() should fclose() the stream.
 *   name        source name; a private copy is made with isc_mem_strdup().
 *
 * Each failure path releases whatever was allocated before it.  Returns
 * ISC_R_SUCCESS or ISC_R_NOMEMORY; on pushback-buffer allocation failure
 * the return statement is not visible in this listing.
 */
189 static inline isc_result_t
190 new_source(isc_lex_t *lex, isc_boolean_t is_file, isc_boolean_t need_close,
191 void *input, const char *name)
196 source = isc_mem_get(lex->mctx, sizeof(*source));
198 return (ISC_R_NOMEMORY);
199 source->result = ISC_R_SUCCESS;
200 source->is_file = is_file;
201 source->need_close = need_close;
202 source->at_eof = ISC_FALSE;
203 source->input = input;
204 source->name = isc_mem_strdup(lex->mctx, name);
205 if (source->name == NULL) {
206 isc_mem_put(lex->mctx, source, sizeof(*source));
207 return (ISC_R_NOMEMORY);
/* isc_buffer_allocate() is handed a pointer that is NULL beforehand. */
209 source->pushback = NULL;
210 result = isc_buffer_allocate(lex->mctx, &source->pushback,
212 if (result != ISC_R_SUCCESS) {
213 isc_mem_free(lex->mctx, source->name);
214 isc_mem_put(lex->mctx, source, sizeof(*source));
/* Newest source goes to the head of the list, which is where readers look. */
219 ISC_LIST_INITANDPREPEND(lex->sources, source, link);
221 return (ISC_R_SUCCESS);
/*
 * Opens 'filename' for reading with isc_stdio_open() and registers the
 * resulting stream as a new source named after the file.  The source is
 * created with need_close set, so isc_lex_close() will fclose() it.
 *
 * If new_source() fails the stream is closed here so it does not leak.
 */
225 isc_lex_openfile(isc_lex_t *lex, const char *filename) {
230 * Open 'filename' and make it the current input source for 'lex'.
233 REQUIRE(VALID_LEX(lex));
235 result = isc_stdio_open(filename, "r", &stream);
236 if (result != ISC_R_SUCCESS)
239 result = new_source(lex, ISC_TRUE, ISC_TRUE, stream, filename);
240 if (result != ISC_R_SUCCESS)
241 (void)fclose(stream);
/*
 * Registers a caller-owned stdio stream as a new source.  The source name
 * is synthesised as "stream-" followed by the pointer value.  need_close is
 * ISC_FALSE, so isc_lex_close() does not fclose() the stream.
 *
 * Returns whatever new_source() returns.
 */
246 isc_lex_openstream(isc_lex_t *lex, FILE *stream) {
250 * Make 'stream' the current input source for 'lex'.
253 REQUIRE(VALID_LEX(lex));
255 snprintf(name, sizeof(name), "stream-%p", stream);
257 return (new_source(lex, ISC_TRUE, ISC_FALSE, stream, name));
/*
 * Registers a caller-owned isc_buffer_t as a new source.  The source name
 * is synthesised as "buffer-" followed by the pointer value.  The source
 * is marked as not a file and not needing close.
 *
 * Returns whatever new_source() returns.
 */
261 isc_lex_openbuffer(isc_lex_t *lex, isc_buffer_t *buffer) {
265 * Make 'buffer' the current input source for 'lex'.
268 REQUIRE(VALID_LEX(lex));
270 snprintf(name, sizeof(name), "buffer-%p", buffer);
272 return (new_source(lex, ISC_FALSE, ISC_FALSE, buffer, name));
/*
 * Removes the current (head) source from the lexer and releases it.
 *
 * Returns ISC_R_NOMORE (presumably when there is no source; the test is
 * not visible in this listing) or ISC_R_SUCCESS.
 */
276 isc_lex_close(isc_lex_t *lex) {
280 * Close the most recently opened object (i.e. file or buffer).
283 REQUIRE(VALID_LEX(lex));
285 source = HEAD(lex->sources);
287 return (ISC_R_NOMORE);
289 ISC_LIST_UNLINK(lex->sources, source, link);
/* Only streams this lexer opened itself (need_close) are fclose()d. */
290 if (source->is_file) {
291 if (source->need_close)
292 (void)fclose((FILE *)(source->input));
/* Release everything new_source() allocated: name copy, pushback buffer, the source. */
294 isc_mem_free(lex->mctx, source->name);
295 isc_buffer_free(&source->pushback);
296 isc_mem_put(lex->mctx, source, sizeof(*source));
298 return (ISC_R_SUCCESS);
/*
 * Two of the scanner states used by isc_lex_gettoken(); the rest of the
 * enumeration is not visible in this listing.
 */
/* Entered after a '/' that may begin a comment. */
306 lexstate_maybecomment,
/* Entered from lexstate_ccomment; falls back to lexstate_ccomment otherwise. */
308 lexstate_ccommentend,
/* Convenience mask combining the initial-whitespace and end-of-line options. */
313 #define IWSEOL (ISC_LEXOPT_INITIALWS | ISC_LEXOPT_EOL)
/*
 * Step the pushback buffer's read position back by one so the last
 * character will be read again, and clear the source's at_eof flag.
 *
 * Requires: the pushback buffer's 'current' position is greater than 0.
 *
 * NOTE(review): how 'c' is used is not visible in this listing.
 */
316 pushback(inputsource *source, int c) {
317 REQUIRE(source->pushback->current > 0);
319 source->at_eof = ISC_FALSE;
322 source->pushback->current--;
/*
 * Append character 'c' to the source's pushback buffer, first replacing
 * the buffer with one of twice the length if it has no space available.
 *
 * When growing, the used region is copied into the new buffer and the
 * read position ('current') is carried over before the old buffer is
 * freed.
 *
 * Returns ISC_R_SUCCESS; on allocation failure the return statement is not
 * visible in this listing (presumably the isc_buffer_allocate() result).
 */
328 pushandgrow(isc_lex_t *lex, inputsource *source, int c) {
329 if (isc_buffer_availablelength(source->pushback) == 0) {
330 isc_buffer_t *tbuf = NULL;
335 oldlen = isc_buffer_length(source->pushback);
336 result = isc_buffer_allocate(lex->mctx, &tbuf, oldlen * 2);
337 if (result != ISC_R_SUCCESS)
339 isc_buffer_usedregion(source->pushback, &used);
340 result = isc_buffer_copyregion(tbuf, &used);
/* The new buffer is twice the old length, so the copy is expected to fit. */
341 INSIST(result == ISC_R_SUCCESS);
342 tbuf->current = source->pushback->current;
343 isc_buffer_free(&source->pushback);
344 source->pushback = tbuf;
346 isc_buffer_putuint8(source->pushback, (isc_uint8_t)c);
347 return (ISC_R_SUCCESS);
/*
 * Read the next token from the current source into '*tokenp'.
 *
 *   lex      a valid lexer.
 *   options  ISC_LEXOPT_* flags selecting which token kinds are reported
 *            (EOL, EOF, NOMORE, INITIALWS, NUMBER, CNUMBER, QSTRING,
 *            QSTRINGMULTILINE, ESCAPE, DNSMULTILINE are tested below).
 *   tokenp   non-NULL; receives the token type and value.
 *
 * The function is a character-at-a-time state machine.  Characters are
 * read from the stream or buffer into the source's pushback buffer and
 * then consumed from there.  String-like tokens are accumulated in
 * lex->data, which is enlarged with grow_data() when it fills up.
 *
 * Result codes visible here: ISC_R_SUCCESS, ISC_R_NOMORE,
 * ISC_R_UNBALANCED, ISC_R_UNEXPECTEDEND, ISC_R_UNBALANCEDQUOTES,
 * ISC_R_IOERROR (via source->result), plus failures from pushandgrow(),
 * grow_data() and isc_parse_uint32().
 *
 * NOTE(review): many lines (switch/case labels, braces, several
 * conditions and declarations) are absent from this listing; comments
 * below describe only what the visible lines establish.
 */
351 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp) {
354 isc_boolean_t done = ISC_FALSE;
355 isc_boolean_t no_comments = ISC_FALSE;
356 isc_boolean_t escaped = ISC_FALSE;
357 lexstate state = lexstate_start;
358 lexstate saved_state = lexstate_start;
359 isc_buffer_t *buffer;
363 isc_uint32_t as_ulong;
364 unsigned int saved_options;
368 * Get the next token.
371 REQUIRE(VALID_LEX(lex));
372 source = HEAD(lex->sources);
373 REQUIRE(tokenp != NULL);
/* No input source at all: report a 'nomore' token if asked to, else ISC_R_NOMORE. */
375 if (source == NULL) {
376 if ((options & ISC_LEXOPT_NOMORE) != 0) {
377 tokenp->type = isc_tokentype_nomore;
378 return (ISC_R_SUCCESS);
380 return (ISC_R_NOMORE);
/* An error recorded on the source earlier is returned again on every call. */
383 if (source->result != ISC_R_SUCCESS)
384 return (source->result);
/* Snapshot state that isc_lex_ungettoken() restores. */
386 lex->saved_paren_count = lex->paren_count;
387 source->saved_line = source->line;
/* Nothing left in the pushback buffer (the rest of this condition is not visible here). */
389 if (isc_buffer_remaininglength(source->pushback) == 0 &&
392 if ((options & ISC_LEXOPT_DNSMULTILINE) != 0 &&
393 lex->paren_count != 0) {
394 lex->paren_count = 0;
395 return (ISC_R_UNBALANCED);
397 if ((options & ISC_LEXOPT_EOF) != 0) {
398 tokenp->type = isc_tokentype_eof;
399 return (ISC_R_SUCCESS);
404 isc_buffer_compact(source->pushback);
406 saved_options = options;
407 if ((options & ISC_LEXOPT_DNSMULTILINE) != 0 && lex->paren_count > 0)
/* The whole token buffer is free at the start of a token. */
414 remaining = lex->max_token;
416 #ifdef HAVE_FLOCKFILE
418 flockfile(source->input);
/* Pushback exhausted: fetch one more character from the stream or the input buffer. */
422 if (isc_buffer_remaininglength(source->pushback) == 0) {
423 if (source->is_file) {
424 stream = source->input;
426 #if defined(HAVE_FLOCKFILE) && defined(HAVE_GETCUNLOCKED)
427 c = getc_unlocked(stream);
/* A stream error is recorded on the source and becomes this call's result. */
432 if (ferror(stream)) {
433 source->result = ISC_R_IOERROR;
434 result = source->result;
437 source->at_eof = ISC_TRUE;
440 buffer = source->input;
/* Buffer sources are at EOF once every used byte has been consumed. */
442 if (buffer->current == buffer->used) {
444 source->at_eof = ISC_TRUE;
446 c = *((char *)buffer->base +
/* Record the character in the pushback buffer; a failure is stored on the source. */
452 source->result = pushandgrow(lex, source, c);
453 if (source->result != ISC_R_SUCCESS) {
454 result = source->result;
/* Consume the next character from the pushback buffer. */
460 if (!source->at_eof) {
461 if (state == lexstate_start)
462 /* Token has not started yet. */
464 isc_buffer_consumedlength(source->pushback);
465 c = isc_buffer_getuint8(source->pushback);
/* Comment detection: ';', '/' and '#' start comments when the matching style is enabled. */
473 if (lex->comment_ok && !no_comments) {
474 if (!escaped && c == ';' &&
475 ((lex->comments & ISC_LEXCOMMENT_DNSMASTERFILE)
478 state = lexstate_eatline;
479 no_comments = ISC_TRUE;
481 } else if (c == '/' &&
484 ISC_LEXCOMMENT_CPLUSPLUS)) != 0) {
486 state = lexstate_maybecomment;
487 no_comments = ISC_TRUE;
489 } else if (c == '#' &&
490 ((lex->comments & ISC_LEXCOMMENT_SHELL)
493 state = lexstate_eatline;
494 no_comments = ISC_TRUE;
500 /* INSIST(c == EOF || (c >= 0 && c <= 255)); */
/* First character of a token: emit a one-character token or pick the next state. */
504 lex->last_was_eol = ISC_FALSE;
505 if ((options & ISC_LEXOPT_DNSMULTILINE) != 0 &&
506 lex->paren_count != 0) {
507 lex->paren_count = 0;
508 result = ISC_R_UNBALANCED;
511 if ((options & ISC_LEXOPT_EOF) == 0) {
515 tokenp->type = isc_tokentype_eof;
517 } else if (c == ' ' || c == '\t') {
518 if (lex->last_was_eol &&
519 (options & ISC_LEXOPT_INITIALWS)
521 lex->last_was_eol = ISC_FALSE;
522 tokenp->type = isc_tokentype_initialws;
523 tokenp->value.as_char = c;
526 } else if (c == '\n') {
527 if ((options & ISC_LEXOPT_EOL) != 0) {
528 tokenp->type = isc_tokentype_eol;
531 lex->last_was_eol = ISC_TRUE;
532 } else if (c == '\r') {
533 if ((options & ISC_LEXOPT_EOL) != 0)
534 state = lexstate_crlf;
535 } else if (c == '"' &&
536 (options & ISC_LEXOPT_QSTRING) != 0) {
537 lex->last_was_eol = ISC_FALSE;
538 no_comments = ISC_TRUE;
539 state = lexstate_qstring;
540 } else if (lex->specials[c]) {
541 lex->last_was_eol = ISC_FALSE;
/* Parentheses get extra handling (paren_count) in DNS multi-line mode. */
542 if ((c == '(' || c == ')') &&
543 (options & ISC_LEXOPT_DNSMULTILINE) != 0) {
545 if (lex->paren_count == 0)
549 if (lex->paren_count == 0) {
550 result = ISC_R_UNBALANCED;
554 if (lex->paren_count == 0)
560 tokenp->type = isc_tokentype_special;
561 tokenp->value.as_char = c;
563 } else if (isdigit((unsigned char)c) &&
564 (options & ISC_LEXOPT_NUMBER) != 0) {
565 lex->last_was_eol = ISC_FALSE;
566 state = lexstate_number;
569 lex->last_was_eol = ISC_FALSE;
570 state = lexstate_string;
577 tokenp->type = isc_tokentype_eol;
579 lex->last_was_eol = ISC_TRUE;
/* Number: on a delimiter, try to parse the text; fall back to a string on ISC_R_BADNUMBER. */
581 case lexstate_number:
582 if (c == EOF || !isdigit((unsigned char)c)) {
583 if (c == ' ' || c == '\t' || c == '\r' ||
584 c == '\n' || c == EOF ||
587 if ((options & ISC_LEXOPT_CNUMBER) != 0)
593 result = isc_parse_uint32(&as_ulong,
596 if (result == ISC_R_SUCCESS) {
598 isc_tokentype_number;
599 tokenp->value.as_ulong =
601 } else if (result == ISC_R_BADNUMBER) {
605 isc_tokentype_string;
606 v = &(tokenp->value);
607 v->as_textregion.base =
609 v->as_textregion.length =
616 } else if (!(options & ISC_LEXOPT_CNUMBER) ||
617 ((c != 'x' && c != 'X') ||
618 (curr != &lex->data[1]) ||
619 (lex->data[0] != '0'))) {
620 /* Above test supports hex numbers */
621 state = lexstate_string;
/* Token buffer full: enlarge it before storing another character. */
624 if (remaining == 0U) {
625 result = grow_data(lex, &remaining,
627 if (result != ISC_R_SUCCESS)
630 INSIST(remaining > 0U);
/* String: ends at CR, LF, EOF, or at whitespace/a special character (subject to a condition not visible here). */
635 case lexstate_string:
637 * EOF needs to be checked before lex->specials[c]
638 * as lex->specials[EOF] is not a good idea.
640 if (c == '\r' || c == '\n' || c == EOF ||
642 (c == ' ' || c == '\t' || lex->specials[c]))) {
644 if (source->result != ISC_R_SUCCESS) {
645 result = source->result;
648 tokenp->type = isc_tokentype_string;
649 tokenp->value.as_textregion.base = lex->data;
/* Token length is the amount of the token buffer that has been used. */
650 tokenp->value.as_textregion.length =
651 lex->max_token - remaining;
/* With ISC_LEXOPT_ESCAPE, a backslash escapes the next character; a second backslash ends the escape. */
655 if ((options & ISC_LEXOPT_ESCAPE) != 0)
656 escaped = (!escaped && c == '\\') ?
657 ISC_TRUE : ISC_FALSE;
658 if (remaining == 0U) {
659 result = grow_data(lex, &remaining,
661 if (result != ISC_R_SUCCESS)
664 INSIST(remaining > 0U);
/* After '/': decide between a C comment, a C++ comment, or no comment at all. */
669 case lexstate_maybecomment:
671 (lex->comments & ISC_LEXCOMMENT_C) != 0) {
672 state = lexstate_ccomment;
674 } else if (c == '/' &&
675 (lex->comments & ISC_LEXCOMMENT_CPLUSPLUS) != 0) {
676 state = lexstate_eatline;
681 no_comments = ISC_FALSE;
/* Inside a C comment; ISC_R_UNEXPECTEDEND is presumably for EOF (test not visible). */
684 case lexstate_ccomment:
686 result = ISC_R_UNEXPECTEDEND;
690 state = lexstate_ccommentend;
692 case lexstate_ccommentend:
694 result = ISC_R_UNEXPECTEDEND;
699 * C-style comments become a single space.
700 * We do this to ensure that a comment will
701 * act as a delimiter for strings and
705 no_comments = ISC_FALSE;
709 state = lexstate_ccomment;
/* Discarding the remainder of a line for line-style comments. */
711 case lexstate_eatline:
713 result = ISC_R_UNEXPECTEDEND;
717 no_comments = ISC_FALSE;
/* Quoted string: accumulated in lex->data until the closing quote. */
722 case lexstate_qstring:
724 result = ISC_R_UNEXPECTEDEND;
731 * Overwrite the preceding backslash.
733 INSIST(prev != NULL);
736 tokenp->type = isc_tokentype_qstring;
737 tokenp->value.as_textregion.base =
739 tokenp->value.as_textregion.length =
740 lex->max_token - remaining;
741 no_comments = ISC_FALSE;
/* An unescaped newline inside quotes is an error unless multi-line quoted strings are enabled. */
745 if (c == '\n' && !escaped &&
746 (options & ISC_LEXOPT_QSTRINGMULTILINE) == 0) {
748 result = ISC_R_UNBALANCEDQUOTES;
751 if (c == '\\' && !escaped)
755 if (remaining == 0U) {
756 result = grow_data(lex, &remaining,
758 if (result != ISC_R_SUCCESS)
761 INSIST(remaining > 0U);
/* Any other state value is treated as a fatal internal error. */
769 FATAL_ERROR(__FILE__, __LINE__,
770 isc_msgcat_get(isc_msgcat, ISC_MSGSET_LEX,
771 ISC_MSG_UNEXPECTEDSTATE,
772 "Unexpected state %d"),
774 /* Does not return. */
779 result = ISC_R_SUCCESS;
/* Release the stream lock taken by flockfile() above. */
781 #ifdef HAVE_FLOCKFILE
783 funlockfile(source->input);
/*
 * Convenience wrapper around isc_lex_gettoken() that fetches a token and
 * checks that it is of the expected type.
 *
 *   expect  the token type the caller wants; qstring and number each
 *           enable the corresponding lexer option.
 *   eol     when true, an EOL or EOF token is accepted in place of
 *           'expect'.
 *
 * A plain string is accepted when a qstring was expected.  On any other
 * type mismatch the token is pushed back with isc_lex_ungettoken() and
 * one of ISC_R_UNEXPECTEDEND (EOL/EOF seen), ISC_R_BADNUMBER (number
 * expected) or ISC_R_UNEXPECTEDTOKEN is returned.
 */
789 isc_lex_getmastertoken(isc_lex_t *lex, isc_token_t *token,
790 isc_tokentype_t expect, isc_boolean_t eol)
792 unsigned int options = ISC_LEXOPT_EOL | ISC_LEXOPT_EOF |
793 ISC_LEXOPT_DNSMULTILINE | ISC_LEXOPT_ESCAPE;
796 if (expect == isc_tokentype_qstring)
797 options |= ISC_LEXOPT_QSTRING;
798 else if (expect == isc_tokentype_number)
799 options |= ISC_LEXOPT_NUMBER;
800 result = isc_lex_gettoken(lex, options, token);
/* On ISC_R_RANGE the token is pushed back before the error is handled. */
801 if (result == ISC_R_RANGE)
802 isc_lex_ungettoken(lex, token);
803 if (result != ISC_R_SUCCESS)
806 if (eol && ((token->type == isc_tokentype_eol) ||
807 (token->type == isc_tokentype_eof)))
808 return (ISC_R_SUCCESS);
809 if (token->type == isc_tokentype_string &&
810 expect == isc_tokentype_qstring)
811 return (ISC_R_SUCCESS);
812 if (token->type != expect) {
/* Leave the offending token available for the caller to re-read. */
813 isc_lex_ungettoken(lex, token);
814 if (token->type == isc_tokentype_eol ||
815 token->type == isc_tokentype_eof)
816 return (ISC_R_UNEXPECTEDEND);
817 if (expect == isc_tokentype_number)
818 return (ISC_R_BADNUMBER);
819 return (ISC_R_UNEXPECTEDTOKEN);
821 return (ISC_R_SUCCESS);
/*
 * Undo the most recent isc_lex_gettoken() on the current source.
 *
 * Requires: a valid lexer with a current source, non-NULL 'tokenp', and
 * either some consumed data in the pushback buffer or an EOF token.
 *
 * Restores the parenthesis count and line number saved by
 * isc_lex_gettoken() and clears the source's at_eof flag.
 */
825 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp) {
828 * Unget the current token.
831 REQUIRE(VALID_LEX(lex));
832 source = HEAD(lex->sources);
833 REQUIRE(source != NULL);
834 REQUIRE(tokenp != NULL);
835 REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 ||
836 tokenp->type == isc_tokentype_eof);
/* NOTE(review): presumably resets the read position to the buffer start; confirm against the isc_buffer API. */
840 isc_buffer_first(source->pushback);
841 lex->paren_count = lex->saved_paren_count;
842 source->line = source->saved_line;
843 source->at_eof = ISC_FALSE;
/*
 * Fill region 'r' with a pointer into, and a length within, the current
 * source's pushback buffer, derived from the amount of data consumed.
 *
 * Requires: a valid lexer with a current source, non-NULL 'tokenp', and
 * either some consumed data in the pushback buffer or an EOF token.
 *
 * NOTE(review): the offset added to the base and subtracted from the
 * length is not visible in this listing; the INSIST suggests it is
 * source->ignored. Confirm against the full file.
 */
847 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r)
851 REQUIRE(VALID_LEX(lex));
852 source = HEAD(lex->sources);
853 REQUIRE(source != NULL);
854 REQUIRE(tokenp != NULL);
855 REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 ||
856 tokenp->type == isc_tokentype_eof);
/* The ignored prefix can never exceed what has been consumed. */
860 INSIST(source->ignored <= isc_buffer_consumedlength(source->pushback));
861 r->base = (unsigned char *)isc_buffer_base(source->pushback) +
863 r->length = isc_buffer_consumedlength(source->pushback) -
/*
 * Returns the name of the current (head) source.  The pointer refers to
 * the lexer's own copy of the name.  Handling of the no-source case is
 * not visible in this listing.
 */
869 isc_lex_getsourcename(isc_lex_t *lex) {
872 REQUIRE(VALID_LEX(lex));
873 source = HEAD(lex->sources);
878 return (source->name);
/*
 * Returns the 'line' counter of the current (head) source.  Handling of
 * the no-source case is not visible in this listing.
 */
882 isc_lex_getsourceline(isc_lex_t *lex) {
885 REQUIRE(VALID_LEX(lex));
886 source = HEAD(lex->sources);
891 return (source->line);
/*
 * Replace the name of the current (head) source with a copy of 'name'.
 *
 * The new copy is made before the old name is freed, so the existing name
 * is left intact if the copy fails.
 *
 * Returns ISC_R_SUCCESS, ISC_R_NOMEMORY, or ISC_R_NOTFOUND (presumably
 * when there is no current source; the test is not visible here).
 */
896 isc_lex_setsourcename(isc_lex_t *lex, const char *name) {
900 REQUIRE(VALID_LEX(lex));
901 source = HEAD(lex->sources);
904 return(ISC_R_NOTFOUND);
905 newname = isc_mem_strdup(lex->mctx, name);
907 return (ISC_R_NOMEMORY);
908 isc_mem_free(lex->mctx, source->name);
909 source->name = newname;
910 return (ISC_R_SUCCESS);
/*
 * Returns the is_file flag of the current (head) source, i.e. whether it
 * was registered as a stdio stream rather than a buffer.  Handling of the
 * no-source case is not visible in this listing.
 */
914 isc_lex_isfile(isc_lex_t *lex) {
917 REQUIRE(VALID_LEX(lex));
919 source = HEAD(lex->sources);
924 return (source->is_file);