2 * Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1998-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: lex.c,v 1.66.2.8 2004/03/09 06:11:47 marka Exp $ */
26 #include <isc/buffer.h>
31 #include <isc/stdio.h>
32 #include <isc/string.h>
/*
 * State kept for one input source of the lexer.  Only some members appear
 * in this excerpt; other members used elsewhere in the file (result, at_eof,
 * input, name, line, ignored) are presumably declared on lines not shown.
 */
35 typedef struct inputsource {
/*
 * Copied from new_source()'s 'is_file' argument.  isc_lex_gettoken() uses it
 * to choose between reading a stream and reading an input buffer.
 */
37 isc_boolean_t is_file;
/*
 * Copied from new_source()'s 'need_close' argument.  isc_lex_close() calls
 * fclose() on the input only when both is_file and need_close are set.
 */
38 isc_boolean_t need_close;
/*
 * Buffer allocated in new_source() and released in isc_lex_close();
 * isc_lex_gettoken() takes its characters from here.
 */
40 isc_buffer_t * pushback;
/*
 * Copy of 'line' taken by isc_lex_gettoken() and put back by
 * isc_lex_ungettoken().
 */
45 unsigned long saved_line;
/* Linkage used to keep this source on the lexer's 'sources' list. */
46 ISC_LINK(struct inputsource) link;
/*
 * Magic value stored in lex->magic by isc_lex_create(), and the validity
 * test that the public entry points apply through REQUIRE(VALID_LEX(lex)).
 */
49 #define LEX_MAGIC ISC_MAGIC('L', 'e', 'x', '!')
50 #define VALID_LEX(l) ISC_MAGIC_VALID(l, LEX_MAGIC)
/*
 * Members of the lexer context (the opening of the structure and some of
 * its members are not part of this excerpt).
 */
/*
 * Bit mask of enabled comment styles; read and written by
 * isc_lex_getcomments() / isc_lex_setcomments() and tested against
 * ISC_LEXCOMMENT_* flags in isc_lex_gettoken().
 */
59 unsigned int comments;
/*
 * Initialised to ISC_TRUE by isc_lex_create(); isc_lex_gettoken() looks for
 * comment introducers only while this is set.
 */
60 isc_boolean_t comment_ok;
/*
 * Initialised to ISC_TRUE; isc_lex_gettoken() sets it after a newline and
 * clears it when other input is seen.  It gates the initial-whitespace token.
 */
61 isc_boolean_t last_was_eol;
/*
 * Parenthesis bookkeeping used with ISC_LEXOPT_DNSMULTILINE.  A non-zero
 * value at end of input is reported as ISC_R_UNBALANCED.
 */
62 unsigned int paren_count;
/*
 * Copy of paren_count taken at the start of isc_lex_gettoken() and restored
 * by isc_lex_ungettoken().
 */
63 unsigned int saved_paren_count;
/*
 * Table indexed by character value; a non-zero entry makes that character a
 * "special".  It is cleared and copied as 256 bytes elsewhere in this file.
 */
64 isc_lexspecials_t specials;
/* List of input sources; the head of the list is the current source. */
65 LIST(struct inputsource) sources;
/*
 * Enlarge the lexer's token buffer.
 *
 * From the lines shown: a new buffer of (2 * max_token + 1) bytes is
 * obtained from lex->mctx, the old contents (max_token + 1 bytes) are copied
 * into it, '*currp' and '*prevp' are rebased so that they keep the same
 * offset within the new buffer, the old buffer is returned to the memory
 * context, and '*remainingp' grows by max_token.
 *
 * Returns ISC_R_NOMEMORY or ISC_R_SUCCESS.  The statements that install the
 * new buffer and update lex->max_token are not part of this excerpt.
 */
68 static inline isc_result_t
69 grow_data(isc_lex_t *lex, size_t *remainingp, char **currp, char **prevp) {
72 new = isc_mem_get(lex->mctx, lex->max_token * 2 + 1);
74 return (ISC_R_NOMEMORY);
75 memcpy(new, lex->data, lex->max_token + 1);
/* Rebase the caller's cursors: same offset, new base address. */
76 *currp = new + (*currp - lex->data);
/*
 * NOTE(review): callers INSIST(prev != NULL) before using it, which suggests
 * '*prevp' may be NULL here; a guard is presumably on a line not shown.
 */
78 *prevp = new + (*prevp - lex->data);
79 isc_mem_put(lex->mctx, lex->data, lex->max_token + 1);
81 *remainingp += lex->max_token;
83 return (ISC_R_SUCCESS);
/*
 * Create a lexer context.
 *
 * Requires: 'lexp' is non-NULL and '*lexp' is NULL; 'max_token' is greater
 * than zero.
 *
 * Allocates the context and a token buffer of (max_token + 1) bytes from
 * 'mctx'.  If the token buffer cannot be allocated the context itself is
 * released again.  Returns ISC_R_NOMEMORY or ISC_R_SUCCESS.
 */
87 isc_lex_create(isc_mem_t *mctx, size_t max_token, isc_lex_t **lexp) {
94 REQUIRE(lexp != NULL && *lexp == NULL);
95 REQUIRE(max_token > 0U);
97 lex = isc_mem_get(mctx, sizeof *lex);
99 return (ISC_R_NOMEMORY);
100 lex->data = isc_mem_get(mctx, max_token + 1);
101 if (lex->data == NULL) {
/* Undo the first allocation so nothing leaks on this failure path. */
102 isc_mem_put(mctx, lex, sizeof *lex);
103 return (ISC_R_NOMEMORY);
106 lex->max_token = max_token;
108 lex->comment_ok = ISC_TRUE;
109 lex->last_was_eol = ISC_TRUE;
110 lex->paren_count = 0;
111 lex->saved_paren_count = 0;
/* Start with no special characters (the table is cleared as 256 bytes). */
112 memset(lex->specials, 0, 256);
113 INIT_LIST(lex->sources);
/* Among the lines shown, the magic is set only after the other members. */
114 lex->magic = LEX_MAGIC;
118 return (ISC_R_SUCCESS);
/*
 * Destroy a lexer context.
 *
 * Requires: 'lexp' is non-NULL and refers to a valid lexer.
 *
 * Loops while the source list is non-empty (the loop body is not part of
 * this excerpt; presumably it closes the head source), then releases the
 * token buffer and the context itself back to lex->mctx.
 */
122 isc_lex_destroy(isc_lex_t **lexp) {
129 REQUIRE(lexp != NULL);
131 REQUIRE(VALID_LEX(lex));
133 while (!EMPTY(lex->sources))
135 if (lex->data != NULL)
/* The size must match the (max_token + 1) bytes used at allocation time. */
136 isc_mem_put(lex->mctx, lex->data, lex->max_token + 1);
138 isc_mem_put(lex->mctx, lex, sizeof *lex);
/*
 * Accessor for lex->comments, the mask of enabled comment styles.
 * Requires a valid lexer.
 */
144 isc_lex_getcomments(isc_lex_t *lex) {
146 * Return the current lexer commenting styles.
149 REQUIRE(VALID_LEX(lex));
151 return (lex->comments);
/*
 * Replace lex->comments with 'comments'.  The value is stored as given;
 * no validation of the bits is visible here.  Requires a valid lexer.
 */
155 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments) {
157 * Set allowed lexer commenting styles.
160 REQUIRE(VALID_LEX(lex));
162 lex->comments = comments;
/*
 * Copy the lexer's specials table (256 bytes) into the caller-supplied
 * 'specials'.  Requires a valid lexer.
 */
166 isc_lex_getspecials(isc_lex_t *lex, isc_lexspecials_t specials) {
168 * Put the current list of specials into 'specials'.
171 REQUIRE(VALID_LEX(lex));
173 memcpy(specials, lex->specials, 256);
/*
 * Replace the lexer's specials table with a copy (256 bytes) of 'specials'.
 * Requires a valid lexer.
 */
177 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials) {
179 * The characters in 'specials' are returned as tokens. Along with
180 * whitespace, they delimit strings and numbers.
183 REQUIRE(VALID_LEX(lex));
185 memcpy(lex->specials, specials, 256);
/*
 * Create an inputsource for 'input' and make it the head of lex->sources.
 *
 * 'is_file' and 'need_close' are stored as given; 'name' is duplicated with
 * isc_mem_strdup(), so the caller keeps ownership of its own string.  A
 * pushback buffer is allocated for the source (its size argument is on a
 * line not shown here).
 *
 * On allocation failure everything allocated so far is released.  Returns
 * ISC_R_NOMEMORY when the source or its name cannot be allocated and
 * ISC_R_SUCCESS on success; the value returned when the pushback buffer
 * cannot be allocated is on a line not shown.
 */
188 static inline isc_result_t
189 new_source(isc_lex_t *lex, isc_boolean_t is_file, isc_boolean_t need_close,
190 void *input, const char *name)
195 source = isc_mem_get(lex->mctx, sizeof *source);
197 return (ISC_R_NOMEMORY);
198 source->result = ISC_R_SUCCESS;
199 source->is_file = is_file;
200 source->need_close = need_close;
201 source->at_eof = ISC_FALSE;
202 source->input = input;
203 source->name = isc_mem_strdup(lex->mctx, name);
204 if (source->name == NULL) {
205 isc_mem_put(lex->mctx, source, sizeof *source);
206 return (ISC_R_NOMEMORY);
/* Cleared before the allocation call that fills it in. */
208 source->pushback = NULL;
209 result = isc_buffer_allocate(lex->mctx, &source->pushback,
211 if (result != ISC_R_SUCCESS) {
/* Unwind in reverse order: the duplicated name first, then the source. */
212 isc_mem_free(lex->mctx, source->name);
213 isc_mem_put(lex->mctx, source, sizeof *source);
/* Prepending makes the new source the one HEAD(lex->sources) returns. */
218 ISC_LIST_INITANDPREPEND(lex->sources, source, link);
220 return (ISC_R_SUCCESS);
/*
 * Open 'filename' for reading with isc_stdio_open() and register the stream
 * through new_source() with both is_file and need_close set, so that
 * isc_lex_close() will fclose() it.  Requires a valid lexer.
 *
 * The statements executed when either call fails are not part of this
 * excerpt.
 */
224 isc_lex_openfile(isc_lex_t *lex, const char *filename) {
229 * Open 'filename' and make it the current input source for 'lex'.
232 REQUIRE(VALID_LEX(lex));
234 result = isc_stdio_open(filename, "r", &stream);
235 if (result != ISC_R_SUCCESS)
238 result = new_source(lex, ISC_TRUE, ISC_TRUE, stream, filename);
239 if (result != ISC_R_SUCCESS)
/*
 * Register an already-open 'stream' as the current input source.  The source
 * is given a generated name of the form "stream-<pointer>", and need_close
 * is ISC_FALSE, so isc_lex_close() will not fclose() the caller's stream.
 * Requires a valid lexer.
 */
245 isc_lex_openstream(isc_lex_t *lex, FILE *stream) {
249 * Make 'stream' the current input source for 'lex'.
252 REQUIRE(VALID_LEX(lex));
/*
 * NOTE(review): sprintf() is unbounded and the declaration of 'name' is not
 * visible here -- confirm its size, or switch to a bounded formatter.
 * Also, %p formally expects a (void *) argument.
 */
255 sprintf(name, "stream-%p", stream);
257 return (new_source(lex, ISC_TRUE, ISC_FALSE, stream, name));
/*
 * Register 'buffer' as the current input source.  The source is given a
 * generated name of the form "buffer-<pointer>"; is_file and need_close are
 * both ISC_FALSE, so isc_lex_gettoken() reads it as an isc_buffer_t and
 * isc_lex_close() does not fclose() anything.  Requires a valid lexer.
 */
261 isc_lex_openbuffer(isc_lex_t *lex, isc_buffer_t *buffer) {
265 * Make 'buffer' the current input source for 'lex'.
268 REQUIRE(VALID_LEX(lex));
/*
 * NOTE(review): sprintf() is unbounded and the declaration of 'name' is not
 * visible here -- confirm its size, or switch to a bounded formatter.
 * Also, %p formally expects a (void *) argument.
 */
271 sprintf(name, "buffer-%p", buffer);
273 return (new_source(lex, ISC_FALSE, ISC_FALSE, buffer, name));
/*
 * Remove the head source from lex->sources and release it.
 *
 * The stream is closed with fclose() only for sources that are files and
 * were opened with need_close set.  The duplicated name, the pushback buffer
 * and the source structure are then freed.
 *
 * Returns ISC_R_NOMORE (the guarding condition is on a line not shown;
 * presumably when no source is open) or ISC_R_SUCCESS.
 */
277 isc_lex_close(isc_lex_t *lex) {
281 * Close the most recently opened object (i.e. file or buffer).
284 REQUIRE(VALID_LEX(lex));
286 source = HEAD(lex->sources);
288 return (ISC_R_NOMORE);
290 ISC_LIST_UNLINK(lex->sources, source, link);
291 if (source->is_file) {
292 if (source->need_close)
/* NOTE(review): the result of fclose() is not examined. */
293 fclose((FILE *)(source->input));
295 isc_mem_free(lex->mctx, source->name);
296 isc_buffer_free(&source->pushback);
297 isc_mem_put(lex->mctx, source, sizeof *source);
299 return (ISC_R_SUCCESS);
/*
 * Two enumerators of the lexer state type (the rest of the enumeration is
 * not part of this excerpt).
 *
 * isc_lex_gettoken() enters lexstate_maybecomment after reading a slash
 * while a matching comment style is enabled.  It enters lexstate_ccommentend
 * from the lexstate_ccomment case, and goes back to lexstate_ccomment from
 * there when the comment does not end.
 */
307 lexstate_maybecomment,
309 lexstate_ccommentend,
/* Combined mask of the initial-whitespace and end-of-line token options. */
314 #define IWSEOL (ISC_LEXOPT_INITIALWS | ISC_LEXOPT_EOL)
/*
 * Step the pushback buffer's read position back by one so that the most
 * recently consumed character is read again, and clear the source's at_eof
 * flag.
 *
 * Requires that at least one character has been consumed (current > 0).
 * How 'c' is used is on lines not shown in this excerpt.
 */
317 pushback(inputsource *source, int c) {
318 REQUIRE(source->pushback->current > 0);
320 source->at_eof = ISC_FALSE;
323 source->pushback->current--;
/*
 * Append 'c' (as one byte) to the source's pushback buffer, enlarging the
 * buffer first when it has no room left.
 *
 * Growth allocates a buffer of twice the old length, copies the used region
 * into it, carries over the read position ('current'), frees the old buffer
 * and installs the new one.
 *
 * Returns ISC_R_SUCCESS; the return taken when the allocation fails is on a
 * line not shown here.
 */
329 pushandgrow(isc_lex_t *lex, inputsource *source, int c) {
330 if (isc_buffer_availablelength(source->pushback) == 0) {
331 isc_buffer_t *tbuf = NULL;
336 oldlen = isc_buffer_length(source->pushback);
337 result = isc_buffer_allocate(lex->mctx, &tbuf, oldlen * 2);
338 if (result != ISC_R_SUCCESS)
340 isc_buffer_usedregion(source->pushback, &used);
341 result = isc_buffer_copyregion(tbuf, &used);
/* The new buffer is twice as long, so the copy is expected to fit. */
342 INSIST(result == ISC_R_SUCCESS);
/* Keep the read position, so characters already consumed stay consumed. */
343 tbuf->current = source->pushback->current;
344 isc_buffer_free(&source->pushback);
345 source->pushback = tbuf;
347 isc_buffer_putuint8(source->pushback, (isc_uint8_t)c);
348 return (ISC_R_SUCCESS);
/*
 * Read the next token from the current (head) input source of 'lex' into
 * '*tokenp', honouring the ISC_LEXOPT_* bits in 'options'.
 *
 * Visible in this excerpt:
 *  - with no source on the list, an isc_tokentype_nomore token is produced
 *    when ISC_LEXOPT_NOMORE is set, otherwise ISC_R_NOMORE is returned;
 *  - an error already recorded in source->result is returned again;
 *  - characters are taken from the source's pushback buffer, which is
 *    refilled one character at a time from the FILE stream or from the
 *    input isc_buffer_t through pushandgrow();
 *  - a state machine (lexstate_*) assembles numbers, strings and quoted
 *    strings and skips comments, calling grow_data() whenever the token
 *    buffer has no room left.
 *
 * Many lines of this function are missing from this excerpt; the notes
 * below describe only what the lines shown do.
 */
352 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp) {
355 isc_boolean_t done = ISC_FALSE;
356 isc_boolean_t no_comments = ISC_FALSE;
357 isc_boolean_t escaped = ISC_FALSE;
358 lexstate state = lexstate_start;
359 lexstate saved_state = lexstate_start;
360 isc_buffer_t *buffer;
364 unsigned long as_ulong;
365 unsigned int saved_options;
370 * Get the next token.
373 REQUIRE(VALID_LEX(lex));
374 source = HEAD(lex->sources);
375 REQUIRE(tokenp != NULL);
/*
 * Snapshot the state that isc_lex_ungettoken() restores.
 *
 * NOTE(review): in the order shown, source->line is read before the
 * 'source == NULL' test that follows.  If the source list can be empty this
 * dereferences NULL, and the two saves belong after that test (and after
 * the source->result check) -- confirm against the complete file.
 */
377 lex->saved_paren_count = lex->paren_count;
378 source->saved_line = source->line;
380 if (source == NULL) {
381 if ((options & ISC_LEXOPT_NOMORE) != 0) {
382 tokenp->type = isc_tokentype_nomore;
383 return (ISC_R_SUCCESS);
385 return (ISC_R_NOMORE);
/* A failure recorded on an earlier call is sticky for this source. */
388 if (source->result != ISC_R_SUCCESS)
389 return (source->result);
391 if (isc_buffer_remaininglength(source->pushback) == 0 &&
/* Input ended inside an open parenthesis group: reset and report it. */
394 if ((options & ISC_LEXOPT_DNSMULTILINE) != 0 &&
395 lex->paren_count != 0) {
396 lex->paren_count = 0;
397 return (ISC_R_UNBALANCED);
399 if ((options & ISC_LEXOPT_EOF) != 0) {
400 tokenp->type = isc_tokentype_eof;
401 return (ISC_R_SUCCESS);
406 isc_buffer_compact(source->pushback);
/* Keep the caller's options; 'options' may be adjusted below. */
408 saved_options = options;
409 if ((options & ISC_LEXOPT_DNSMULTILINE) != 0 && lex->paren_count > 0)
416 remaining = lex->max_token;
418 #ifdef HAVE_FLOCKFILE
420 flockfile(source->input);
/* Refill: nothing left in the pushback buffer, so read one more character. */
424 if (isc_buffer_remaininglength(source->pushback) == 0) {
425 if (source->is_file) {
426 stream = source->input;
428 #if defined(HAVE_FLOCKFILE) && defined(HAVE_GETCUNLOCKED)
429 c = getc_unlocked(stream);
/* A stream error is remembered in source->result for later calls. */
434 if (ferror(stream)) {
435 source->result = ISC_R_IOERROR;
436 result = source->result;
439 source->at_eof = ISC_TRUE;
442 buffer = source->input;
444 if (buffer->current == buffer->used) {
446 source->at_eof = ISC_TRUE;
/*
 * NOTE(review): the byte is read through a (char *) cast.  Where plain char
 * is signed, bytes >= 0x80 would make 'c' negative, and 'c' is later used as
 * an index into lex->specials[] -- consider (unsigned char *); confirm
 * against the complete file.
 */
448 c = *((char *)buffer->base +
454 source->result = pushandgrow(lex, source, c);
455 if (source->result != ISC_R_SUCCESS) {
456 result = source->result;
462 if (!source->at_eof) {
463 if (state == lexstate_start)
464 /* Token has not started yet. */
466 isc_buffer_consumedlength(source->pushback);
467 c = isc_buffer_getuint8(source->pushback);
/*
 * Comment detection.  It is skipped while no_comments is set; the flag is
 * set when a comment or a quoted string begins.
 */
475 if (lex->comment_ok && !no_comments) {
476 if (!escaped && c == ';' &&
477 ((lex->comments & ISC_LEXCOMMENT_DNSMASTERFILE)
480 state = lexstate_eatline;
481 no_comments = ISC_TRUE;
483 } else if (c == '/' &&
486 ISC_LEXCOMMENT_CPLUSPLUS)) != 0) {
488 state = lexstate_maybecomment;
489 no_comments = ISC_TRUE;
491 } else if (c == '#' &&
492 ((lex->comments & ISC_LEXCOMMENT_SHELL)
495 state = lexstate_eatline;
496 no_comments = ISC_TRUE;
502 /* INSIST(c == EOF || (c >= 0 && c <= 255)); */
506 lex->last_was_eol = ISC_FALSE;
507 if ((options & ISC_LEXOPT_DNSMULTILINE) != 0 &&
508 lex->paren_count != 0) {
509 lex->paren_count = 0;
510 result = ISC_R_UNBALANCED;
513 if ((options & ISC_LEXOPT_EOF) == 0) {
517 tokenp->type = isc_tokentype_eof;
519 } else if (c == ' ' || c == '\t') {
/* Whitespace right after an end of line can be its own token. */
520 if (lex->last_was_eol &&
521 (options & ISC_LEXOPT_INITIALWS)
523 lex->last_was_eol = ISC_FALSE;
524 tokenp->type = isc_tokentype_initialws;
525 tokenp->value.as_char = c;
528 } else if (c == '\n') {
529 if ((options & ISC_LEXOPT_EOL) != 0) {
530 tokenp->type = isc_tokentype_eol;
533 lex->last_was_eol = ISC_TRUE;
534 } else if (c == '\r') {
535 if ((options & ISC_LEXOPT_EOL) != 0)
536 state = lexstate_crlf;
537 } else if (c == '"' &&
538 (options & ISC_LEXOPT_QSTRING) != 0) {
539 lex->last_was_eol = ISC_FALSE;
540 no_comments = ISC_TRUE;
541 state = lexstate_qstring;
542 } else if (lex->specials[c]) {
543 lex->last_was_eol = ISC_FALSE;
/* Parentheses get extra handling when DNS multi-line mode is requested. */
544 if ((c == '(' || c == ')') &&
545 (options & ISC_LEXOPT_DNSMULTILINE) != 0) {
547 if (lex->paren_count == 0)
551 if (lex->paren_count == 0) {
552 result = ISC_R_UNBALANCED;
556 if (lex->paren_count == 0)
562 tokenp->type = isc_tokentype_special;
563 tokenp->value.as_char = c;
565 } else if (isdigit((unsigned char)c) &&
566 (options & ISC_LEXOPT_NUMBER) != 0) {
567 lex->last_was_eol = ISC_FALSE;
568 state = lexstate_number;
571 lex->last_was_eol = ISC_FALSE;
572 state = lexstate_string;
579 tokenp->type = isc_tokentype_eol;
581 lex->last_was_eol = ISC_TRUE;
583 case lexstate_number:
584 if (c == EOF || !isdigit((unsigned char)c)) {
585 if (c == ' ' || c == '\t' || c == '\r' ||
586 c == '\n' || c == EOF ||
589 if ((options & ISC_LEXOPT_CNUMBER) != 0)
/*
 * Convert the collected text.  A ULONG_MAX result (together with a further
 * condition on a line not shown) is reported as ISC_R_RANGE.  Text that
 * strtoul() consumed completely becomes a number token; otherwise it is
 * handed back as a string token.
 */
594 as_ulong = strtoul(lex->data, &e, base);
595 if (as_ulong == ULONG_MAX &&
597 result = ISC_R_RANGE;
599 } else if (*e == 0) {
601 isc_tokentype_number;
602 tokenp->value.as_ulong =
608 isc_tokentype_string;
609 v = &(tokenp->value);
610 v->as_textregion.base =
612 v->as_textregion.length =
618 } else if (!(options & ISC_LEXOPT_CNUMBER) ||
619 ((c != 'x' && c != 'X') ||
620 (curr != &lex->data[1]) ||
621 (lex->data[0] != '0'))) {
622 /* Above test supports hex numbers */
623 state = lexstate_string;
/* No room left in the token buffer: enlarge it before storing more. */
626 if (remaining == 0U) {
627 result = grow_data(lex, &remaining,
629 if (result != ISC_R_SUCCESS)
632 INSIST(remaining > 0U);
637 case lexstate_string:
639 (c == ' ' || c == '\t' || lex->specials[c])) ||
640 c == '\r' || c == '\n' || c == EOF) {
642 if (source->result != ISC_R_SUCCESS) {
643 result = source->result;
/* The token text is whatever has been stored in lex->data so far. */
646 tokenp->type = isc_tokentype_string;
647 tokenp->value.as_textregion.base = lex->data;
648 tokenp->value.as_textregion.length =
649 lex->max_token - remaining;
/* With ISC_LEXOPT_ESCAPE, a backslash escapes only the next character. */
653 if ((options & ISC_LEXOPT_ESCAPE) != 0)
654 escaped = (!escaped && c == '\\') ?
655 ISC_TRUE : ISC_FALSE;
656 if (remaining == 0U) {
657 result = grow_data(lex, &remaining,
659 if (result != ISC_R_SUCCESS)
662 INSIST(remaining > 0U);
667 case lexstate_maybecomment:
669 (lex->comments & ISC_LEXCOMMENT_C) != 0) {
670 state = lexstate_ccomment;
672 } else if (c == '/' &&
673 (lex->comments & ISC_LEXCOMMENT_CPLUSPLUS) != 0) {
674 state = lexstate_eatline;
679 no_comments = ISC_FALSE;
682 case lexstate_ccomment:
684 result = ISC_R_UNEXPECTEDEND;
688 state = lexstate_ccommentend;
690 case lexstate_ccommentend:
692 result = ISC_R_UNEXPECTEDEND;
697 * C-style comments become a single space.
698 * We do this to ensure that a comment will
699 * act as a delimiter for strings and
703 no_comments = ISC_FALSE;
707 state = lexstate_ccomment;
709 case lexstate_eatline:
711 result = ISC_R_UNEXPECTEDEND;
715 no_comments = ISC_FALSE;
720 case lexstate_qstring:
722 result = ISC_R_UNEXPECTEDEND;
729 * Overwrite the preceding backslash.
731 INSIST(prev != NULL);
734 tokenp->type = isc_tokentype_qstring;
735 tokenp->value.as_textregion.base =
737 tokenp->value.as_textregion.length =
738 lex->max_token - remaining;
739 no_comments = ISC_FALSE;
/*
 * An unescaped newline inside a quoted string is an error unless
 * ISC_LEXOPT_QSTRINGMULTILINE was requested.
 */
743 if (c == '\n' && !escaped &&
744 (options & ISC_LEXOPT_QSTRINGMULTILINE) == 0) {
746 result = ISC_R_UNBALANCEDQUOTES;
749 if (c == '\\' && !escaped)
753 if (remaining == 0U) {
754 result = grow_data(lex, &remaining,
756 if (result != ISC_R_SUCCESS)
759 INSIST(remaining > 0U);
767 FATAL_ERROR(__FILE__, __LINE__,
768 isc_msgcat_get(isc_msgcat, ISC_MSGSET_LEX,
769 ISC_MSG_UNEXPECTEDSTATE,
770 "Unexpected state %d"),
772 /* Does not return. */
777 result = ISC_R_SUCCESS;
/* Release the stream lock taken with flockfile() near the top. */
779 #ifdef HAVE_FLOCKFILE
781 funlockfile(source->input);
/*
 * Convenience wrapper around isc_lex_gettoken() that checks the token type.
 *
 * The lexer is always called with the EOL, EOF, DNSMULTILINE and ESCAPE
 * options; QSTRING or NUMBER is added when 'expect' asks for that type.
 *
 * Outcomes visible in this excerpt:
 *  - ISC_R_RANGE from the lexer: the token is pushed back first;
 *  - 'eol' set and the token is end-of-line or end-of-file: ISC_R_SUCCESS;
 *  - a plain string where a quoted string was expected: ISC_R_SUCCESS;
 *  - any other type mismatch: the token is pushed back and the result is
 *    ISC_R_UNEXPECTEDEND (token was eol/eof), ISC_R_BADNUMBER (a number was
 *    expected) or ISC_R_UNEXPECTEDTOKEN.
 */
787 isc_lex_getmastertoken(isc_lex_t *lex, isc_token_t *token,
788 isc_tokentype_t expect, isc_boolean_t eol)
790 unsigned int options = ISC_LEXOPT_EOL | ISC_LEXOPT_EOF |
791 ISC_LEXOPT_DNSMULTILINE | ISC_LEXOPT_ESCAPE;
794 if (expect == isc_tokentype_qstring)
795 options |= ISC_LEXOPT_QSTRING;
796 else if (expect == isc_tokentype_number)
797 options |= ISC_LEXOPT_NUMBER;
798 result = isc_lex_gettoken(lex, options, token);
799 if (result == ISC_R_RANGE)
800 isc_lex_ungettoken(lex, token);
801 if (result != ISC_R_SUCCESS)
804 if (eol && ((token->type == isc_tokentype_eol) ||
805 (token->type == isc_tokentype_eof)))
806 return (ISC_R_SUCCESS);
807 if (token->type == isc_tokentype_string &&
808 expect == isc_tokentype_qstring)
809 return (ISC_R_SUCCESS);
810 if (token->type != expect) {
/* Push the token back so the caller can read it again after the error. */
811 isc_lex_ungettoken(lex, token);
812 if (token->type == isc_tokentype_eol ||
813 token->type == isc_tokentype_eof)
814 return (ISC_R_UNEXPECTEDEND);
815 if (expect == isc_tokentype_number)
816 return (ISC_R_BADNUMBER);
817 return (ISC_R_UNEXPECTEDTOKEN);
819 return (ISC_R_SUCCESS);
/*
 * Push the most recently read token back onto the current source.
 *
 * Requires a valid lexer with at least one source, a non-NULL 'tokenp', and
 * either some consumed input in the pushback buffer or an end-of-file token.
 *
 * Rewinds the pushback buffer with isc_buffer_first(), restores the
 * parenthesis count and line number saved by isc_lex_gettoken(), and clears
 * at_eof.
 */
823 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp) {
826 * Unget the current token.
829 REQUIRE(VALID_LEX(lex));
830 source = HEAD(lex->sources);
831 REQUIRE(source != NULL);
832 REQUIRE(tokenp != NULL);
833 REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 ||
834 tokenp->type == isc_tokentype_eof);
838 isc_buffer_first(source->pushback);
839 lex->paren_count = lex->saved_paren_count;
840 source->line = source->saved_line;
841 source->at_eof = ISC_FALSE;
/*
 * Describe, in '*r', the part of the pushback buffer consumed for the last
 * token.
 *
 * Preconditions match isc_lex_ungettoken(): a valid lexer with a current
 * source, a non-NULL 'tokenp', and either consumed input or an end-of-file
 * token.  The base and length are computed from the buffer's base address
 * and consumed length; the second operand of each expression is on a line
 * not shown (presumably source->ignored, given the INSIST).
 *
 * The region points into the pushback buffer itself; no copy is made.
 */
845 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r)
849 REQUIRE(VALID_LEX(lex));
850 source = HEAD(lex->sources);
851 REQUIRE(source != NULL);
852 REQUIRE(tokenp != NULL);
853 REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 ||
854 tokenp->type == isc_tokentype_eof);
858 INSIST(source->ignored <= isc_buffer_consumedlength(source->pushback));
859 r->base = (unsigned char *)isc_buffer_base(source->pushback) +
861 r->length = isc_buffer_consumedlength(source->pushback) -
/*
 * Return the name of the current (head) input source.  The returned pointer
 * is the source's own copy made in new_source(), not a fresh allocation.
 * Requires a valid lexer.  How an empty source list is handled is on lines
 * not shown here.
 */
867 isc_lex_getsourcename(isc_lex_t *lex) {
870 REQUIRE(VALID_LEX(lex));
871 source = HEAD(lex->sources);
876 return (source->name);
/*
 * Return the 'line' member of the current (head) input source.
 * Requires a valid lexer.  How an empty source list is handled is on lines
 * not shown here.
 */
880 isc_lex_getsourceline(isc_lex_t *lex) {
883 REQUIRE(VALID_LEX(lex));
884 source = HEAD(lex->sources);
889 return (source->line);
/*
 * Return the is_file flag of the current (head) input source.  It is
 * ISC_TRUE for sources opened with isc_lex_openfile() or
 * isc_lex_openstream() and ISC_FALSE for isc_lex_openbuffer().
 * Requires a valid lexer.  How an empty source list is handled is on lines
 * not shown here.
 */
893 isc_lex_isfile(isc_lex_t *lex) {
896 REQUIRE(VALID_LEX(lex));
898 source = HEAD(lex->sources);
903 return (source->is_file);