2 * Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1998-2002 Internet Software Consortium.
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: lex.h,v 1.26.2.2.8.3 2004/03/08 09:04:51 marka Exp $ */
30 * The "lex" module provides a lightweight tokenizer. It can operate
31 * on files or buffers, and can handle "include". It is designed for
32 * parsing of DNS master files and the BIND configuration file, but
33 * should be general enough to tokenize other things, e.g. HTTP.
36 * No synchronization is provided. Clients must ensure exclusive
40 * No anticipated impact.
46 * No anticipated impact.
59 #include <isc/region.h>
60 #include <isc/types.h>
69 * Various options for isc_lex_gettoken().
72 #define ISC_LEXOPT_EOL 0x01 /* Want end-of-line token. */
73 #define ISC_LEXOPT_EOF 0x02 /* Want end-of-file token. */
74 #define ISC_LEXOPT_INITIALWS 0x04 /* Want initial whitespace. */
75 #define ISC_LEXOPT_NUMBER 0x08 /* Recognize numbers. */
76 #define ISC_LEXOPT_QSTRING 0x10 /* Recognize qstrings. */
79 * The ISC_LEXOPT_DNSMULTILINE option handles the processing of '(' and ')' in
80 * the DNS master file format. If this option is set, then the
81 * ISC_LEXOPT_INITIALWS and ISC_LEXOPT_EOL options will be ignored when
82 * the paren count is > 0. To use this option, '(' and ')' must be special
85 #define ISC_LEXOPT_DNSMULTILINE 0x20 /* Handle '(' and ')'. */
86 #define ISC_LEXOPT_NOMORE 0x40 /* Want "no more" token. */
88 #define ISC_LEXOPT_CNUMBER 0x80 /* Recognize octal and hex. */
89 #define ISC_LEXOPT_ESCAPE 0x100 /* Recognize escapes. */
90 #define ISC_LEXOPT_QSTRINGMULTILINE 0x200 /* Allow multiline "" strings. */
93 * Various commenting styles, which may be changed at any time with
94 * isc_lex_setcomments().
97 #define ISC_LEXCOMMENT_C 0x01
98 #define ISC_LEXCOMMENT_CPLUSPLUS 0x02
99 #define ISC_LEXCOMMENT_SHELL 0x04
100 #define ISC_LEXCOMMENT_DNSMASTERFILE 0x08
108 typedef char isc_lexspecials_t[256];
113 isc_tokentype_unknown = 0,
114 isc_tokentype_string = 1,
115 isc_tokentype_number = 2,
116 isc_tokentype_qstring = 3,
117 isc_tokentype_eol = 4,
118 isc_tokentype_eof = 5,
119 isc_tokentype_initialws = 6,
120 isc_tokentype_special = 7,
121 isc_tokentype_nomore = 8
126 unsigned long as_ulong;
127 isc_region_t as_region;
128 isc_textregion_t as_textregion;
132 typedef struct isc_token {
133 isc_tokentype_t type;
134 isc_tokenvalue_t value;
142 isc_lex_create(isc_mem_t *mctx, size_t max_token, isc_lex_t **lexp);
146 * 'max_token' is a hint of the number of bytes in the largest token.
149 * '*lexp' is a valid lexer.
154 * On success, *lexp is attached to the newly created lexer.
162 isc_lex_destroy(isc_lex_t **lexp);
167 * '*lexp' is a valid lexer.
174 isc_lex_getcomments(isc_lex_t *lex);
176 * Return the current lexer commenting styles.
179 * 'lex' is a valid lexer.
182 * The commenting styles which are currently allowed.
186 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments);
188 * Set allowed lexer commenting styles.
191 * 'lex' is a valid lexer.
193 * 'comments' has meaningful values.
197 isc_lex_getspecials(isc_lex_t *lex, isc_lexspecials_t specials);
199 * Put the current list of specials into 'specials'.
202 * 'lex' is a valid lexer.
206 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials);
208 * The characters in 'specials' are returned as tokens. Along with
209 * whitespace, they delimit strings and numbers.
212 * Comment processing takes precedence over special character
216 * 'lex' is a valid lexer.
220 isc_lex_openfile(isc_lex_t *lex, const char *filename);
222 * Open 'filename' and make it the current input source for 'lex'.
225 * 'lex' is a valid lexer.
227 * 'filename' is a valid C string.
231 * ISC_R_NOMEMORY Out of memory
232 * ISC_R_NOTFOUND File not found
233 * ISC_R_NOPERM No permission to open file
234 * ISC_R_FAILURE Couldn't open file, not sure why
239 isc_lex_openstream(isc_lex_t *lex, FILE *stream);
241 * Make 'stream' the current input source for 'lex'.
244 * 'lex' is a valid lexer.
246 * 'stream' is a valid C stream.
250 * ISC_R_NOMEMORY Out of memory
254 isc_lex_openbuffer(isc_lex_t *lex, isc_buffer_t *buffer);
256 * Make 'buffer' the current input source for 'lex'.
259 * 'lex' is a valid lexer.
261 * 'buffer' is a valid buffer.
265 * ISC_R_NOMEMORY Out of memory
269 isc_lex_close(isc_lex_t *lex);
271 * Close the most recently opened object (i.e. file or buffer).
275 * ISC_R_NOMORE No more input sources
279 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp);
281 * Get the next token.
284 * 'lex' is a valid lexer.
286 * 'lex' has an input source.
288 * 'options' contains valid options.
290 * '*tokenp' is a valid pointer.
294 * ISC_R_UNEXPECTEDEND
297 * These two results are returned only if their corresponding lexer
298 * options are not set.
300 * ISC_R_EOF End of input source
301 * ISC_R_NOMORE No more input sources
305 isc_lex_getmastertoken(isc_lex_t *lex, isc_token_t *token,
306 isc_tokentype_t expect, isc_boolean_t eol);
308 * Get the next token from a DNS master file type stream. This is a
309 * convenience function that sets appropriate options and handles quoted
310 * strings and end of line correctly for master files. It also ungets
314 * 'lex' is a valid lexer.
316 * 'token' is a valid pointer.
320 * any return code from isc_lex_gettoken.
324 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp);
326 * Unget the current token.
329 * 'lex' is a valid lexer.
331 * 'lex' has an input source.
333 * 'tokenp' points to a valid token.
335 * There is no ungotten token already.
339 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r);
341 * Returns a region containing the text of the last token returned.
344 * 'lex' is a valid lexer.
346 * 'lex' has an input source.
348 * 'tokenp' points to a valid token.
350 * A token has been gotten and not ungotten.
354 isc_lex_getsourcename(isc_lex_t *lex);
356 * Return the input source name.
359 * 'lex' is a valid lexer.
362 * The source name, or NULL if there is no current source.
363 * The result is valid while the current input source exists.
368 isc_lex_getsourceline(isc_lex_t *lex);
370 * Return the input source line number.
373 * 'lex' is a valid lexer.
376 * Current line number or 0 if no current source.
380 isc_lex_setsourcename(isc_lex_t *lex, const char *name);
382 * Assigns a new name to the input source.
386 * 'lex' is a valid lexer.
391 * ISC_R_NOTFOUND - there are no sources.
395 isc_lex_isfile(isc_lex_t *lex);
397 * Return whether the current input source is a file.
400 * 'lex' is a valid lexer.
403 * ISC_TRUE if the current input is a file,
404 * ISC_FALSE otherwise.
410 #endif /* ISC_LEX_H */