1 /* parse.y - parser for flex input */
3 %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
4 %token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS
6 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
7 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
11 * Copyright (c) 1990 The Regents of the University of California.
12 * All rights reserved.
14 * This code is derived from software contributed to Berkeley by
17 * The United States Government has rights in this work pursuant
18 * to contract no. DE-AC03-76SF00098 between the United States
19 * Department of Energy and the University of California.
21 * Redistribution and use in source and binary forms are permitted provided
22 * that: (1) source distributions retain this entire copyright notice and
23 * comment, and (2) distributions including binaries display the following
24 * acknowledgement: ``This product includes software developed by the
25 * University of California, Berkeley and its contributors'' in the
26 * documentation or other materials provided with the distribution and in
27 * all advertising materials mentioning features or use of this software.
28 * Neither the name of the University nor the names of its contributors may
29 * be used to endorse or promote products derived from this software without
30 * specific prior written permission.
31 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
32 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
33 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
36 /* $Header: /home/daffy/u0/vern/flex/RCS/parse.y,v 2.28 95/04/21 11:51:51 vern Exp $ */
37 /* $FreeBSD: src/usr.bin/lex/parse.y,v 1.3 1999/10/27 07:56:46 obrien Exp $ */
38 /* $DragonFly: src/usr.bin/lex/parse.y,v 1.2 2003/06/17 04:29:28 dillon Exp $ */
41 /* Some versions of bison are broken in that they use alloca() but don't
42 * declare it properly. The following is the patented (just kidding!)
43 * #ifdef chud to fix the problem, courtesy of Francois Pinard.
46 /* AIX requires this to be the first thing in the file. What a piece. */
54 /* The remainder of the alloca() cruft has to come after including flexdef.h,
55 * so HAVE_ALLOCA_H is (possibly) defined.
60 # define alloca __builtin_alloca
79 /* Bletch, ^^^^ that was ugly! */
82 int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, rulelen;
83 int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
88 static int madeany = false; /* whether we've made the '.' character class */
89 int previous_continued_action; /* whether the previous rule's action was '|' */
91 /* Expand a POSIX character class expression: for each c from 0 up to (but
 * not including) csize, add c to currccl via ccladd() when both isascii(c)
 * and func(c) hold.
 */
92 #define CCL_EXPR(func) \
95 for ( c = 0; c < csize; ++c ) \
96 if ( isascii(c) && func(c) ) \
97 ccladd( currccl, c ); \
100 /* While POSIX defines isblank(), it's not ANSI C. */
101 #define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
103 /* On some over-ambitious machines, such as DEC Alphas, the default
104 * token type is "long" instead of "int"; this leads to problems with
105 * declaring yylval in flexdef.h. But so far, all the yacc's I've seen
106 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
107 * following should ensure that the default token type is "int".
114 goal : initlex sect1 sect1end sect2 initforrule
115 { /* add default rule */
121 def_rule = mkstate( -pat );
123 /* Remember the number of the default rule so we
124 * don't generate "can't match" warnings for it.
126 default_rule = num_rules;
128 finish_rule( def_rule, false, 0, 0 );
130 for ( i = 1; i <= lastsc; ++i )
131 scset[i] = mkbranch( scset[i], def_rule );
135 "YY_FATAL_ERROR( \"flex scanner jammed\" )" );
137 add_action( "ECHO" );
139 add_action( ";\n\tYY_BREAK\n" );
144 { /* initialize for processing rules */
146 /* Create default DFA start condition. */
147 scinstal( "INITIAL", false );
151 sect1 : sect1 startconddecl namelist1
155 { synerr( "unknown error processing section 1" ); }
161 scon_stk = allocate_integer_array( lastsc + 1 );
166 startconddecl : SCDECL
173 namelist1 : namelist1 NAME
174 { scinstal( nmstr, xcluflg ); }
177 { scinstal( nmstr, xcluflg ); }
180 { synerr( "bad start condition list" ); }
183 options : OPTION_OP optionlist
186 optionlist : optionlist option
190 option : OPT_OUTFILE '=' NAME
192 outfilename = copy_string( nmstr );
195 | OPT_PREFIX '=' NAME
196 { prefix = copy_string( nmstr ); }
197 | OPT_YYCLASS '=' NAME
198 { yyclass = copy_string( nmstr ); }
201 sect2 : sect2 scon initforrule flexrule '\n'
202 { scon_stk_ptr = $2; }
203 | sect2 scon '{' sect2 '}'
204 { scon_stk_ptr = $2; }
210 /* Initialize for a parse of one rule. */
211 trlcontxt = variable_trail_rule = varlength = false;
212 trailcnt = headcnt = rulelen = 0;
213 current_state_type = STATE_NORMAL;
214 previous_continued_action = continued_action;
224 finish_rule( pat, variable_trail_rule,
227 if ( scon_stk_ptr > 0 )
229 for ( i = 1; i <= scon_stk_ptr; ++i )
231 mkbranch( scbol[scon_stk[i]],
237 /* Add to all non-exclusive start conditions,
238 * including the default (0) start condition.
241 for ( i = 1; i <= lastsc; ++i )
243 scbol[i] = mkbranch( scbol[i],
251 if ( performance_report > 1 )
253 "'^' operator results in sub-optimal performance" );
260 finish_rule( pat, variable_trail_rule,
263 if ( scon_stk_ptr > 0 )
265 for ( i = 1; i <= scon_stk_ptr; ++i )
267 mkbranch( scset[scon_stk[i]],
273 for ( i = 1; i <= lastsc; ++i )
283 if ( scon_stk_ptr > 0 )
288 /* This EOF applies to all start conditions
289 * which don't already have EOF actions.
291 for ( i = 1; i <= lastsc; ++i )
293 scon_stk[++scon_stk_ptr] = i;
295 if ( scon_stk_ptr == 0 )
297 "all start conditions already have <<EOF>> rules" );
305 { synerr( "unrecognized rule" ); }
309 { $$ = scon_stk_ptr; }
312 scon : '<' scon_stk_ptr namelist2 '>'
319 for ( i = 1; i <= lastsc; ++i )
323 for ( j = 1; j <= scon_stk_ptr; ++j )
324 if ( scon_stk[j] == i )
327 if ( j > scon_stk_ptr )
328 scon_stk[++scon_stk_ptr] = i;
333 { $$ = scon_stk_ptr; }
336 namelist2 : namelist2 ',' sconname
341 { synerr( "bad start condition list" ); }
346 if ( (scnum = sclookup( nmstr )) == 0 )
347 format_pinpoint_message(
348 "undeclared start condition %s",
352 for ( i = 1; i <= scon_stk_ptr; ++i )
353 if ( scon_stk[i] == scnum )
356 "<%s> specified twice",
361 if ( i > scon_stk_ptr )
362 scon_stk[++scon_stk_ptr] = scnum;
369 if ( transchar[lastst[$2]] != SYM_EPSILON )
370 /* Provide final transition \now/ so it
371 * will be marked as a trailing context
374 $2 = link_machines( $2,
375 mkstate( SYM_EPSILON ) );
377 mark_beginning_as_normal( $2 );
378 current_state_type = STATE_NORMAL;
380 if ( previous_continued_action )
382 /* We need to treat this as variable trailing
383 * context so that the backup does not happen
384 * in the action but before the action switch
385 * statement. If the backup happens in the
386 * action, then the rules "falling into" this
387 * one's action will *also* do the backup,
390 if ( ! varlength || headcnt != 0 )
392 "trailing context made variable due to preceding '|' action" );
394 /* Mark as variable. */
399 if ( lex_compat || (varlength && headcnt == 0) )
400 { /* variable trailing context rule */
401 /* Mark the first part of the rule as the
402 * accepting "head" part of a trailing
405 * By the way, we didn't do this at the
406 * beginning of this production because back
407 * then current_state_type was set up for a
408 * trail rule, and add_accept() can create
412 num_rules | YY_TRAILING_HEAD_MASK );
413 variable_trail_rule = true;
419 $$ = link_machines( $1, $2 );
423 { synerr( "trailing context used twice" ); }
432 current_state_type = STATE_TRAILING_CONTEXT;
436 synerr( "trailing context used twice" );
437 $$ = mkstate( SYM_EPSILON );
440 else if ( previous_continued_action )
442 /* See the comment in the rule for "re2 re"
446 "trailing context made variable due to preceding '|' action" );
451 if ( lex_compat || varlength )
453 /* Again, see the comment in the rule for
457 num_rules | YY_TRAILING_HEAD_MASK );
458 variable_trail_rule = true;
463 eps = mkstate( SYM_EPSILON );
464 $$ = link_machines( $1,
465 link_machines( eps, mkstate( '\n' ) ) );
474 if ( lex_compat || (varlength && headcnt == 0) )
475 /* Both head and trail are
478 variable_trail_rule = true;
499 /* This rule is written separately so the
500 * reduction will occur before the trailing
505 synerr( "trailing context used twice" );
510 /* We hope the trailing context is
519 current_state_type = STATE_TRAILING_CONTEXT;
524 series : series singleton
526 /* This is where concatenation of adjacent patterns
529 $$ = link_machines( $1, $2 );
536 singleton : singleton '*'
555 | singleton '{' NUMBER ',' NUMBER '}'
559 if ( $3 > $5 || $3 < 0 )
561 synerr( "bad iteration values" );
571 "bad iteration values" );
576 mkrep( $1, 1, $5 ) );
579 $$ = mkrep( $1, $3, $5 );
583 | singleton '{' NUMBER ',' '}'
589 synerr( "iteration value must be positive" );
594 $$ = mkrep( $1, $3, INFINITY );
597 | singleton '{' NUMBER '}'
599 /* The singleton could be something like "(foo)",
600 * in which case we have no idea what its length
601 * is, so we punt here.
607 synerr( "iteration value must be positive" );
612 $$ = link_machines( $1,
613 copysingl( $1, $3 - 1 ) );
620 /* Create the '.' character class. */
622 ccladd( anyccl, '\n' );
626 mkeccl( ccltbl + cclmap[anyccl],
627 ccllen[anyccl], nextecm,
628 ecgroup, csize, csize );
635 $$ = mkstate( -anyccl );
641 /* Sort characters for fast searching. We
642 * use a shell sort since this list could
645 cshell( ccltbl + cclmap[$1], ccllen[$1], true );
648 mkeccl( ccltbl + cclmap[$1], ccllen[$1],
649 nextecm, ecgroup, csize, csize );
673 if ( caseins && $1 >= 'A' && $1 <= 'Z' )
680 fullccl : '[' ccl ']'
690 ccl : ccl CHAR '-' CHAR
694 if ( $2 >= 'A' && $2 <= 'Z' )
696 if ( $4 >= 'A' && $4 <= 'Z' )
701 synerr( "negative range in character class" );
705 for ( i = $2; i <= $4; ++i )
708 /* Keep track if this ccl is staying in
709 * alphabetical order.
711 cclsorted = cclsorted && ($2 > lastchar);
720 if ( caseins && $2 >= 'A' && $2 <= 'Z' )
724 cclsorted = cclsorted && ($2 > lastchar);
731 /* Too hard to properly maintain cclsorted. */
740 currccl = $$ = cclinit();
744 ccl_expr: CCE_ALNUM { CCL_EXPR(isalnum) }
745 | CCE_ALPHA { CCL_EXPR(isalpha) }
746 | CCE_BLANK { CCL_EXPR(IS_BLANK) }
747 | CCE_CNTRL { CCL_EXPR(iscntrl) }
748 | CCE_DIGIT { CCL_EXPR(isdigit) }
749 | CCE_GRAPH { CCL_EXPR(isgraph) }
750 | CCE_LOWER { CCL_EXPR(islower) }
751 | CCE_PRINT { CCL_EXPR(isprint) }
752 | CCE_PUNCT { CCL_EXPR(ispunct) }
753 | CCE_SPACE { CCL_EXPR(isspace) }
760 | CCE_XDIGIT { CCL_EXPR(isxdigit) }
765 if ( caseins && $2 >= 'A' && $2 <= 'Z' )
770 $$ = link_machines( $1, mkstate( $2 ) );
774 { $$ = mkstate( SYM_EPSILON ); }
780 /* build_eof_action - build the "<<EOF>>" action for the active start
 * conditions.  Walks scon_stk[1..scon_stk_ptr]; an entry whose sceof[] flag
 * is already set is reported through format_pinpoint_message() as a
 * duplicate <<EOF>> rule.  The visible remainder of the loop sets the
 * sceof[] flag and emits a "case YY_STATE_EOF(name):" label via
 * add_action().
 * NOTE(review): the label is built with an unbounded sprintf() into the
 * MAXLINE-byte action_text, so a very long start-condition name could
 * overrun it -- confirm that scname[] entries are length-limited.
784 void build_eof_action()
787 char action_text[MAXLINE];
789 for ( i = 1; i <= scon_stk_ptr; ++i )
791 if ( sceof[scon_stk[i]] )
792 format_pinpoint_message(
793 "multiple <<EOF>> rules for start condition %s",
794 scname[scon_stk[i]] );
798 sceof[scon_stk[i]] = true;
799 sprintf( action_text, "case YY_STATE_EOF(%s):\n",
800 scname[scon_stk[i]] );
801 add_action( action_text );
805 line_directive_out( (FILE *) 0, 1 );
807 /* This isn't a normal rule after all - don't count it as
808 * such, so we don't have any holes in the rule numbering
809 * (which make generating "rule can never match" warnings
817 /* format_synerr - write out formatted syntax error; msg is used as the sprintf() format and arg as its single argument */
819 void format_synerr( msg, arg )
822 char errmsg[MAXLINE];
/* NOTE(review): sprintf() is unbounded here; a long enough arg can overrun
 * the MAXLINE-byte errmsg buffer.
 */
824 (void) sprintf( errmsg, msg, arg );
829 /* synerr - report a syntax error by handing str to pinpoint_message() */
835 pinpoint_message( str );
839 /* format_warn - write out formatted warning; msg is used as the sprintf() format and arg as its single argument */
841 void format_warn( msg, arg )
844 char warn_msg[MAXLINE];
/* NOTE(review): sprintf() is unbounded here; a long enough arg can overrun
 * the MAXLINE-byte warn_msg buffer.
 */
846 (void) sprintf( warn_msg, msg, arg );
851 /* warn - report a warning, unless -w was given; passes str and linenum on to line_warning() */
856 line_warning( str, linenum );
859 /* format_pinpoint_message - write out a message formatted with one string,
860 * pinpointing its location
 * msg is used as the sprintf() format and arg as its single argument; the
 * result is passed to pinpoint_message().
 * NOTE(review): the sprintf() into the MAXLINE-byte errmsg buffer is
 * unbounded, so a long enough arg can overrun it.
863 void format_pinpoint_message( msg, arg )
866 char errmsg[MAXLINE];
868 (void) sprintf( errmsg, msg, arg );
869 pinpoint_message( errmsg );
873 /* pinpoint_message - write out a message, pinpointing its location; forwards str to line_pinpoint() together with linenum */
875 void pinpoint_message( str )
878 line_pinpoint( str, linenum );
882 /* line_warning - report a warning at a given line, unless -w was given; str is prefixed with "warning, " and passed to line_pinpoint() with line */
884 void line_warning( str, line )
888 char warning[MAXLINE];
/* NOTE(review): sprintf() is unbounded here; the "warning, " prefix plus a
 * str close to MAXLINE bytes can overrun the warning buffer.
 */
892 sprintf( warning, "warning, %s", str );
893 line_pinpoint( warning, line );
898 /* line_pinpoint - write out a message, pinpointing it at the given line; output goes to stderr as: "infilename", line N: str */
900 void line_pinpoint( str, line )
904 fprintf( stderr, "\"%s\", line %d: %s\n", infilename, line, str );
908 /* yyerror - eat up an error message from the parser;
909 * currently, messages are ignored