Get rid of conditionals for hpux, AIX, THINKC, TURBOC, MS_DOS, VMS.
[dragonfly.git] / usr.bin / lex / parse.y
CommitLineData
984263bc
MD
1/* parse.y - parser for flex input */
2
3%token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
4%token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS
5
6%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
7%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
8
9%{
10/*-
11 * Copyright (c) 1990 The Regents of the University of California.
12 * All rights reserved.
13 *
14 * This code is derived from software contributed to Berkeley by
15 * Vern Paxson.
16 *
17 * The United States Government has rights in this work pursuant
18 * to contract no. DE-AC03-76SF00098 between the United States
19 * Department of Energy and the University of California.
20 *
21 * Redistribution and use in source and binary forms are permitted provided
22 * that: (1) source distributions retain this entire copyright notice and
23 * comment, and (2) distributions including binaries display the following
24 * acknowledgement: ``This product includes software developed by the
25 * University of California, Berkeley and its contributors'' in the
26 * documentation or other materials provided with the distribution and in
27 * all advertising materials mentioning features or use of this software.
28 * Neither the name of the University nor the names of its contributors may
29 * be used to endorse or promote products derived from this software without
30 * specific prior written permission.
31 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
32 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
33 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
34 */
35
36/* $Header: /home/daffy/u0/vern/flex/RCS/parse.y,v 2.28 95/04/21 11:51:51 vern Exp $ */
37/* $FreeBSD: src/usr.bin/lex/parse.y,v 1.3 1999/10/27 07:56:46 obrien Exp $ */
6e8149e3 38/* $DragonFly: src/usr.bin/lex/parse.y,v 1.3 2005/02/20 17:34:11 asmodai Exp $ */
984263bc
MD
39
40
41/* Some versions of bison are broken in that they use alloca() but don't
42 * declare it properly. The following is the patented (just kidding!)
43 * #ifdef chud to fix the problem, courtesy of Francois Pinard.
44 */
984263bc
MD
45
46#include "flexdef.h"
47
48/* The remainder of the alloca() cruft has to come after including flexdef.h,
49 * so HAVE_ALLOCA_H is (possibly) defined.
50 */
51#ifdef YYBISON
52# ifdef __GNUC__
53# ifndef alloca
54# define alloca __builtin_alloca
55# endif
56# else
57# if HAVE_ALLOCA_H
58# include <alloca.h>
59# else
984263bc 60char *alloca ();
984263bc
MD
61# endif
62# endif
63#endif
64
65/* Bletch, ^^^^ that was ugly! */
66
67
/* Scratch state shared by the grammar actions below.  trlcontxt,
 * variable_trail_rule, varlength, trailcnt, headcnt and rulelen are reset
 * for every rule by the initforrule production; i is the loop index reused
 * by many of the actions.
 */
68int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, rulelen;
69int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
70
/* Stack of start condition numbers.  The actions use entries 1 through
 * scon_stk_ptr; the array itself is allocated by the sect1end action.
 */
71int *scon_stk;
72int scon_stk_ptr;
73
74static int madeany = false; /* whether we've made the '.' character class */
75int previous_continued_action; /* whether the previous rule's action was '|' */
76
/* CCL_EXPR(func) adds to currccl every character code c in 0..csize-1 for
 * which isascii(c) and func(c) both hold.  It expands to a bare brace block
 * and is used without a trailing semicolon; the unbraced if/else in the
 * CCE_UPPER action depends on that, so do not wrap it in do/while.
 */
77/* Expand a POSIX character class expression. */
78#define CCL_EXPR(func) \
79 { \
80 int c; \
81 for ( c = 0; c < csize; ++c ) \
82 if ( isascii(c) && func(c) ) \
83 ccladd( currccl, c ); \
84 }
85
86/* While POSIX defines isblank(), it's not ANSI C. */
87#define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
88
89/* On some over-ambitious machines, such as DEC Alpha's, the default
90 * token type is "long" instead of "int"; this leads to problems with
91 * declaring yylval in flexdef.h. But so far, all the yacc's I've seen
92 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
93 * following should ensure that the default token type is "int".
94 */
95#define YYSTYPE int
96
97%}
98
99%%
/* goal - start production covering the whole input.  Once sect2 has been
 * parsed, the action builds the default rule from a freshly created, negated
 * character class, branches it into scset[] for every start condition
 * 1..lastsc, and emits the rule's action text: a YY_FATAL_ERROR call when
 * spprdflt is set, ECHO otherwise.
 */
100goal : initlex sect1 sect1end sect2 initforrule
101 { /* add default rule */
102 int def_rule;
103
104 pat = cclinit();
105 cclnegate( pat );
106
107 def_rule = mkstate( -pat );
108
109 /* Remember the number of the default rule so we
110 * don't generate "can't match" warnings for it.
111 */
112 default_rule = num_rules;
113
114 finish_rule( def_rule, false, 0, 0 );
115
116 for ( i = 1; i <= lastsc; ++i )
117 scset[i] = mkbranch( scset[i], def_rule );
118
119 if ( spprdflt )
120 add_action(
121 "YY_FATAL_ERROR( \"flex scanner jammed\" )" );
122 else
123 add_action( "ECHO" );
124
125 add_action( ";\n\tYY_BREAK\n" );
126 }
127 ;
128
/* initlex - empty production, reduced before section 1 is parsed.  Its
 * action installs the "INITIAL" start condition, passing false as the flag
 * argument (the same argument receives xcluflg for declared conditions).
 */
129initlex :
130 { /* initialize for processing rules */
131
132 /* Create default DFA start condition. */
133 scinstal( "INITIAL", false );
134 }
135 ;
136
/* sect1 - section 1: a possibly empty sequence of start condition
 * declarations and option lists.  A parse error here is reported through
 * synerr().
 */
137sect1 : sect1 startconddecl namelist1
138 | sect1 options
139 |
140 | error
141 { synerr( "unknown error processing section 1" ); }
142 ;
143
/* sect1end - the SECTEND token closing section 1.  Calls check_options(),
 * then allocates the start condition stack with lastsc + 1 entries and
 * marks it empty.
 */
144sect1end : SECTEND
145 {
146 check_options();
147 scon_stk = allocate_integer_array( lastsc + 1 );
148 scon_stk_ptr = 0;
149 }
150 ;
151
/* startconddecl - records in xcluflg which declaration keyword was seen:
 * false for SCDECL, true for XSCDECL.  namelist1 hands the flag to
 * scinstal() for each declared name.
 */
152startconddecl : SCDECL
153 { xcluflg = false; }
154
155 | XSCDECL
156 { xcluflg = true; }
157 ;
158
/* namelist1 - one or more NAME tokens following a start condition
 * declaration.  Each name (nmstr) is installed with scinstal() using the
 * current xcluflg; a parse error is reported as a bad start condition list.
 */
159namelist1 : namelist1 NAME
160 { scinstal( nmstr, xcluflg ); }
161
162 | NAME
163 { scinstal( nmstr, xcluflg ); }
164
165 | error
166 { synerr( "bad start condition list" ); }
167 ;
168
/* options / optionlist / option - an OPTION_OP followed by zero or more
 * option settings.  Each setting handled here has the form  keyword '=' NAME
 * and stores a copy of nmstr: OPT_OUTFILE into outfilename (also setting
 * did_outfilename), OPT_PREFIX into prefix, OPT_YYCLASS into yyclass.
 */
169options : OPTION_OP optionlist
170 ;
171
172optionlist : optionlist option
173 |
174 ;
175
176option : OPT_OUTFILE '=' NAME
177 {
178 outfilename = copy_string( nmstr );
179 did_outfilename = 1;
180 }
181 | OPT_PREFIX '=' NAME
182 { prefix = copy_string( nmstr ); }
183 | OPT_YYCLASS '=' NAME
184 { yyclass = copy_string( nmstr ); }
185 ;
186
/* sect2 - section 2: a possibly empty sequence of rules, each with an
 * optional start condition prefix, and of brace-enclosed groups of rules
 * sharing one prefix.  After each item scon_stk_ptr is set back to the
 * value scon yielded ($2), i.e. the stack depth from before that prefix
 * pushed anything.
 */
187sect2 : sect2 scon initforrule flexrule '\n'
188 { scon_stk_ptr = $2; }
189 | sect2 scon '{' sect2 '}'
190 { scon_stk_ptr = $2; }
191 |
192 ;
193
/* initforrule - empty production reduced before each rule.  Clears the
 * trailing-context flags and the length counters, resets current_state_type
 * to STATE_NORMAL, saves continued_action in previous_continued_action,
 * sets in_rule and calls new_rule().
 */
194initforrule :
195 {
196 /* Initialize for a parse of one rule. */
197 trlcontxt = variable_trail_rule = varlength = false;
198 trailcnt = headcnt = rulelen = 0;
199 current_state_type = STATE_NORMAL;
200 previous_continued_action = continued_action;
201 in_rule = true;
202
203 new_rule();
204 }
205 ;
206
/* flexrule - one complete rule.
 *
 * '^' rule and plain rule: finish the rule with finish_rule() and branch the
 * pattern into scbol[] (anchored form) or scset[] (plain form).  If start
 * conditions are active (scon_stk_ptr > 0) only those on scon_stk are
 * updated; otherwise every start condition i in 1..lastsc whose scxclu[i]
 * is clear.  The anchored form also sets bol_needed the first time it is
 * seen.
 *
 * EOF_OP: build the <<EOF>> action.  With no active start conditions, every
 * start condition without an sceof[] entry is pushed on scon_stk first; if
 * none qualifies a warning is issued instead.
 */
207flexrule : '^' rule
208 {
209 pat = $2;
210 finish_rule( pat, variable_trail_rule,
211 headcnt, trailcnt );
212
213 if ( scon_stk_ptr > 0 )
214 {
215 for ( i = 1; i <= scon_stk_ptr; ++i )
216 scbol[scon_stk[i]] =
217 mkbranch( scbol[scon_stk[i]],
218 pat );
219 }
220
221 else
222 {
223 /* Add to all non-exclusive start conditions,
224 * including the default (0) start condition.
225 */
226
227 for ( i = 1; i <= lastsc; ++i )
228 if ( ! scxclu[i] )
229 scbol[i] = mkbranch( scbol[i],
230 pat );
231 }
232
233 if ( ! bol_needed )
234 {
235 bol_needed = true;
236
237 if ( performance_report > 1 )
238 pinpoint_message(
239 "'^' operator results in sub-optimal performance" );
240 }
241 }
242
243 | rule
244 {
245 pat = $1;
246 finish_rule( pat, variable_trail_rule,
247 headcnt, trailcnt );
248
249 if ( scon_stk_ptr > 0 )
250 {
251 for ( i = 1; i <= scon_stk_ptr; ++i )
252 scset[scon_stk[i]] =
253 mkbranch( scset[scon_stk[i]],
254 pat );
255 }
256
257 else
258 {
259 for ( i = 1; i <= lastsc; ++i )
260 if ( ! scxclu[i] )
261 scset[i] =
262 mkbranch( scset[i],
263 pat );
264 }
265 }
266
267 | EOF_OP
268 {
269 if ( scon_stk_ptr > 0 )
270 build_eof_action();
271
272 else
273 {
274 /* This EOF applies to all start conditions
275 * which don't already have EOF actions.
276 */
277 for ( i = 1; i <= lastsc; ++i )
278 if ( ! sceof[i] )
279 scon_stk[++scon_stk_ptr] = i;
280
281 if ( scon_stk_ptr == 0 )
282 warn(
283 "all start conditions already have <<EOF>> rules" );
284
285 else
286 build_eof_action();
287 }
288 }
289
290 | error
291 { synerr( "unrecognized rule" ); }
292 ;
293
/* scon_stk_ptr - empty production whose value is the current scon_stk_ptr.
 * scon places it right after '<' so the stack depth is captured before
 * namelist2 pushes onto the stack.
 */
294scon_stk_ptr :
295 { $$ = scon_stk_ptr; }
296 ;
297
/* scon - optional start condition prefix of a rule.  In all three
 * alternatives the value is the scon_stk_ptr in effect before anything was
 * pushed; sect2 assigns it back to scon_stk_ptr afterwards.  '<*>' pushes
 * every start condition 1..lastsc that is not already on the stack.
 */
298scon : '<' scon_stk_ptr namelist2 '>'
299 { $$ = $2; }
300
301 | '<' '*' '>'
302 {
303 $$ = scon_stk_ptr;
304
305 for ( i = 1; i <= lastsc; ++i )
306 {
307 int j;
308
309 for ( j = 1; j <= scon_stk_ptr; ++j )
310 if ( scon_stk[j] == i )
311 break;
312
313 if ( j > scon_stk_ptr )
314 scon_stk[++scon_stk_ptr] = i;
315 }
316 }
317
318 |
319 { $$ = scon_stk_ptr; }
320 ;
321
/* namelist2 - comma-separated list of start condition names inside
 * '<' ... '>'.  The work is done by sconname; a parse error is reported as
 * a bad start condition list.
 */
322namelist2 : namelist2 ',' sconname
323
324 | sconname
325
326 | error
327 { synerr( "bad start condition list" ); }
328 ;
329
/* sconname - one start condition name in a rule prefix.  nmstr is looked up
 * with sclookup(); a result of 0 is reported as an undeclared start
 * condition.  Otherwise the number is pushed on scon_stk, unless it is
 * already there, in which case a "specified twice" warning is issued.
 */
330sconname : NAME
331 {
332 if ( (scnum = sclookup( nmstr )) == 0 )
333 format_pinpoint_message(
334 "undeclared start condition %s",
335 nmstr );
336 else
337 {
338 for ( i = 1; i <= scon_stk_ptr; ++i )
339 if ( scon_stk[i] == scnum )
340 {
341 format_warn(
342 "<%s> specified twice",
343 scname[scnum] );
344 break;
345 }
346
347 if ( i > scon_stk_ptr )
348 scon_stk[++scon_stk_ptr] = scnum;
349 }
350 }
351 ;
352
/* rule - a pattern with optional trailing context.
 *
 *   re2 re      head '/' trail: links head and trail into one machine
 *   re2 re '$'  reported as "trailing context used twice"
 *   re '$'      trailing context consisting of a single '\n' state
 *   re          plain pattern (trlcontxt may still be set here)
 *
 * The actions choose between the fixed-length case (trailcnt is set from
 * rulelen, or to 1 for '$') and the variable case, where the head is marked
 * via add_accept() with YY_TRAILING_HEAD_MASK and variable_trail_rule is
 * set.  The variable case is forced when lex_compat is set or when the
 * previous rule's action was '|' (previous_continued_action).
 */
353rule : re2 re
354 {
355 if ( transchar[lastst[$2]] != SYM_EPSILON )
356 /* Provide final transition \now/ so it
357 * will be marked as a trailing context
358 * state.
359 */
360 $2 = link_machines( $2,
361 mkstate( SYM_EPSILON ) );
362
363 mark_beginning_as_normal( $2 );
364 current_state_type = STATE_NORMAL;
365
366 if ( previous_continued_action )
367 {
368 /* We need to treat this as variable trailing
369 * context so that the backup does not happen
370 * in the action but before the action switch
371 * statement. If the backup happens in the
372 * action, then the rules "falling into" this
373 * one's action will *also* do the backup,
374 * erroneously.
375 */
376 if ( ! varlength || headcnt != 0 )
377 warn(
378 "trailing context made variable due to preceding '|' action" );
379
380 /* Mark as variable. */
381 varlength = true;
382 headcnt = 0;
383 }
384
385 if ( lex_compat || (varlength && headcnt == 0) )
386 { /* variable trailing context rule */
387 /* Mark the first part of the rule as the
388 * accepting "head" part of a trailing
389 * context rule.
390 *
391 * By the way, we didn't do this at the
392 * beginning of this production because back
393 * then current_state_type was set up for a
394 * trail rule, and add_accept() can create
395 * a new state ...
396 */
397 add_accept( $1,
398 num_rules | YY_TRAILING_HEAD_MASK );
399 variable_trail_rule = true;
400 }
401
402 else
403 trailcnt = rulelen;
404
405 $$ = link_machines( $1, $2 );
406 }
407
408 | re2 re '$'
409 { synerr( "trailing context used twice" ); }
410
411 | re '$'
412 {
413 headcnt = 0;
414 trailcnt = 1;
415 rulelen = 1;
416 varlength = false;
417
418 current_state_type = STATE_TRAILING_CONTEXT;
419
420 if ( trlcontxt )
421 {
422 synerr( "trailing context used twice" );
423 $$ = mkstate( SYM_EPSILON );
424 }
425
426 else if ( previous_continued_action )
427 {
428 /* See the comment in the rule for "re2 re"
429 * above.
430 */
431 warn(
432 "trailing context made variable due to preceding '|' action" );
433
434 varlength = true;
435 }
436
437 if ( lex_compat || varlength )
438 {
439 /* Again, see the comment in the rule for
440 * "re2 re" above.
441 */
442 add_accept( $1,
443 num_rules | YY_TRAILING_HEAD_MASK );
444 variable_trail_rule = true;
445 }
446
447 trlcontxt = true;
448
449 eps = mkstate( SYM_EPSILON );
450 $$ = link_machines( $1,
451 link_machines( eps, mkstate( '\n' ) ) );
452 }
453
454 | re
455 {
456 $$ = $1;
457
458 if ( trlcontxt )
459 {
460 if ( lex_compat || (varlength && headcnt == 0) )
461 /* Both head and trail are
462 * variable-length.
463 */
464 variable_trail_rule = true;
465 else
466 trailcnt = rulelen;
467 }
468 }
469 ;
470
471
/* re - alternation: one or more series separated by '|', combined with
 * mkor().  Any '|' sets varlength.
 */
472re : re '|' series
473 {
474 varlength = true;
475 $$ = mkor( $1, $3 );
476 }
477
478 | series
479 { $$ = $1; }
480 ;
481
482
/* re2 - the head of a trailing-context rule: an re followed by '/'.  Sets
 * trlcontxt (or reports a second use), records the head length in headcnt
 * when the head was fixed-length, clears varlength otherwise, restarts
 * rulelen at 0 for the trailing part and switches current_state_type to
 * STATE_TRAILING_CONTEXT.  The value is the head machine unchanged.
 */
483re2 : re '/'
484 {
485 /* This rule is written separately so the
486 * reduction will occur before the trailing
487 * series is parsed.
488 */
489
490 if ( trlcontxt )
491 synerr( "trailing context used twice" );
492 else
493 trlcontxt = true;
494
495 if ( varlength )
496 /* We hope the trailing context is
497 * fixed-length.
498 */
499 varlength = false;
500 else
501 headcnt = rulelen;
502
503 rulelen = 0;
504
505 current_state_type = STATE_TRAILING_CONTEXT;
506 $$ = $1;
507 }
508 ;
509
/* series - concatenation: one or more singletons joined left to right with
 * link_machines().
 */
510series : series singleton
511 {
512 /* This is where concatenation of adjacent patterns
513 * gets done.
514 */
515 $$ = link_machines( $1, $2 );
516 }
517
518 | singleton
519 { $$ = $1; }
520 ;
521
/* singleton - a single pattern element, optionally followed by postfix
 * operators.
 *
 * The closures ('*', '+', '?') and all three '{...}' iteration forms set
 * varlength.  Iteration counts are validated in the actions; on a bad count
 * a syntax error is reported and the operand is passed through unchanged.
 *
 * The primitive elements ('.', a bracketed character class, PREVCCL, CHAR)
 * each add one to rulelen.  Quoted strings and parenthesised expressions
 * pass their sub-machine through.  The '.' class is created once (guarded
 * by madeany) as the negation of a class containing only '\n'.
 */
522singleton : singleton '*'
523 {
524 varlength = true;
525
526 $$ = mkclos( $1 );
527 }
528
529 | singleton '+'
530 {
531 varlength = true;
532 $$ = mkposcl( $1 );
533 }
534
535 | singleton '?'
536 {
537 varlength = true;
538 $$ = mkopt( $1 );
539 }
540
541 | singleton '{' NUMBER ',' NUMBER '}'
542 {
543 varlength = true;
544
545 if ( $3 > $5 || $3 < 0 )
546 {
547 synerr( "bad iteration values" );
548 $$ = $1;
549 }
550 else
551 {
552 if ( $3 == 0 )
553 {
554 if ( $5 <= 0 )
555 {
556 synerr(
557 "bad iteration values" );
558 $$ = $1;
559 }
560 else
561 $$ = mkopt(
562 mkrep( $1, 1, $5 ) );
563 }
564 else
565 $$ = mkrep( $1, $3, $5 );
566 }
567 }
568
569 | singleton '{' NUMBER ',' '}'
570 {
571 varlength = true;
572
573 if ( $3 <= 0 )
574 {
575 synerr( "iteration value must be positive" );
576 $$ = $1;
577 }
578
579 else
580 $$ = mkrep( $1, $3, INFINITY );
581 }
582
583 | singleton '{' NUMBER '}'
584 {
585 /* The singleton could be something like "(foo)",
586 * in which case we have no idea what its length
587 * is, so we punt here.
588 */
589 varlength = true;
590
591 if ( $3 <= 0 )
592 {
593 synerr( "iteration value must be positive" );
594 $$ = $1;
595 }
596
597 else
598 $$ = link_machines( $1,
599 copysingl( $1, $3 - 1 ) );
600 }
601
602 | '.'
603 {
604 if ( ! madeany )
605 {
606 /* Create the '.' character class. */
607 anyccl = cclinit();
608 ccladd( anyccl, '\n' );
609 cclnegate( anyccl );
610
611 if ( useecs )
612 mkeccl( ccltbl + cclmap[anyccl],
613 ccllen[anyccl], nextecm,
614 ecgroup, csize, csize );
615
616 madeany = true;
617 }
618
619 ++rulelen;
620
621 $$ = mkstate( -anyccl );
622 }
623
624 | fullccl
625 {
626 if ( ! cclsorted )
627 /* Sort characters for fast searching. We
628 * use a shell sort since this list could
629 * be large.
630 */
631 cshell( ccltbl + cclmap[$1], ccllen[$1], true );
632
633 if ( useecs )
634 mkeccl( ccltbl + cclmap[$1], ccllen[$1],
635 nextecm, ecgroup, csize, csize );
636
637 ++rulelen;
638
639 $$ = mkstate( -$1 );
640 }
641
642 | PREVCCL
643 {
644 ++rulelen;
645
646 $$ = mkstate( -$1 );
647 }
648
649 | '"' string '"'
650 { $$ = $2; }
651
652 | '(' re ')'
653 { $$ = $2; }
654
655 | CHAR
656 {
657 ++rulelen;
658
659 if ( caseins && $1 >= 'A' && $1 <= 'Z' )
660 $1 = clower( $1 );
661
662 $$ = mkstate( $1 );
663 }
664 ;
665
/* fullccl - a bracketed character class.  The value is the class produced
 * by ccl; the '[' '^' form negates it with cclnegate() first.
 */
666fullccl : '[' ccl ']'
667 { $$ = $2; }
668
669 | '[' '^' ccl ']'
670 {
671 cclnegate( $3 );
672 $$ = $3;
673 }
674 ;
675
/* ccl - the contents of a bracketed character class, built left to right.
 * The empty alternative creates the class with cclinit() and also stores it
 * in currccl; the other alternatives add a range, a single character or a
 * POSIX class expression to it.  cclsorted and lastchar track whether the
 * characters have been added in increasing order.  With caseins set,
 * characters in 'A'..'Z' are passed through clower() before being added.
 */
676ccl : ccl CHAR '-' CHAR
677 {
678 if ( caseins )
679 {
680 if ( $2 >= 'A' && $2 <= 'Z' )
681 $2 = clower( $2 );
682 if ( $4 >= 'A' && $4 <= 'Z' )
683 $4 = clower( $4 );
684 }
685
686 if ( $2 > $4 )
687 synerr( "negative range in character class" );
688
689 else
690 {
691 for ( i = $2; i <= $4; ++i )
692 ccladd( $1, i );
693
694 /* Keep track if this ccl is staying in
695 * alphabetical order.
696 */
697 cclsorted = cclsorted && ($2 > lastchar);
698 lastchar = $4;
699 }
700
701 $$ = $1;
702 }
703
704 | ccl CHAR
705 {
706 if ( caseins && $2 >= 'A' && $2 <= 'Z' )
707 $2 = clower( $2 );
708
709 ccladd( $1, $2 );
710 cclsorted = cclsorted && ($2 > lastchar);
711 lastchar = $2;
712 $$ = $1;
713 }
714
715 | ccl ccl_expr
716 {
717 /* Too hard to properly maintain cclsorted. */
718 cclsorted = false;
719 $$ = $1;
720 }
721
722 |
723 {
724 cclsorted = true;
725 lastchar = 0;
726 currccl = $$ = cclinit();
727 }
728 ;
729
/* ccl_expr - POSIX character class expressions inside a bracketed class.
 * Each token adds the characters accepted by the matching <ctype.h>
 * predicate (IS_BLANK for CCE_BLANK) to currccl via CCL_EXPR.  With caseins
 * set, CCE_UPPER uses islower instead of isupper.
 */
730ccl_expr: CCE_ALNUM { CCL_EXPR(isalnum) }
731 | CCE_ALPHA { CCL_EXPR(isalpha) }
732 | CCE_BLANK { CCL_EXPR(IS_BLANK) }
733 | CCE_CNTRL { CCL_EXPR(iscntrl) }
734 | CCE_DIGIT { CCL_EXPR(isdigit) }
735 | CCE_GRAPH { CCL_EXPR(isgraph) }
736 | CCE_LOWER { CCL_EXPR(islower) }
737 | CCE_PRINT { CCL_EXPR(isprint) }
738 | CCE_PUNCT { CCL_EXPR(ispunct) }
739 | CCE_SPACE { CCL_EXPR(isspace) }
740 | CCE_UPPER {
741 if ( caseins )
742 CCL_EXPR(islower)
743 else
744 CCL_EXPR(isupper)
745 }
746 | CCE_XDIGIT { CCL_EXPR(isxdigit) }
747 ;
748
/* string - the contents of a quoted string.  Starts from an epsilon state
 * and links on one state per CHAR; each character adds one to rulelen.
 * With caseins set, characters in 'A'..'Z' are passed through clower()
 * first.
 */
749string : string CHAR
750 {
751 if ( caseins && $2 >= 'A' && $2 <= 'Z' )
752 $2 = clower( $2 );
753
754 ++rulelen;
755
756 $$ = link_machines( $1, mkstate( $2 ) );
757 }
758
759 |
760 { $$ = mkstate( SYM_EPSILON ); }
761 ;
762
763%%
764
765
766/* build_eof_action - build the "<<EOF>>" action for the active start
767 * conditions
768 */
769
770void build_eof_action()
771 {
772 register int i;
773 char action_text[MAXLINE];
774
775 for ( i = 1; i <= scon_stk_ptr; ++i )
776 {
777 if ( sceof[scon_stk[i]] )
778 format_pinpoint_message(
779 "multiple <<EOF>> rules for start condition %s",
780 scname[scon_stk[i]] );
781
782 else
783 {
784 sceof[scon_stk[i]] = true;
785 sprintf( action_text, "case YY_STATE_EOF(%s):\n",
786 scname[scon_stk[i]] );
787 add_action( action_text );
788 }
789 }
790
791 line_directive_out( (FILE *) 0, 1 );
792
793 /* This isn't a normal rule after all - don't count it as
794 * such, so we don't have any holes in the rule numbering
795 * (which make generating "rule can never match" warnings
796 * more difficult.
797 */
798 --num_rules;
799 ++num_eof_rules;
800 }
801
802
803/* format_synerr - write out formatted syntax error */
804
805void format_synerr( msg, arg )
806char msg[], arg[];
807 {
808 char errmsg[MAXLINE];
809
810 (void) sprintf( errmsg, msg, arg );
811 synerr( errmsg );
812 }
813
814
815/* synerr - report a syntax error */
816
817void synerr( str )
818char str[];
819 {
820 syntaxerror = true;
821 pinpoint_message( str );
822 }
823
824
825/* format_warn - write out formatted warning */
826
827void format_warn( msg, arg )
828char msg[], arg[];
829 {
830 char warn_msg[MAXLINE];
831
832 (void) sprintf( warn_msg, msg, arg );
833 warn( warn_msg );
834 }
835
836
837/* warn - report a warning, unless -w was given */
838
839void warn( str )
840char str[];
841 {
842 line_warning( str, linenum );
843 }
844
845/* format_pinpoint_message - write out a message formatted with one string,
846 * pinpointing its location
847 */
848
849void format_pinpoint_message( msg, arg )
850char msg[], arg[];
851 {
852 char errmsg[MAXLINE];
853
854 (void) sprintf( errmsg, msg, arg );
855 pinpoint_message( errmsg );
856 }
857
858
859/* pinpoint_message - write out a message, pinpointing its location */
860
861void pinpoint_message( str )
862char str[];
863 {
864 line_pinpoint( str, linenum );
865 }
866
867
868/* line_warning - report a warning at a given line, unless -w was given */
869
870void line_warning( str, line )
871char str[];
872int line;
873 {
874 char warning[MAXLINE];
875
876 if ( ! nowarn )
877 {
878 sprintf( warning, "warning, %s", str );
879 line_pinpoint( warning, line );
880 }
881 }
882
883
884/* line_pinpoint - write out a message, pinpointing it at the given line */
885
886void line_pinpoint( str, line )
887char str[];
888int line;
889 {
890 fprintf( stderr, "\"%s\", line %d: %s\n", infilename, line, str );
891 }
892
893
/* yyerror - eat up an error message from the parser;
 * currently, messages are ignored
 */

void yyerror( msg )
char msg[];
	{
	/* Intentionally empty: the message is discarded. */
	(void) msg;
	}