Merge branch 'vendor/EXPAT'
[dragonfly.git] / contrib / flex / parse.y
1 /* parse.y - parser for flex input */
2
3 %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
4 %token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER OPT_EXTRA_TYPE
5 %token OPT_TABLES
6
7 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
8 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
9
10 %token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
11 %token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
12
13 %left CCL_OP_DIFF CCL_OP_UNION
14
15 /*
16  * POSIX and AT&T lex place the
17  * precedence of the repeat operator, {}, below that of concatenation.
18  * Thus, ab{3} is ababab.  Most other POSIX utilities use an Extended
19  * Regular Expression (ERE) precedence that has the repeat operator
20  * higher than concatenation.  This causes ab{3} to yield abbb.
21  *
22  * In order to support the POSIX and AT&T precedence and the flex
23  * precedence we define two token sets for the begin and end tokens of
24  * the repeat operator, '{' and '}'.  The lexical scanner chooses
25  * which tokens to return based on whether posix_compat or lex_compat
26  * are specified. Specifying either posix_compat or lex_compat will
27  * cause flex to parse scanner files as per the AT&T and
28  * POSIX-mandated behavior.
29  */
30
31 %token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
32
33
34 %{
35 /*  Copyright (c) 1990 The Regents of the University of California. */
36 /*  All rights reserved. */
37
38 /*  This code is derived from software contributed to Berkeley by */
39 /*  Vern Paxson. */
40
41 /*  The United States Government has rights in this work pursuant */
42 /*  to contract no. DE-AC03-76SF00098 between the United States */
43 /*  Department of Energy and the University of California. */
44
45 /*  This file is part of flex. */
46
47 /*  Redistribution and use in source and binary forms, with or without */
48 /*  modification, are permitted provided that the following conditions */
49 /*  are met: */
50
51 /*  1. Redistributions of source code must retain the above copyright */
52 /*     notice, this list of conditions and the following disclaimer. */
53 /*  2. Redistributions in binary form must reproduce the above copyright */
54 /*     notice, this list of conditions and the following disclaimer in the */
55 /*     documentation and/or other materials provided with the distribution. */
56
57 /*  Neither the name of the University nor the names of its contributors */
58 /*  may be used to endorse or promote products derived from this software */
59 /*  without specific prior written permission. */
60
61 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
62 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
63 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
64 /*  PURPOSE. */
65
66 #include "flexdef.h"
67 #include "tables.h"
68
69 int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;  /* working variables used by the grammar actions */
70 int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;  /* trlcontxt, varlength and variable_trail_rule are reset by initforrule */
71
72 int *scon_stk;  /* stack of start-condition numbers; allocated in sect1end, entries are stored at [1..scon_stk_ptr] */
73 int scon_stk_ptr;  /* index of the top entry of scon_stk; 0 means the stack is empty */
74
75 static int madeany = false;  /* whether we've made the '.' character class */
76 static int ccldot, cclany;  /* ccls built once by the '.' action: ccldot excludes '\n', cclany excludes nothing */
77 int previous_continued_action;  /* whether the previous rule's action was '|' */
78 /* Format fmt with the two arguments a1 and a2 into a local MAXLINE-byte buffer (snprintf bounds the write) and hand the result to warn(). */
79 #define format_warn3(fmt, a1, a2) \
80         do{ \
81         char fw3_msg[MAXLINE];\
82         snprintf( fw3_msg, MAXLINE,(fmt), (a1), (a2) );\
83         warn( fw3_msg );\
84         }while(0)
85
86 /* Expand a POSIX character class expression: add to currccl every c in [0, csize) that is ASCII and for which func(c) is true. */
87 #define CCL_EXPR(func) \
88         do{ \
89         int c; \
90         for ( c = 0; c < csize; ++c ) \
91                 if ( isascii(c) && func(c) ) \
92                         ccladd( currccl, c ); \
93         }while(0)
94
95 /* Negated class: add to currccl every c in [0, csize) for which func(c) is false.  Unlike CCL_EXPR there is no isascii() filter here. */
96 #define CCL_NEG_EXPR(func) \
97         do{ \
98         int c; \
99         for ( c = 0; c < csize; ++c ) \
100                 if ( !func(c) ) \
101                         ccladd( currccl, c ); \
102         }while(0)
103
104 /* While POSIX defines isblank(), it's not ANSI C, so test for space or tab directly.  Note that the argument is evaluated twice. */
105 #define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
106
107 /* On some over-ambitious machines, such as DEC Alphas, the default
108  * token type is "long" instead of "int"; this leads to problems with
109  * declaring yylval in flexdef.h.  But so far, all the yaccs I've seen
110  * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"s, so the
111  * following should ensure that the default token type is "int".
112  */
113 #define YYSTYPE int  /* so every $$ and $N in the grammar below is a plain int */
114
115 %}
116
117 %%
118 goal            :  initlex sect1 sect1end sect2 initforrule  /* start symbol; the trailing initforrule prepares the rule added below */
119                         { /* Add the default rule; this action runs after all of sect2 has been parsed. */
120                         int def_rule;
121
122                         pat = cclinit();
123                         cclnegate( pat );  /* negated fresh ccl: the same construction the '.' action uses for its match-anything class */
124
125                         def_rule = mkstate( -pat );  /* ccls are handed to mkstate() negated, as in the singleton actions */
126
127                         /* Remember the number of the default rule so we
128                          * don't generate "can't match" warnings for it.
129                          */
130                         default_rule = num_rules;
131
132                         finish_rule( def_rule, false, 0, 0, 0);
133
134                         for ( i = 1; i <= lastsc; ++i )  /* add the default rule as a branch of every start condition */
135                                 scset[i] = mkbranch( scset[i], def_rule );
136
137                         if ( spprdflt )  /* spprdflt selects a fatal-error action instead of ECHO */
138                                 add_action(
139                                 "YY_FATAL_ERROR( \"flex scanner jammed\" )" );
140                         else
141                                 add_action( "ECHO" );
142
143                         add_action( ";\n\tYY_BREAK\n" );  /* appended after whichever action text was chosen above */
144                         }
145                 ;
146
147 initlex         :  /* empty; as the first symbol of goal, its action runs before section 1 is parsed */
148                         { /* initialize for processing rules */
149
150                         /* Create default DFA start condition. */
151                         scinstal( "INITIAL", false );  /* second argument is the slot namelist1 fills with xcluflg */
152                         }
153                 ;
154
155 sect1           :  sect1 startconddecl namelist1  /* a start-condition declaration followed by its names */
156                 |  sect1 options  /* an option list introduced by OPTION_OP */
157                 |  /* empty */
158                 |  error  /* anything else in section 1 is reported through synerr() */
159                         { synerr( _("unknown error processing section 1") ); }
160                 ;
161
162 sect1end        :  SECTEND  /* end of section 1 */
163                         {
164                         check_options();
165                         scon_stk = allocate_integer_array( lastsc + 1 );  /* lastsc + 1 entries: pushes elsewhere in this grammar store at [1..lastsc] */
166                         scon_stk_ptr = 0;  /* start with an empty stack */
167                         }
168                 ;
169
170 startconddecl   :  SCDECL  /* record in xcluflg which declaration keyword was seen; namelist1 passes it to scinstal() */
171                         { xcluflg = false; }
172
173                 |  XSCDECL
174                         { xcluflg = true; }
175                 ;
176
177 namelist1       :  namelist1 NAME  /* one or more names; each is installed with the current xcluflg */
178                         { scinstal( nmstr, xcluflg ); }
179
180                 |  NAME
181                         { scinstal( nmstr, xcluflg ); }
182
183                 |  error  /* anything else is reported through synerr() */
184                         { synerr( _("bad start condition list") ); }
185                 ;
186
187 options         :  OPTION_OP optionlist  /* OPTION_OP followed by zero or more option settings */
188                 ;
189
190 optionlist      :  optionlist option
191                 |  /* empty */
192                 ;
193
194 option          :  OPT_OUTFILE '=' NAME  /* each alternative stores a copy_string() copy of nmstr in the matching global */
195                         {
196                         outfilename = copy_string( nmstr );
197                         did_outfilename = 1;  /* also flag that an output file name was given */
198                         }
199                 |  OPT_EXTRA_TYPE '=' NAME
200                         { extra_type = copy_string( nmstr ); }
201                 |  OPT_PREFIX '=' NAME
202                         { prefix = copy_string( nmstr ); }
203                 |  OPT_YYCLASS '=' NAME
204                         { yyclass = copy_string( nmstr ); }
205                 |  OPT_HEADER '=' NAME
206                         { headerfilename = copy_string( nmstr ); }
207             |  OPT_TABLES '=' NAME
208             { tablesext = true; tablesfilename = copy_string( nmstr ); }  /* also sets tablesext */
209                 ;
210
211 sect2           :  sect2 scon initforrule flexrule '\n'  /* one rule, optionally prefixed by a start-condition list */
212                         { scon_stk_ptr = $2; }  /* $2 is the stack depth scon saved: drop whatever scon or the rule pushed */
213                 |  sect2 scon '{' sect2 '}'  /* a braced group of rules sharing the start conditions pushed by scon */
214                         { scon_stk_ptr = $2; }  /* restore the depth saved by scon once the group ends */
215                 |  /* empty */
216                 ;
217
218 initforrule     :  /* empty marker: its action runs before each flexrule, and once more at the end of goal */
219                         {
220                         /* Initialize for a parse of one rule. */
221                         trlcontxt = variable_trail_rule = varlength = false;
222                         trailcnt = headcnt = rulelen = 0;
223                         current_state_type = STATE_NORMAL;
224                         previous_continued_action = continued_action;  /* snapshot continued_action for use while parsing this rule */
225                         in_rule = true;
226
227                         new_rule();
228                         }
229                 ;
230
231 flexrule        :  '^' rule  /* rule anchored with '^': its machine is added to scbol[] */
232                         {
233                         pat = $2;
234                         finish_rule( pat, variable_trail_rule,
235                                 headcnt, trailcnt , previous_continued_action);
236
237                         if ( scon_stk_ptr > 0 )  /* start conditions are on the stack: add to exactly those */
238                                 {
239                                 for ( i = 1; i <= scon_stk_ptr; ++i )
240                                         scbol[scon_stk[i]] =
241                                                 mkbranch( scbol[scon_stk[i]],
242                                                                 pat );
243                                 }
244
245                         else
246                                 {
247                                 /* Add to all non-exclusive start conditions,
248                                  * including the default (0) start condition.
249                                  */
250
251                                 for ( i = 1; i <= lastsc; ++i )
252                                         if ( ! scxclu[i] )
253                                                 scbol[i] = mkbranch( scbol[i],
254                                                                         pat );
255                                 }
256
257                         if ( ! bol_needed )  /* first '^' rule seen: set bol_needed and, if requested, report once */
258                                 {
259                                 bol_needed = true;
260
261                                 if ( performance_report > 1 )
262                                         pinpoint_message(
263                         "'^' operator results in sub-optimal performance" );
264                                 }
265                         }
266
267                 |  rule  /* unanchored rule: same as above but added to scset[] */
268                         {
269                         pat = $1;
270                         finish_rule( pat, variable_trail_rule,
271                                 headcnt, trailcnt , previous_continued_action);
272
273                         if ( scon_stk_ptr > 0 )  /* start conditions are on the stack: add to exactly those */
274                                 {
275                                 for ( i = 1; i <= scon_stk_ptr; ++i )
276                                         scset[scon_stk[i]] =
277                                                 mkbranch( scset[scon_stk[i]],
278                                                                 pat );
279                                 }
280
281                         else
282                                 {
283                                 for ( i = 1; i <= lastsc; ++i )  /* otherwise add to every start condition whose scxclu[] entry is clear */
284                                         if ( ! scxclu[i] )
285                                                 scset[i] =
286                                                         mkbranch( scset[i],
287                                                                 pat );
288                                 }
289                         }
290
291                 |  EOF_OP  /* <<EOF>> rule */
292                         {
293                         if ( scon_stk_ptr > 0 )  /* start conditions are on the stack: build the action for them */
294                                 build_eof_action();
295         
296                         else
297                                 {
298                                 /* This EOF applies to all start conditions
299                                  * which don't already have EOF actions.
300                                  */
301                                 for ( i = 1; i <= lastsc; ++i )
302                                         if ( ! sceof[i] )
303                                                 scon_stk[++scon_stk_ptr] = i;  /* pushed here; the enclosing sect2 action resets scon_stk_ptr afterwards */
304
305                                 if ( scon_stk_ptr == 0 )  /* nothing was pushed */
306                                         warn(
307                         "all start conditions already have <<EOF>> rules" );
308
309                                 else
310                                         build_eof_action();
311                                 }
312                         }
313
314                 |  error  /* anything else is reported through synerr() */
315                         { synerr( _("unrecognized rule") ); }
316                 ;
317
318 scon_stk_ptr    :  /* empty marker nonterminal (same name as the C variable): yields the current stack depth */
319                         { $$ = scon_stk_ptr; }
320                 ;
321
322 scon            :  '<' scon_stk_ptr namelist2 '>'  /* explicit list: namelist2 pushes the named start conditions */
323                         { $$ = $2; }  /* value is the stack depth captured before namelist2 pushed anything */
324
325                 |  '<' '*' '>'  /* push every start condition that is not already on the stack */
326                         {
327                         $$ = scon_stk_ptr;  /* save the depth before pushing */
328
329                         for ( i = 1; i <= lastsc; ++i )
330                                 {
331                                 int j;
332
333                                 for ( j = 1; j <= scon_stk_ptr; ++j )  /* linear search of the stack for i */
334                                         if ( scon_stk[j] == i )
335                                                 break;
336
337                                 if ( j > scon_stk_ptr )  /* search ran off the end: i is not on the stack yet */
338                                         scon_stk[++scon_stk_ptr] = i;
339                                 }
340                         }
341
342                 |  /* empty: no start-condition prefix, the stack is left as it is */
343                         { $$ = scon_stk_ptr; }
344                 ;
345
346 namelist2       :  namelist2 ',' sconname  /* comma-separated list; all the work happens in sconname */
347
348                 |  sconname
349
350                 |  error  /* anything else is reported through synerr() */
351                         { synerr( _("bad start condition list") ); }
352                 ;
353
354 sconname        :  NAME  /* look up one start-condition name and push it on scon_stk unless it is already there */
355                         {
356                         if ( (scnum = sclookup( nmstr )) == 0 )  /* a result of 0 is treated as "not declared" */
357                                 format_pinpoint_message(
358                                         "undeclared start condition %s",
359                                         nmstr );
360                         else
361                                 {
362                                 for ( i = 1; i <= scon_stk_ptr; ++i )  /* scan the stack for a duplicate */
363                                         if ( scon_stk[i] == scnum )
364                                                 {
365                                                 format_warn(
366                                                         "<%s> specified twice",
367                                                         scname[scnum] );
368                                                 break;
369                                                 }
370
371                                 if ( i > scon_stk_ptr )  /* loop finished without a match: push scnum */
372                                         scon_stk[++scon_stk_ptr] = scnum;
373                                 }
374                         }
375                 ;
376
377 rule            :  re2 re  /* head '/' trail: $1 is the head machine from re2, $2 the trailing-context machine */
378                         {
379                         if ( transchar[lastst[$2]] != SYM_EPSILON )
380                                 /* Provide final transition \now/ so it
381                                  * will be marked as a trailing context
382                                  * state.
383                                  */
384                                 $2 = link_machines( $2,
385                                                 mkstate( SYM_EPSILON ) );
386
387                         mark_beginning_as_normal( $2 );
388                         current_state_type = STATE_NORMAL;  /* undo the STATE_TRAILING_CONTEXT set by re2 */
389
390                         if ( previous_continued_action )
391                                 {
392                                 /* We need to treat this as variable trailing
393                                  * context so that the backup does not happen
394                                  * in the action but before the action switch
395                                  * statement.  If the backup happens in the
396                                  * action, then the rules "falling into" this
397                                  * one's action will *also* do the backup,
398                                  * erroneously.
399                                  */
400                                 if ( ! varlength || headcnt != 0 )  /* warn only if the rule was not already variable with headcnt 0 */
401                                         warn(
402                 "trailing context made variable due to preceding '|' action" );
403
404                                 /* Mark as variable. */
405                                 varlength = true;
406                                 headcnt = 0;
407
408                                 }
409
410                         if ( lex_compat || (varlength && headcnt == 0) )
411                                 { /* variable trailing context rule */
412                                 /* Mark the first part of the rule as the
413                                  * accepting "head" part of a trailing
414                                  * context rule.
415                                  *
416                                  * By the way, we didn't do this at the
417                                  * beginning of this production because back
418                                  * then current_state_type was set up for a
419                                  * trail rule, and add_accept() can create
420                                  * a new state ...
421                                  */
422                                 add_accept( $1,
423                                         num_rules | YY_TRAILING_HEAD_MASK );
424                                 variable_trail_rule = true;
425                                 }
426                         
427                         else
428                                 trailcnt = rulelen;  /* rulelen was zeroed by re2, so it now counts only the trailing part */
429
430                         $$ = link_machines( $1, $2 );  /* full machine: head followed by trailing context */
431                         }
432
433                 |  re2 re '$'  /* '/' and '$' together: rejected */
434                         { synerr( _("trailing context used twice") ); }
435
436                 |  re '$'  /* '$' is built as a trailing context consisting of a single '\n' */
437                         {
438                         headcnt = 0;
439                         trailcnt = 1;
440                         rulelen = 1;
441                         varlength = false;
442
443                         current_state_type = STATE_TRAILING_CONTEXT;
444
445                         if ( trlcontxt )
446                                 {
447                                 synerr( _("trailing context used twice") );
448                                 $$ = mkstate( SYM_EPSILON );  /* note: overwritten by the assignment at the end of this action */
449                                 }
450
451                         else if ( previous_continued_action )
452                                 {
453                                 /* See the comment in the rule for "re2 re"
454                                  * above.
455                                  */
456                                 warn(
457                 "trailing context made variable due to preceding '|' action" );
458
459                                 varlength = true;
460                                 }
461
462                         if ( lex_compat || varlength )
463                                 {
464                                 /* Again, see the comment in the rule for
465                                  * "re2 re" above.
466                                  */
467                                 add_accept( $1,
468                                         num_rules | YY_TRAILING_HEAD_MASK );
469                                 variable_trail_rule = true;
470                                 }
471
472                         trlcontxt = true;
473
474                         eps = mkstate( SYM_EPSILON );
475                         $$ = link_machines( $1,  /* $1, then an epsilon state, then a '\n' state */
476                                 link_machines( eps, mkstate( '\n' ) ) );
477                         }
478
479                 |  re  /* pattern with neither '/' nor '$' at this level */
480                         {
481                         $$ = $1;
482
483                         if ( trlcontxt )  /* only taken if trlcontxt has already been set for this rule */
484                                 {
485                                 if ( lex_compat || (varlength && headcnt == 0) )
486                                         /* Both head and trail are
487                                          * variable-length.
488                                          */
489                                         variable_trail_rule = true;
490                                 else
491                                         trailcnt = rulelen;
492                                 }
493                         }
494                 ;
495
496
497 re              :  re '|' series  /* alternation, built left to right */
498                         {
499                         varlength = true;  /* any alternation marks the pattern as variable-length */
500                         $$ = mkor( $1, $3 );
501                         }
502
503                 |  series
504                         { $$ = $1; }
505                 ;
506
507
508 re2             :  re '/'  /* head of a trailing-context rule, reduced as soon as the '/' is seen */
509                         {
510                         /* This rule is written separately so the
511                          * reduction will occur before the trailing
512                          * series is parsed.
513                          */
514
515                         if ( trlcontxt )
516                                 synerr( _("trailing context used twice") );
517                         else
518                                 trlcontxt = true;
519
520                         if ( varlength )
521                                 /* We hope the trailing context is
522                                  * fixed-length.
523                                  */
524                                 varlength = false;
525                         else
526                                 headcnt = rulelen;  /* fixed-length head: record its length */
527
528                         rulelen = 0;  /* restart the count for the trailing part */
529
530                         current_state_type = STATE_TRAILING_CONTEXT;
531                         $$ = $1;
532                         }
533                 ;
534
535 series          :  series singleton
536                         {
537                         /* This is where concatenation of adjacent patterns
538                          * gets done.
539                          */
540                         $$ = link_machines( $1, $2 );
541                         }
542
543                 |  singleton
544                         { $$ = $1; }
545
546                 |  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX  /* {m,n} with POSIX tokens: repeats the whole series; $3 = m, $5 = n */
547                         {
548                         varlength = true;
549
550                         if ( $3 > $5 || $3 < 0 )  /* reject m > n and negative m */
551                                 {
552                                 synerr( _("bad iteration values") );
553                                 $$ = $1;  /* keep the unrepeated machine after the error */
554                                 }
555                         else
556                                 {
557                                 if ( $3 == 0 )
558                                         {
559                                         if ( $5 <= 0 )  /* {0,0} is rejected as well */
560                                                 {
561                                                 synerr(
562                                                 _("bad iteration values") );
563                                                 $$ = $1;
564                                                 }
565                                         else
566                                                 $$ = mkopt(  /* {0,n} is built as an optional {1,n} */
567                                                         mkrep( $1, 1, $5 ) );
568                                         }
569                                 else
570                                         $$ = mkrep( $1, $3, $5 );
571                                 }
572                         }
573
574                 |  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX  /* {m,}: at least m repetitions of the whole series */
575                         {
576                         varlength = true;
577
578                         if ( $3 <= 0 )  /* {0,} is rejected here */
579                                 {
580                                 synerr( _("iteration value must be positive") );
581                                 $$ = $1;
582                                 }
583
584                         else
585                                 $$ = mkrep( $1, $3, INFINITE_REPEAT );
586                         }
587
588                 |  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX  /* {n}: exactly n repetitions of the whole series */
589                         {
590                         /* The series could be something like "(foo)",
591                          * in which case we have no idea what its length
592                          * is, so we punt here.
593                          */
594                         varlength = true;
595
596                         if ( $3 <= 0 )
597                                 {
598                                   synerr( _("iteration value must be positive")
599                                           );
600                                 $$ = $1;
601                                 }
602
603                         else
604                                 $$ = link_machines( $1,  /* the original followed by what copysingl( $1, n - 1 ) returns */
605                                                 copysingl( $1, $3 - 1 ) );
606                         }
607
608                 ;
609
610 singleton       :  singleton '*'  /* closure operators: each marks the rule variable-length */
611                         {
612                         varlength = true;
613
614                         $$ = mkclos( $1 );
615                         }
616
617                 |  singleton '+'
618                         {
619                         varlength = true;
620                         $$ = mkposcl( $1 );
621                         }
622
623                 |  singleton '?'
624                         {
625                         varlength = true;
626                         $$ = mkopt( $1 );
627                         }
628
629                 |  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX  /* {m,n} with flex tokens: repeats only this singleton; $3 = m, $5 = n */
630                         {
631                         varlength = true;
632
633                         if ( $3 > $5 || $3 < 0 )  /* reject m > n and negative m */
634                                 {
635                                 synerr( _("bad iteration values") );
636                                 $$ = $1;  /* keep the unrepeated machine after the error */
637                                 }
638                         else
639                                 {
640                                 if ( $3 == 0 )
641                                         {
642                                         if ( $5 <= 0 )  /* {0,0} is rejected as well */
643                                                 {
644                                                 synerr(
645                                                 _("bad iteration values") );
646                                                 $$ = $1;
647                                                 }
648                                         else
649                                                 $$ = mkopt(  /* {0,n} is built as an optional {1,n} */
650                                                         mkrep( $1, 1, $5 ) );
651                                         }
652                                 else
653                                         $$ = mkrep( $1, $3, $5 );
654                                 }
655                         }
656
657                 |  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX  /* {m,}: at least m repetitions */
658                         {
659                         varlength = true;
660
661                         if ( $3 <= 0 )  /* {0,} is rejected here */
662                                 {
663                                 synerr( _("iteration value must be positive") );
664                                 $$ = $1;
665                                 }
666
667                         else
668                                 $$ = mkrep( $1, $3, INFINITE_REPEAT );
669                         }
670
671                 |  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX  /* {n}: exactly n repetitions */
672                         {
673                         /* The singleton could be something like "(foo)",
674                          * in which case we have no idea what its length
675                          * is, so we punt here.
676                          */
677                         varlength = true;
678
679                         if ( $3 <= 0 )
680                                 {
681                                 synerr( _("iteration value must be positive") );
682                                 $$ = $1;
683                                 }
684
685                         else
686                                 $$ = link_machines( $1,  /* the original followed by what copysingl( $1, n - 1 ) returns */
687                                                 copysingl( $1, $3 - 1 ) );
688                         }
689
690                 |  '.'
691                         {
692                         if ( ! madeany )  /* build both ccls once, on the first '.' encountered */
693                                 {
694                                 /* Create the '.' character class. */
695                     ccldot = cclinit();
696                     ccladd( ccldot, '\n' );
697                     cclnegate( ccldot );
698
699                     if ( useecs )
700                         mkeccl( ccltbl + cclmap[ccldot],
701                             ccllen[ccldot], nextecm,
702                             ecgroup, csize, csize );
703
704                                 /* Create the (?s:'.') character class. */
705                     cclany = cclinit();
706                     cclnegate( cclany );
707
708                     if ( useecs )
709                         mkeccl( ccltbl + cclmap[cclany],
710                             ccllen[cclany], nextecm,
711                             ecgroup, csize, csize );
712
713                                 madeany = true;
714                                 }
715
716                         ++rulelen;  /* '.' contributes one character to the rule length */
717
718             if (sf_dot_all())  /* choose the class that includes '\n' when sf_dot_all() is true */
719                 $$ = mkstate( -cclany );
720             else
721                 $$ = mkstate( -ccldot );
722                         }
723
724                 |  fullccl  /* $1 is a ccl number */
725                         {
726                                 /* Sort characters for fast searching.
727                                  */
728                                 qsort( ccltbl + cclmap[$1], ccllen[$1], sizeof (*ccltbl), cclcmp );
729
730                         if ( useecs )
731                                 mkeccl( ccltbl + cclmap[$1], ccllen[$1],
732                                         nextecm, ecgroup, csize, csize );
733
734                         ++rulelen;
735
736                         if (ccl_has_nl[$1])  /* propagate the ccl's newline flag to the current rule */
737                                 rule_has_nl[num_rules] = true;
738
739                         $$ = mkstate( -$1 );
740                         }
741
742                 |  PREVCCL  /* $1 indexes ccl_has_nl[] and is passed to mkstate() negated, like a fullccl value; no sort or mkeccl() here */
743                         {
744                         ++rulelen;
745
746                         if (ccl_has_nl[$1])
747                                 rule_has_nl[num_rules] = true;
748
749                         $$ = mkstate( -$1 );
750                         }
751
752                 |  '"' string '"'  /* quoted string: the machine comes from the string rule */
753                         { $$ = $2; }
754
755                 |  '(' re ')'  /* parenthesized sub-expression */
756                         { $$ = $2; }
757
758                 |  CHAR  /* single character; $1 is its value */
759                         {
760                         ++rulelen;
761
762                         if ($1 == nlch)  /* the character equals nlch: flag the rule in rule_has_nl[] */
763                                 rule_has_nl[num_rules] = true;
764
765             if (sf_case_ins() && has_case($1))
766                 /* create an alternation, as in (a|A) */
767                 $$ = mkor (mkstate($1), mkstate(reverse_case($1)));
768             else
769                 $$ = mkstate( $1 );
770                         }
771                 ;
/* fullccl - one bracketed character class, or several of them combined
 * left to right through ccl_set_diff() (CCL_OP_DIFF) and ccl_set_union()
 * (CCL_OP_UNION).  The rule's value is the resulting class.
 */
fullccl		:  fullccl CCL_OP_DIFF braceccl
			{
			$$ = ccl_set_diff( $1, $3 );
			}

		|  fullccl CCL_OP_UNION braceccl
			{
			$$ = ccl_set_union( $1, $3 );
			}

		|  braceccl
			{
			/* Spelled-out form of yacc's default action. */
			$$ = $1;
			}
		;
777
/* braceccl - a character class written between '[' and ']'.  A leading '^'
 * passes the class through cclnegate() before it is handed on.
 */
braceccl	:  '[' ccl ']'
			{
			$$ = $2;
			}

		|  '[' '^' ccl ']'
			{
			cclnegate( $3 );
			$$ = $3;
			}
		;
788
/* ccl - the members listed inside a bracketed character class.  Every
 * non-empty alternative extends the class identified by $1 and passes that
 * same identifier on as its value; the empty alternative obtains a new
 * class from cclinit().  cclsorted and lastchar record whether the members
 * seen so far were added in strictly increasing order.
 */
ccl		:  ccl CHAR '-' CHAR
			{
			/* In a case-insensitive scanner, warn when the intent
			 * of the range cannot be determined:
			 *
			 *  - one endpoint has case and the other does not, or
			 *    the two endpoints differ in case
			 *    (examples: [@-z] or [S-t]);
			 *
			 *  - neither endpoint has case and
			 *    range_covers_case() does not accept the range,
			 *    e.g. [@-_], which in ASCII spans [A-Z] but not
			 *    [a-z].
			 */
			if ( sf_case_ins()
			     && ( has_case( $2 ) != has_case( $4 )
				  || ( has_case( $2 )
				       && ( b_islower( $2 ) != b_islower( $4 ) ) )
				  || ( has_case( $2 )
				       && ( b_isupper( $2 ) != b_isupper( $4 ) ) )
				  || ( ! has_case( $2 ) && ! has_case( $4 )
				       && ! range_covers_case( $2, $4 ) ) ) )
				format_warn3(
					_("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
					$2, $4 );

			if ( $2 <= $4 )
				{
				for ( i = $2; i <= $4; ++i )
					ccladd( $1, i );

				/* The class stays sorted only while each new
				 * range begins above the last member added.
				 */
				cclsorted = ($2 > lastchar) && cclsorted;
				lastchar = $4;

				/* Case-insensitive scanner: add the same range
				 * once more with both endpoints case-reversed.
				 */
				if ( sf_case_ins() && has_case( $2 ) && has_case( $4 ) )
					{
					$2 = reverse_case( $2 );
					$4 = reverse_case( $4 );

					for ( i = $2; i <= $4; ++i )
						ccladd( $1, i );

					cclsorted = ($2 > lastchar) && cclsorted;
					lastchar = $4;
					}
				}

			else
				synerr( _("negative range in character class") );

			$$ = $1;
			}

		|  ccl CHAR
			{
			ccladd( $1, $2 );
			cclsorted = ($2 > lastchar) && cclsorted;
			lastchar = $2;

			/* Case-insensitive scanner: add the case-reversed
			 * character as well.
			 */
			if ( sf_case_ins() && has_case( $2 ) )
				{
				$2 = reverse_case( $2 );
				ccladd( $1, $2 );

				cclsorted = ($2 > lastchar) && cclsorted;
				lastchar = $2;
				}

			$$ = $1;
			}

		|  ccl ccl_expr
			{
			/* Ordering is not tracked across a class expression,
			 * so give up on cclsorted.
			 */
			cclsorted = false;
			$$ = $1;
			}

		|  /* empty */
			{
			cclsorted = true;
			lastchar = 0;
			$$ = cclinit();
			currccl = $$;
			}
		;
881
/* ccl_expr - a named class expression appearing inside a character class.
 * Each CCE_* token invokes CCL_EXPR, and each CCE_NEG_* token invokes
 * CCL_NEG_EXPR, with the corresponding character predicate.
 *
 * In a case-insensitive scanner the lower and upper classes each invoke
 * CCL_EXPR for both cases, while their negated forms only produce a
 * warning and invoke nothing.
 */
ccl_expr	:  CCE_ALNUM
			{
			CCL_EXPR(isalnum);
			}

		|  CCE_ALPHA
			{
			CCL_EXPR(isalpha);
			}

		|  CCE_BLANK
			{
			CCL_EXPR(IS_BLANK);
			}

		|  CCE_CNTRL
			{
			CCL_EXPR(iscntrl);
			}

		|  CCE_DIGIT
			{
			CCL_EXPR(isdigit);
			}

		|  CCE_GRAPH
			{
			CCL_EXPR(isgraph);
			}

		|  CCE_LOWER
			{
			CCL_EXPR(islower);

			if ( sf_case_ins() )
				{
				CCL_EXPR(isupper);
				}
			}

		|  CCE_PRINT
			{
			CCL_EXPR(isprint);
			}

		|  CCE_PUNCT
			{
			CCL_EXPR(ispunct);
			}

		|  CCE_SPACE
			{
			CCL_EXPR(isspace);
			}

		|  CCE_XDIGIT
			{
			CCL_EXPR(isxdigit);
			}

		|  CCE_UPPER
			{
			CCL_EXPR(isupper);

			if ( sf_case_ins() )
				{
				CCL_EXPR(islower);
				}
			}

		|  CCE_NEG_ALNUM
			{
			CCL_NEG_EXPR(isalnum);
			}

		|  CCE_NEG_ALPHA
			{
			CCL_NEG_EXPR(isalpha);
			}

		|  CCE_NEG_BLANK
			{
			CCL_NEG_EXPR(IS_BLANK);
			}

		|  CCE_NEG_CNTRL
			{
			CCL_NEG_EXPR(iscntrl);
			}

		|  CCE_NEG_DIGIT
			{
			CCL_NEG_EXPR(isdigit);
			}

		|  CCE_NEG_GRAPH
			{
			CCL_NEG_EXPR(isgraph);
			}

		|  CCE_NEG_PRINT
			{
			CCL_NEG_EXPR(isprint);
			}

		|  CCE_NEG_PUNCT
			{
			CCL_NEG_EXPR(ispunct);
			}

		|  CCE_NEG_SPACE
			{
			CCL_NEG_EXPR(isspace);
			}

		|  CCE_NEG_XDIGIT
			{
			CCL_NEG_EXPR(isxdigit);
			}

		|  CCE_NEG_LOWER
			{
			if ( ! sf_case_ins() )
				{
				CCL_NEG_EXPR(islower);
				}
			else
				{
				warn(_("[:^lower:] is ambiguous in case insensitive scanner"));
				}
			}

		|  CCE_NEG_UPPER
			{
			if ( ! sf_case_ins() )
				{
				CCL_NEG_EXPR(isupper);
				}
			else
				{
				warn(_("[:^upper:] ambiguous in case insensitive scanner"));
				}
			}
		;
927                 
/* string - a possibly empty run of CHAR tokens.  The empty alternative
 * yields a SYM_EPSILON state; each further character is turned into a
 * machine and linked onto the machine built so far.
 */
string		:  string CHAR
			{
			++rulelen;

			if ( $2 == nlch )
				rule_has_nl[num_rules] = true;

			/* A cased character in a case-insensitive scanner
			 * becomes an alternation of both cases, as in (a|A).
			 */
			if ( ! sf_case_ins() || ! has_case( $2 ) )
				$$ = mkstate( $2 );
			else
				$$ = mkor( mkstate( $2 ), mkstate( reverse_case( $2 ) ) );

			$$ = link_machines( $1, $$ );
			}

		|  /* empty */
			{
			$$ = mkstate( SYM_EPSILON );
			}
		;
946
947 %%
948
949
950 /* build_eof_action - build the "<<EOF>>" action for the active start
951  *                    conditions
952  */
953
954 void build_eof_action()
955         {
956         register int i;
957         char action_text[MAXLINE];
958
959         for ( i = 1; i <= scon_stk_ptr; ++i )
960                 {
961                 if ( sceof[scon_stk[i]] )
962                         format_pinpoint_message(
963                                 "multiple <<EOF>> rules for start condition %s",
964                                 scname[scon_stk[i]] );
965
966                 else
967                         {
968                         sceof[scon_stk[i]] = true;
969
970                         if (previous_continued_action /* && previous action was regular */)
971                                 add_action("YY_RULE_SETUP\n");
972
973                         snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n",
974                                 scname[scon_stk[i]] );
975                         add_action( action_text );
976                         }
977                 }
978
979         line_directive_out( (FILE *) 0, 1 );
980
981         /* This isn't a normal rule after all - don't count it as
982          * such, so we don't have any holes in the rule numbering
983          * (which make generating "rule can never match" warnings
984          * more difficult.
985          */
986         --num_rules;
987         ++num_eof_rules;
988         }
989
990
991 /* format_synerr - write out formatted syntax error */
992
993 void format_synerr( msg, arg )
994 const char *msg, arg[];
995         {
996         char errmsg[MAXLINE];
997
998         (void) snprintf( errmsg, sizeof(errmsg), msg, arg );
999         synerr( errmsg );
1000         }
1001
1002
1003 /* synerr - report a syntax error */
1004
1005 void synerr( str )
1006 const char *str;
1007         {
1008         syntaxerror = true;
1009         pinpoint_message( str );
1010         }
1011
1012
1013 /* format_warn - write out formatted warning */
1014
1015 void format_warn( msg, arg )
1016 const char *msg, arg[];
1017         {
1018         char warn_msg[MAXLINE];
1019
1020         snprintf( warn_msg, sizeof(warn_msg), msg, arg );
1021         warn( warn_msg );
1022         }
1023
1024
1025 /* warn - report a warning, unless -w was given */
1026
1027 void warn( str )
1028 const char *str;
1029         {
1030         line_warning( str, linenum );
1031         }
1032
1033 /* format_pinpoint_message - write out a message formatted with one string,
1034  *                           pinpointing its location
1035  */
1036
1037 void format_pinpoint_message( msg, arg )
1038 const char *msg, arg[];
1039         {
1040         char errmsg[MAXLINE];
1041
1042         snprintf( errmsg, sizeof(errmsg), msg, arg );
1043         pinpoint_message( errmsg );
1044         }
1045
1046
1047 /* pinpoint_message - write out a message, pinpointing its location */
1048
1049 void pinpoint_message( str )
1050 const char *str;
1051         {
1052         line_pinpoint( str, linenum );
1053         }
1054
1055
1056 /* line_warning - report a warning at a given line, unless -w was given */
1057
1058 void line_warning( str, line )
1059 const char *str;
1060 int line;
1061         {
1062         char warning[MAXLINE];
1063
1064         if ( ! nowarn )
1065                 {
1066                 snprintf( warning, sizeof(warning), "warning, %s", str );
1067                 line_pinpoint( warning, line );
1068                 }
1069         }
1070
1071
1072 /* line_pinpoint - write out a message, pinpointing it at the given line */
1073
1074 void line_pinpoint( str, line )
1075 const char *str;
1076 int line;
1077         {
1078         fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
1079         }
1080
1081
/* yyerror - eat up an error message from the parser;
 *           currently, messages are ignored
 */

void yyerror( const char *msg )
	{
	/* Intentionally empty: the message is discarded. */
	(void) msg;
	}