Initial import from FreeBSD RELENG_4:
[dragonfly.git] / usr.bin / lex / scan.l
1 /* scan.l - scanner for flex input */
2
3 %{
4 /*-
5  * Copyright (c) 1990 The Regents of the University of California.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Vern Paxson.
10  * 
11  * The United States Government has rights in this work pursuant
12  * to contract no. DE-AC03-76SF00098 between the United States
13  * Department of Energy and the University of California.
14  *
15  * Redistribution and use in source and binary forms are permitted provided
16  * that: (1) source distributions retain this entire copyright notice and
17  * comment, and (2) distributions including binaries display the following
18  * acknowledgement:  ``This product includes software developed by the
19  * University of California, Berkeley and its contributors'' in the
20  * documentation or other materials provided with the distribution and in
21  * all advertising materials mentioning features or use of this software.
22  * Neither the name of the University nor the names of its contributors may
23  * be used to endorse or promote products derived from this software without
24  * specific prior written permission.
25  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
26  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
27  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
28  */
29
30 /* $Header: /home/daffy/u0/vern/flex/RCS/scan.l,v 2.56 95/04/24 12:17:19 vern Exp $ */
31 /* $FreeBSD: src/usr.bin/lex/scan.l,v 1.5 1999/10/27 07:56:46 obrien Exp $ */
32
33 #include "flexdef.h"
34 #include "parse.h"
35
/* Pass the text of the current token to add_action(). */
#define ACTION_ECHO add_action( yytext )

/* Call action_define( def, 1 ) when should_define is nonzero, and do nothing
 * otherwise.  Wrapped in do/while so it behaves as a single statement.
 */
#define ACTION_IFDEF(def, should_define) \
        do \
                { \
                if ( should_define ) \
                        action_define( def, 1 ); \
                } \
        while ( 0 )

/* Note: the expansion carries its own terminating semicolon. */
#define MARK_END_OF_PROLOG mark_prolog();

/* Name and signature of the generated scanning routine. */
#define YY_DECL \
        int flexscan()

/* Return the first character of the token as a CHAR.  The cast keeps
 * characters with the high bit set from becoming negative values.
 */
#define RETURNCHAR \
        do \
                { \
                yylval = (unsigned char) yytext[0]; \
                return CHAR; \
                } \
        while ( 0 )

/* Copy the token into nmstr and return NAME.
 *
 * The token length is not bounded by the pattern, so it is checked before
 * copying: an over-long name is reported with synerr() and truncated
 * rather than written past the end of the buffer.
 * NOTE(review): assumes nmstr holds MAXLINE bytes, like nmdef does; its
 * declaration is not in this file -- confirm.
 */
#define RETURNNAME \
        do \
                { \
                if ( yyleng < MAXLINE ) \
                        strcpy( nmstr, yytext ); \
                else \
                        { \
                        synerr( _( "name too long" ) ); \
                        strncpy( nmstr, yytext, MAXLINE - 1 ); \
                        nmstr[MAXLINE - 1] = '\0'; \
                        } \
                return NAME; \
                } \
        while ( 0 )

/* Push the characters str[start..] back onto the input, last one first, so
 * they are rescanned in their original order.  Uses the local variable i of
 * the scanning routine.  The length is converted to int before subtracting
 * so that an empty string gives -1 instead of relying on unsigned wrap.
 */
#define PUT_BACK_STRING(str, start) \
        for ( i = (int) strlen( str ) - 1; i >= (start); --i ) \
                unput((str)[i])

/* Record that REJECT is used when all_upper( str ) is true. */
#define CHECK_REJECT(str) \
        do \
                { \
                if ( all_upper( str ) ) \
                        reject = true; \
                } \
        while ( 0 )

/* Record that yymore() is used when all_lower( str ) is true. */
#define CHECK_YYMORE(str) \
        do \
                { \
                if ( all_lower( str ) ) \
                        yymore_used = true; \
                } \
        while ( 0 )
67 %}
68
69 %option caseless nodefault outfile="scan.c" stack noyy_top_state
70 %option nostdinit
71
/* Exclusive start conditions; each one names a scanning context that has
 * its own group of rules in the rules section.
 */
72 %x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
73 %x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION
74 %x OPTION LINEDIR
75
/* WS is one or more blanks, OPTWS zero or more, and NOT_WS a single
 * character that is neither a blank nor a newline.
 */
76 WS              [[:blank:]]+
77 OPTWS           [[:blank:]]*
78 NOT_WS          [^[:blank:]\n]
79
/* A line ending: a newline, optionally preceded by a carriage return. */
80 NL              \r?\n
81
/* NAME is a letter or underscore followed by letters, digits, underscores
 * or hyphens.  NOT_NAME is a run of characters that are not letters,
 * underscores, stars or newlines.
 */
82 NAME            ([[:alpha:]_][[:alnum:]_-]*)
83 NOT_NAME        [^[:alpha:]_*\n]+
84
/* Start-condition names have the same form as other names. */
85 SCNAME          {NAME}
86
/* A backslash followed by any non-newline character, by one to three octal
 * digits, or by x and one or two hexadecimal digits.
 */
87 ESCSEQ          (\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))
88
/* Characters allowed inside a bracketed character class: the first one may
 * be a right bracket, later ones may not.  CCL_EXPR has the shape of a
 * named class expression, a letter run between [: and :].
 */
89 FIRST_CCL_CHAR  ([^\\\n]|{ESCSEQ})
90 CCL_CHAR        ([^\\\n\]]|{ESCSEQ})
91 CCL_EXPR        ("[:"[[:alpha:]]+":]")
92
/* Letters of the single-letter % directives that the INITIAL rules accept
 * and ignore.
 */
93 LEXOPT          [aceknopr]
94
95 %%
96         static int bracelevel, didadef, indented_code; /* brace depth of code being copied; whether PICKUPDEF installed a definition; whether the current code block was started by indentation */
97         static int doing_rule_action = false; /* set when the action being copied belongs to a rule, so YY_BREAK is appended after it */
98         static int option_sense; /* current on/off sense in %option lines; set by whitespace, flipped by "no" */
99
100         int doing_codeblock = false; /* set for a %{ block in section 2; not static, so it is reset on every call */
101         int i; /* scratch index used by PUT_BACK_STRING and the PICKUPDEF rule */
102         Char nmdef[MAXLINE], myesc(); /* buffer for the text of a name definition; old-style declaration of myesc() */
103
104
105 <INITIAL>{
106         ^{WS}           indented_code = true; BEGIN(CODEBLOCK); /* an indented line is a one-line code block */
107         ^"/*"           ACTION_ECHO; yy_push_state( COMMENT ); /* echo the opener, copy the rest in COMMENT */
108         ^#{OPTWS}line{WS}       yy_push_state( LINEDIR ); /* line directive: its arguments are read in LINEDIR */
109         ^"%s"{NAME}?    return SCDECL; /* %s start-condition declaration */
110         ^"%x"{NAME}?    return XSCDECL; /* %x start-condition declaration */
111         ^"%{".*{NL}     { /* opening of a %{ ... %} code block */
112                         ++linenum;
113                         line_directive_out( (FILE *) 0, 1 );
114                         indented_code = false; /* this block ends at %} and not at the first newline */
115                         BEGIN(CODEBLOCK);
116                         }
117
118         {WS}            /* discard */
119
120         ^"%%".*         { /* end of section 1 */
121                         sectnum = 2;
122                         bracelevel = 0;
123                         mark_defs1();
124                         line_directive_out( (FILE *) 0, 1 );
125                         BEGIN(SECT2PROLOG);
126                         return SECTEND;
127                         }
128
129         ^"%pointer".*{NL}       yytext_is_array = false; ++linenum;
130         ^"%array".*{NL}         yytext_is_array = true; ++linenum;
131
132         ^"%option"      BEGIN(OPTION); return OPTION_OP;
133
134         ^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL}      ++linenum; /* single-letter directive with optional number: ignore */
135         ^"%"{LEXOPT}{WS}.*{NL}  ++linenum;      /* single-letter directive with other arguments: ignore */
136
137         ^"%"[^sxaceknopr{}].*   synerr( _( "unrecognized '%' directive" ) );
138
139         ^{NAME}         { /* a name at the start of a line begins a definition */
140                         strcpy( nmstr, yytext ); /* NOTE(review): unbounded copy; the token length is not checked against the size of nmstr */
141                         didadef = false;
142                         BEGIN(PICKUPDEF);
143                         }
144
145         {SCNAME}        RETURNNAME; /* a name not at the start of a line */
146         ^{OPTWS}{NL}    ++linenum; /* allows blank lines in section 1 */
147         {OPTWS}{NL}     ACTION_ECHO; ++linenum; /* maybe end of comment line */
148 }
149
150
151 <COMMENT>{
152         "*/"            ACTION_ECHO; yy_pop_state(); /* comment closed: return to the state that pushed COMMENT */
153         "*"             ACTION_ECHO; /* a star that does not close the comment */
154         [^*\n]+         ACTION_ECHO; /* ordinary comment text */
155         [^*\n]*{NL}     ++linenum; ACTION_ECHO; /* comment text up to a line end; keep the line count right */
156 }
157
158 <LINEDIR>{
159         \n              yy_pop_state(); /* end of the directive: return to the state that pushed LINEDIR */
160         [[:digit:]]+    linenum = myctoi( yytext ); /* the number given in the directive becomes the current line number */
161
162         \"[^"\n]*\"     { /* quoted file name: replaces infilename */
163                         flex_free( (void *) infilename ); /* release the previous name first */
164                         infilename = copy_string( yytext + 1 ); /* copy without the opening quote */
165                         infilename[strlen( infilename ) - 1] = '\0'; /* overwrite the closing quote */
166                         }
167         .               /* ignore spurious characters */
168 }
169
170 <CODEBLOCK>{
171         ^"%}".*{NL}     ++linenum; BEGIN(INITIAL); /* closing %} line: back to section 1 scanning */
172
173         {NAME}|{NOT_NAME}|.     ACTION_ECHO; /* copy code text through */
174
175         {NL}            {
176                         ++linenum;
177                         ACTION_ECHO;
178                         if ( indented_code ) /* a block started by indentation lasts one line only */
179                                 BEGIN(INITIAL);
180                         }
181 }
182
183
<PICKUPDEF>{
        {WS}            /* separates name and definition */

        {NOT_WS}.*      {
                        /* nmdef holds MAXLINE bytes, and the pattern puts no
                         * limit on the length of the match, so check before
                         * copying instead of overrunning the buffer.
                         */
                        if ( yyleng >= MAXLINE )
                                {
                                synerr( _( "definition too long" ) );

                                /* A definition was present, so do not also
                                 * report it as incomplete at the line end.
                                 */
                                didadef = true;
                                }

                        else
                                {
                                strcpy( (char *) nmdef, yytext );

                                /* Skip trailing whitespace.  A carriage
                                 * return counts too: the pattern above
                                 * matches it, but it belongs to the line
                                 * ending and not to the definition.
                                 */
                                for ( i = (int) strlen( (char *) nmdef ) - 1;
                                      i >= 0 && (nmdef[i] == ' ' ||
                                                 nmdef[i] == '\t' ||
                                                 nmdef[i] == '\r');
                                      --i )
                                        ;

                                nmdef[i + 1] = '\0';

                                /* Nothing is left when the match was only a
                                 * carriage return; then the line end rule
                                 * reports the missing definition.
                                 */
                                if ( i >= 0 )
                                        {
                                        ndinstal( nmstr, nmdef );
                                        didadef = true;
                                        }
                                }
                        }

        {NL}            {
                        if ( ! didadef )
                                synerr( _( "incomplete name definition" ) );
                        BEGIN(INITIAL);
                        ++linenum;
                        }
}
209
210
211 <OPTION>{
212         {NL}            ++linenum; BEGIN(INITIAL); /* the option list ends with the line */
213         {WS}            option_sense = true; /* each option starts out as enabled */
214
215         "="             return '=';
216
217         no              option_sense = ! option_sense; /* each "no" prefix flips the sense */
218
219         7bit            csize = option_sense ? 128 : 256;
220         8bit            csize = option_sense ? 256 : 128;
221
222         align           long_align = option_sense;
223         always-interactive      {
224                         action_define( "YY_ALWAYS_INTERACTIVE", option_sense );
225                         }
226         array           yytext_is_array = option_sense;
227         backup          backing_up_report = option_sense;
228         batch           interactive = ! option_sense;
229         "c++"           C_plus_plus = option_sense;
230         caseful|case-sensitive          caseins = ! option_sense;
231         caseless|case-insensitive       caseins = option_sense;
232         debug           ddebug = option_sense;
233         default         spprdflt = ! option_sense;
234         ecs             useecs = option_sense;
235         fast            { /* sets the same flags whatever the sense is */
236                         useecs = usemecs = false;
237                         use_read = fullspd = true;
238                         }
239         full            { /* sets the same flags whatever the sense is */
240                         useecs = usemecs = false;
241                         use_read = fulltbl = true;
242                         }
243         input           ACTION_IFDEF("YY_NO_INPUT", ! option_sense); /* macro is defined only for the negated option */
244         interactive     interactive = option_sense;
245         lex-compat      lex_compat = option_sense;
246         main            {
247                         action_define( "YY_MAIN", option_sense );
248                         do_yywrap = ! option_sense;
249                         }
250         meta-ecs        usemecs = option_sense;
251         never-interactive       {
252                         action_define( "YY_NEVER_INTERACTIVE", option_sense );
253                         }
254         perf-report     performance_report += option_sense ? 1 : -1; /* a counter, not a flag: repeated options accumulate */
255         pointer         yytext_is_array = ! option_sense;
256         read            use_read = option_sense;
257         reject          reject_really_used = option_sense;
258         stack           action_define( "YY_STACK_USED", option_sense );
259         stdinit         do_stdinit = option_sense;
260         stdout          use_stdout = option_sense;
261         unput           ACTION_IFDEF("YY_NO_UNPUT", ! option_sense);
262         verbose         printstats = option_sense;
263         warn            nowarn = ! option_sense;
264         yylineno        do_yylineno = option_sense;
265         yymore          yymore_really_used = option_sense;
266         yywrap          do_yywrap = option_sense;
267
268         yy_push_state   ACTION_IFDEF("YY_NO_PUSH_STATE", ! option_sense);
269         yy_pop_state    ACTION_IFDEF("YY_NO_POP_STATE", ! option_sense);
270         yy_top_state    ACTION_IFDEF("YY_NO_TOP_STATE", ! option_sense);
271
272         yy_scan_buffer  ACTION_IFDEF("YY_NO_SCAN_BUFFER", ! option_sense);
273         yy_scan_bytes   ACTION_IFDEF("YY_NO_SCAN_BYTES", ! option_sense);
274         yy_scan_string  ACTION_IFDEF("YY_NO_SCAN_STRING", ! option_sense);
275
276         outfile         return OPT_OUTFILE; /* options that take a value are returned as tokens */
277         prefix          return OPT_PREFIX;
278         yyclass         return OPT_YYCLASS;
279
280         \"[^"\n]*\"     { /* quoted option value, returned as a NAME without its quotes */
281                         strcpy( nmstr, yytext + 1 ); /* NOTE(review): unbounded copy; the token length is not checked against the size of nmstr */
282                         nmstr[strlen( nmstr ) - 1] = '\0'; /* overwrite the closing quote */
283                         return NAME;
284                         }
285
286         (([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|. { /* any other word not starting with "no", or any single character */
287                         format_synerr( _( "unrecognized %%option: %s" ),
288                                 yytext );
289                         BEGIN(RECOVER);
290                         }
291 }
292
293 <RECOVER>.*{NL}         ++linenum; BEGIN(INITIAL); /* skip the rest of the offending line */
294
295
296 <SECT2PROLOG>{
297         ^"%{".* ++bracelevel; yyless( 2 );      /* eat only %{ */
298         ^"%}".* --bracelevel; yyless( 2 );      /* eat only %} */
299
300         ^{WS}.* ACTION_ECHO;    /* indented code in prolog */
301
302         ^{NOT_WS}.*     {       /* non-indented line: code inside %{ ... %}, else the first rule */
303                         if ( bracelevel <= 0 )
304                                 { /* not in %{ ... %} */
305                                 yyless( 0 );    /* put it all back */
306                                 yy_set_bol( 1 ); /* so the text is rescanned as starting a line */
307                                 mark_prolog();
308                                 BEGIN(SECT2);
309                                 }
310                         else
311                                 ACTION_ECHO;
312                         }
313
314         .*              ACTION_ECHO; /* text not at the start of a line, such as what follows %{ or %} */
315         {NL}    ++linenum; ACTION_ECHO;
316
317         <<EOF>>         { /* input ended before any rule was seen */
318                         mark_prolog();
319                         sectnum = 0;
320                         yyterminate(); /* to stop the parser */
321                         }
322 }
323
324 <SECT2>{
325         ^{OPTWS}{NL}    ++linenum; /* allow blank lines in section 2 */
326
327         ^{OPTWS}"%{"    { /* code block between rules */
328                         indented_code = false;
329                         doing_codeblock = true;
330                         bracelevel = 1;
331                         BEGIN(PERCENT_BRACE_ACTION);
332                         }
333
334         ^{OPTWS}"<"     BEGIN(SC); return '<'; /* start-condition list in front of a rule */
335         ^{OPTWS}"^"     return '^'; /* caret at the start of a rule */
336         \"              BEGIN(QUOTE); return '"';
337         "{"/[[:digit:]] BEGIN(NUM); return '{'; /* brace followed by a digit opens a repeat count */
338         "$"/([[:blank:]]|{NL})  return '$'; /* dollar is special only just before whitespace or a line end */
339
340         {WS}"%{"                { /* rule action written as a %{ block */
341                         bracelevel = 1;
342                         BEGIN(PERCENT_BRACE_ACTION);
343
344                         if ( in_rule )
345                                 {
346                                 doing_rule_action = true;
347                                 in_rule = false;
348                                 return '\n'; /* tells the parser that the pattern is complete */
349                                 }
350                         }
351         {WS}"|".*{NL}   continued_action = true; ++linenum; return '\n'; /* "|" action: shares the action of the next rule */
352
353         ^{WS}"/*"       { /* indented comment between rules: copied through as action text */
354                         yyless( yyleng - 2 );   /* put back '/', '*' */
355                         bracelevel = 0;
356                         continued_action = false;
357                         BEGIN(ACTION);
358                         }
359
360         ^{WS}           /* allow indented rules */
361
362         {WS}            { /* whitespace after a pattern: the action follows */
363                         /* This rule is separate from the one below because
364                          * otherwise we get variable trailing context, so
365                          * we can't build the scanner using -{f,F}.
366                          */
367                         bracelevel = 0;
368                         continued_action = false;
369                         BEGIN(ACTION);
370
371                         if ( in_rule )
372                                 {
373                                 doing_rule_action = true;
374                                 in_rule = false;
375                                 return '\n';
376                                 }
377                         }
378
379         {OPTWS}{NL}     { /* line end right after a pattern: the action is empty */
380                         bracelevel = 0;
381                         continued_action = false;
382                         BEGIN(ACTION);
383                         unput( '\n' );  /* so <ACTION> sees it */
384
385                         if ( in_rule )
386                                 {
387                                 doing_rule_action = true;
388                                 in_rule = false;
389                                 return '\n';
390                                 }
391                         }
392
393         ^{OPTWS}"<<EOF>>"       |
394         "<<EOF>>"       return EOF_OP;
395
396         ^"%%".*         { /* end of section 2 */
397                         sectnum = 3;
398                         BEGIN(SECT3);
399                         yyterminate(); /* to stop the parser */
400                         }
401
402         "["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})*        { /* character class, matched up to but not including its closing bracket */
403                         int cclval;
404
405                         strcpy( nmstr, yytext ); /* NOTE(review): unbounded copy; the token length is not checked against the size of nmstr */
406
407                         /* Check to see if we've already encountered this
408                          * ccl.
409                          */
410                         if ( (cclval = ccllookup( (Char *) nmstr )) != 0 )
411                                 {
412                                 if ( input() != ']' ) /* consume the closing bracket */
413                                         synerr( _( "bad character class" ) );
414
415                                 yylval = cclval;
416                                 ++cclreuse;
417                                 return PREVCCL;
418                                 }
419                         else
420                                 {
421                                 /* We fudge a bit.  We know that this ccl will
422                                  * soon be numbered as lastccl + 1 by cclinit.
423                                  */
424                                 cclinstal( (Char *) nmstr, lastccl + 1 );
425
426                                 /* Push back everything but the leading bracket
427                                  * so the ccl can be rescanned.
428                                  */
429                                 yyless( 1 );
430
431                                 BEGIN(FIRSTCCL);
432                                 return '[';
433                                 }
434                         }
435
436         "{"{NAME}"}"    { /* use of a named definition: its text is pushed back and rescanned */
437                         register Char *nmdefptr;
438                         Char *ndlookup();
439
440                         strcpy( nmstr, yytext + 1 ); /* NOTE(review): unbounded copy; the token length is not checked against the size of nmstr */
441                         nmstr[yyleng - 2] = '\0';  /* chop trailing brace */
442
443                         if ( (nmdefptr = ndlookup( nmstr )) == 0 )
444                                 format_synerr(
445                                         _( "undefined definition {%s}" ),
446                                                 nmstr );
447
448                         else
449                                 { /* push back name surrounded by ()'s */
450                                 int len = strlen( (char *) nmdefptr );
451
452                                 if ( lex_compat || nmdefptr[0] == '^' ||
453                                      (len > 0 && nmdefptr[len - 1] == '$') )
454                                         { /* don't use ()'s after all */
455                                         PUT_BACK_STRING((char *) nmdefptr, 0);
456
457                                         if ( nmdefptr[0] == '^' ) /* the pushed-back caret is not at the start of a line; CARETISBOL returns it as '^' anyway */
458                                                 BEGIN(CARETISBOL);
459                                         }
460
461                                 else
462                                         { /* pushed back in reverse, so the closing parenthesis goes first */
463                                         unput(')');
464                                         PUT_BACK_STRING((char *) nmdefptr, 0);
465                                         unput('(');
466                                         }
467                                 }
468                         }
469
470         [/|*+?.(){}]    return (unsigned char) yytext[0]; /* operator characters are returned as themselves */
471         .               RETURNCHAR; /* anything else is an ordinary character */
472 }
473
474
475 <SC>{
476         [,*]            return (unsigned char) yytext[0]; /* separator between names, or the star */
477         ">"             BEGIN(SECT2); return '>'; /* end of the start-condition list */
478         ">"/^           BEGIN(CARETISBOL); return '>'; /* a caret follows the list: CARETISBOL returns it as '^' */
479         {SCNAME}        RETURNNAME;
480         .               {
481                         format_synerr( _( "bad <start condition>: %s" ),
482                                 yytext );
483                         }
484 }
485
486 <CARETISBOL>"^"         BEGIN(SECT2); return '^'; /* return the caret as '^' and resume rule scanning */
487
488
489 <QUOTE>{
490         [^"\n]          RETURNCHAR; /* every character inside quotes is literal */
491         \"              BEGIN(SECT2); return '"'; /* closing quote */
492
493         {NL}            { /* line ended inside the quotes: report it and return a closing quote */
494                         synerr( _( "missing quote" ) );
495                         BEGIN(SECT2);
496                         ++linenum;
497                         return '"';
498                         }
499 }
500
501
502 <FIRSTCCL>{
503         "^"/[^-\]\n]    BEGIN(CCL); return '^'; /* leading caret followed by an ordinary character */
504         "^"/("-"|"]")   return '^'; /* stay in FIRSTCCL so the following dash or bracket is taken as a character */
505         .               BEGIN(CCL); RETURNCHAR; /* first character of the class, whatever it is */
506 }
507
508 <CCL>{
509         -/[^\]\n]       return '-'; /* a dash not at the end of the class marks a range */
510         [^\]\n]         RETURNCHAR;
511         "]"             BEGIN(SECT2); return ']'; /* end of the class */
512         .|{NL}          { /* report the error and return a closing bracket */
513                         synerr( _( "bad character class" ) );
514                         BEGIN(SECT2);
515                         return ']';
516                         }
517 }
518
519 <FIRSTCCL,CCL>{
520         "[:alnum:]"     BEGIN(CCL); return CCE_ALNUM; /* named class expressions: one token each */
521         "[:alpha:]"     BEGIN(CCL); return CCE_ALPHA;
522         "[:blank:]"     BEGIN(CCL); return CCE_BLANK;
523         "[:cntrl:]"     BEGIN(CCL); return CCE_CNTRL;
524         "[:digit:]"     BEGIN(CCL); return CCE_DIGIT;
525         "[:graph:]"     BEGIN(CCL); return CCE_GRAPH;
526         "[:lower:]"     BEGIN(CCL); return CCE_LOWER;
527         "[:print:]"     BEGIN(CCL); return CCE_PRINT;
528         "[:punct:]"     BEGIN(CCL); return CCE_PUNCT;
529         "[:space:]"     BEGIN(CCL); return CCE_SPACE;
530         "[:upper:]"     BEGIN(CCL); return CCE_UPPER;
531         "[:xdigit:]"    BEGIN(CCL); return CCE_XDIGIT;
532         {CCL_EXPR}      { /* right shape but not one of the names above */
533                         format_synerr(
534                                 _( "bad character class expression: %s" ),
535                                         yytext );
536                         BEGIN(CCL); return CCE_ALNUM; /* a valid token is still returned after the error */
537                         }
538 }
539
540 <NUM>{
541         [[:digit:]]+    { /* a repeat count */
542                         yylval = myctoi( yytext );
543                         return NUMBER;
544                         }
545
546         ","             return ',';
547         "}"             BEGIN(SECT2); return '}'; /* end of the repeat count */
548
549         .               { /* report the error and return a closing brace */
550                         synerr( _( "bad character inside {}'s" ) );
551                         BEGIN(SECT2);
552                         return '}';
553                         }
554
555         {NL}            { /* line ended inside the braces */
556                         synerr( _( "missing }" ) );
557                         BEGIN(SECT2);
558                         ++linenum;
559                         return '}';
560                         }
561 }
562
563
564 <PERCENT_BRACE_ACTION>{
565         {OPTWS}"%}".*           bracelevel = 0; /* closing %}: the newline rule below then ends the block */
566
567         <ACTION>"/*"            ACTION_ECHO; yy_push_state( COMMENT ); /* also active in ACTION: copy a comment through */
568
569         <CODEBLOCK,ACTION>{
570                 "reject"        { /* the scanner is caseless, so CHECK_REJECT filters the match with all_upper() */
571                         ACTION_ECHO;
572                         CHECK_REJECT(yytext);
573                         }
574                 "yymore"        { /* the scanner is caseless, so CHECK_YYMORE filters the match with all_lower() */
575                         ACTION_ECHO;
576                         CHECK_YYMORE(yytext);
577                         }
578         }
579
580         {NAME}|{NOT_NAME}|.     ACTION_ECHO; /* copy code text through */
581         {NL}            {
582                         ++linenum;
583                         ACTION_ECHO;
584                         if ( bracelevel == 0 ||
585                              (doing_codeblock && indented_code) )
586                                 { /* the block is finished */
587                                 if ( doing_rule_action )
588                                         add_action( "\tYY_BREAK\n" );
589
590                                 doing_rule_action = doing_codeblock = false;
591                                 BEGIN(SECT2);
592                                 }
593                         }
594 }
595
596
597         /* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
598 <ACTION>{
599         "{"             ACTION_ECHO; ++bracelevel; /* track brace nesting to find the end of the action */
600         "}"             ACTION_ECHO; --bracelevel;
601         [^[:alpha:]_{}"'/\n]+   ACTION_ECHO; /* text that cannot start a name, brace, quote or comment */
602         {NAME}          ACTION_ECHO;
603         "'"([^'\\\n]|\\.)*"'"   ACTION_ECHO; /* character constant */
604         \"              ACTION_ECHO; BEGIN(ACTION_STRING); /* string literal: braces inside it are not counted */
605         {NL}            {
606                         ++linenum;
607                         ACTION_ECHO;
608                         if ( bracelevel == 0 )
609                                 { /* a newline outside all braces ends the action */
610                                 if ( doing_rule_action )
611                                         add_action( "\tYY_BREAK\n" );
612
613                                 doing_rule_action = false;
614                                 BEGIN(SECT2);
615                                 }
616                         }
617         .               ACTION_ECHO;
618 }
619
620 <ACTION_STRING>{
621         [^"\\\n]+       ACTION_ECHO; /* ordinary string text */
622         \\.             ACTION_ECHO; /* escaped character, including an escaped quote */
623         {NL}            ++linenum; ACTION_ECHO;
624         \"              ACTION_ECHO; BEGIN(ACTION); /* closing quote: back to action code */
625         .               ACTION_ECHO;
626 }
627
628 <COMMENT,ACTION,ACTION_STRING><<EOF>>   { /* input ended while action code was still being copied */
629                         synerr( _( "EOF encountered inside an action" ) );
630                         yyterminate();
631                         }
632
633
634 <SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ}      { /* escape sequence: returned as the CHAR value computed by myesc() */
635                         yylval = myesc( (Char *) yytext );
636
637                         if ( YY_START == FIRSTCCL ) /* the first character of the class has now been seen */
638                                 BEGIN(CCL);
639
640                         return CHAR;
641                         }
642
643
644 <SECT3>{
645         .*(\n?)         ECHO; /* section 3 is passed through with ECHO, a line at a time */
646         <<EOF>>         sectnum = 0; yyterminate();
647 }
648
649 <*>.|\n                 format_synerr( _( "bad character: %s" ), yytext ); /* fallback in every start condition, needed because of %option nodefault */
650
651 %%
652
653
654 int yywrap()
655         {
656         if ( --num_input_files > 0 )
657                 {
658                 set_input_file( *++input_files );
659                 return 0;
660                 }
661
662         else
663                 return 1;
664         }
665
666
667 /* set_input_file - open the given file (if NULL, stdin) for scanning */
668
669 void set_input_file( file )
670 char *file;
671         {
672         if ( file && strcmp( file, "-" ) )
673                 {
674                 infilename = copy_string( file );
675                 yyin = fopen( infilename, "r" );
676
677                 if ( yyin == NULL )
678                         lerrsf( _( "can't open %s" ), file );
679                 }
680
681         else
682                 {
683                 yyin = stdin;
684                 infilename = copy_string( "<stdin>" );
685                 }
686
687         linenum = 1;
688         }
689
690
/* Wrapper routines for accessing the scanner's malloc routines. */

/* flex_alloc - allocate size bytes with malloc()
 *
 * Returns the new block, or NULL if malloc() fails; the result is not
 * checked here.
 */
void *flex_alloc( size_t size )
        {
        return malloc( size );
        }
698
/* flex_realloc - resize the block at ptr to size bytes with realloc()
 *
 * Returns the resized block, or NULL if realloc() fails, in which case the
 * original block is left as it was.
 */
void *flex_realloc( void *ptr, size_t size )
        {
        return realloc( ptr, size );
        }
705
/* flex_free - release a block obtained from flex_alloc() or flex_realloc()
 *
 * A NULL ptr is accepted and ignored; free() itself guarantees that, so no
 * separate test is needed.
 */
void flex_free( void *ptr )
        {
        free( ptr );
        }