61bbe4b18291ecc8e5cf7ead9fc5b30e28a070d3
[dragonfly.git] / usr.bin / lex / scan.l
1 /* scan.l - scanner for flex input */
2
3 %{
4 /*-
5  * Copyright (c) 1990 The Regents of the University of California.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Vern Paxson.
10  * 
11  * The United States Government has rights in this work pursuant
12  * to contract no. DE-AC03-76SF00098 between the United States
13  * Department of Energy and the University of California.
14  *
15  * Redistribution and use in source and binary forms are permitted provided
16  * that: (1) source distributions retain this entire copyright notice and
17  * comment, and (2) distributions including binaries display the following
18  * acknowledgement:  ``This product includes software developed by the
19  * University of California, Berkeley and its contributors'' in the
20  * documentation or other materials provided with the distribution and in
21  * all advertising materials mentioning features or use of this software.
22  * Neither the name of the University nor the names of its contributors may
23  * be used to endorse or promote products derived from this software without
24  * specific prior written permission.
25  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
26  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
27  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
28  */
29
30 /* $Header: /home/daffy/u0/vern/flex/RCS/scan.l,v 2.56 95/04/24 12:17:19 vern Exp $ */
31 /* $FreeBSD: src/usr.bin/lex/scan.l,v 1.5 1999/10/27 07:56:46 obrien Exp $ */
32 /* $DragonFly: src/usr.bin/lex/scan.l,v 1.3 2008/06/21 19:29:21 swildner Exp $ */
33
34 #include "flexdef.h"
35 #include "parse.h"
36
/* Helper macros used by the rule actions below.
 *
 * Every statement-like macro is wrapped in do { ... } while ( 0 ) so that
 * it behaves as exactly one statement: it can be the body of an if/else
 * without capturing the else or leaving part of its expansion outside the
 * conditional.
 */

/* Append the current token text to the action being collected. */
#define ACTION_ECHO add_action( yytext )

/* Call action_define( def, 1 ) only when should_define is non-zero. */
#define ACTION_IFDEF(def, should_define) \
        do \
                { \
                if ( should_define ) \
                        action_define( (def), 1 ); \
                } \
        while ( 0 )

/* Kept with its trailing semicolon exactly as before. */
#define MARK_END_OF_PROLOG mark_prolog();

/* Name and signature of the generated scanning routine. */
#define YY_DECL \
        int flexscan( void )

/* Return the first character of the token as a CHAR; the cast keeps
 * characters with the high bit set from becoming negative values.
 */
#define RETURNCHAR \
        do \
                { \
                yylval = (unsigned char) yytext[0]; \
                return CHAR; \
                } \
        while ( 0 )

/* Copy the token into nmstr and return NAME.
 * NOTE(review): the copy is unbounded; nmstr's capacity is declared
 * outside this file - confirm it can hold the longest possible token.
 */
#define RETURNNAME \
        do \
                { \
                strcpy( nmstr, yytext ); \
                return NAME; \
                } \
        while ( 0 )

/* unput() the characters of str from its last one down to index start,
 * so they are rescanned in their original order.  Uses the caller's
 * int variable `i`.  The length is converted to int before subtracting,
 * so an empty string gives -1 without depending on size_t wrap-around.
 */
#define PUT_BACK_STRING(str, start) \
        do \
                { \
                for ( i = (int) strlen( (str) ) - 1; i >= (start); --i ) \
                        unput((str)[i]); \
                } \
        while ( 0 )

/* Set `reject` when all_upper( str ) is true. */
#define CHECK_REJECT(str) \
        do \
                { \
                if ( all_upper( str ) ) \
                        reject = true; \
                } \
        while ( 0 )

/* Set `yymore_used` when all_lower( str ) is true. */
#define CHECK_YYMORE(str) \
        do \
                { \
                if ( all_lower( str ) ) \
                        yymore_used = true; \
                } \
        while ( 0 )
68 %}
69
70 %option caseless nodefault outfile="scan.c" stack noyy_top_state
71 %option nostdinit
72
/* Exclusive start conditions; each is handled by a rule group below. */
73 %x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
74 %x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION
75 %x OPTION LINEDIR
76
/* WS: one or more blanks.  OPTWS: zero or more blanks.
 * NOT_WS: one character that is neither a blank nor a newline.
 */
77 WS              [[:blank:]]+
78 OPTWS           [[:blank:]]*
79 NOT_WS          [^[:blank:]\n]
80
/* NL: a line feed, optionally preceded by a carriage return. */
81 NL              \r?\n
82
/* NAME: a letter or underscore followed by letters, digits, underscores
 * or hyphens.  NOT_NAME: a run of characters containing no letter,
 * underscore, asterisk or newline.
 */
83 NAME            ([[:alpha:]_][[:alnum:]_-]*)
84 NOT_NAME        [^[:alpha:]_*\n]+
85
/* Start condition names use the same syntax as NAME. */
86 SCNAME          {NAME}
87
/* ESCSEQ: a backslash followed by any non-newline character, by one to
 * three octal digits, or by x and one or two hexadecimal digits.
 */
88 ESCSEQ          (\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))
89
/* Character class pieces: FIRST_CCL_CHAR may be a closing bracket,
 * CCL_CHAR may not; CCL_EXPR is a bracketed-colon expression made of
 * letters only.
 */
90 FIRST_CCL_CHAR  ([^\\\n]|{ESCSEQ})
91 CCL_CHAR        ([^\\\n\]]|{ESCSEQ})
92 CCL_EXPR        ("[:"[[:alpha:]]+":]")
93
/* Letters accepted after '%' by the <INITIAL> rules that only count the
 * line and otherwise ignore the directive.
 */
94 LEXOPT          [aceknopr]
95
96 %%
97         static int bracelevel, didadef, indented_code;
        /* bracelevel: nesting depth of %{ %} / { } while collecting code.
         * didadef: set once the text of the current name definition has
         * been installed.  indented_code: true when CODEBLOCK was entered
         * from an indented line rather than from %{.
         */
98         static int doing_rule_action = false;
        /* doing_rule_action: when set, the newline that ends the action
         * also appends "\tYY_BREAK\n".
         */
99         static int option_sense;
        /* option_sense: reset to true by whitespace in <OPTION> and
         * inverted by each "no" prefix.
         */
100
101         int doing_codeblock = false;
        /* doing_codeblock: set when <SECT2> sees %{ at the start of a line. */
102         int i;
        /* i: scratch index used by PUT_BACK_STRING and the <PICKUPDEF> trim loop. */
103         Char nmdef[MAXLINE], myesc();
        /* nmdef: buffer for the text of a name definition.
         * myesc: declared here, used by the {ESCSEQ} rule.
         */
104
105
106 <INITIAL>{
        /* Section 1 of the input (definitions), up to the first "%%" line. */
107         ^{WS}           indented_code = true; BEGIN(CODEBLOCK);
108         ^"/*"           ACTION_ECHO; yy_push_state( COMMENT );
109         ^#{OPTWS}line{WS}       yy_push_state( LINEDIR );
110         ^"%s"{NAME}?    return SCDECL;
111         ^"%x"{NAME}?    return XSCDECL;
112         ^"%{".*{NL}     {
113                         ++linenum;
114                         line_directive_out( (FILE *) 0, 1 );
115                         indented_code = false;
116                         BEGIN(CODEBLOCK);
117                         }
118
119         {WS}            /* discard */
120
        /* "%%" ends section 1: SECTEND goes to the parser and scanning
         * continues in the section 2 prolog.
         */
121         ^"%%".*         {
122                         sectnum = 2;
123                         bracelevel = 0;
124                         mark_defs1();
125                         line_directive_out( (FILE *) 0, 1 );
126                         BEGIN(SECT2PROLOG);
127                         return SECTEND;
128                         }
129
130         ^"%pointer".*{NL}       yytext_is_array = false; ++linenum;
131         ^"%array".*{NL}         yytext_is_array = true; ++linenum;
132
133         ^"%option"      BEGIN(OPTION); return OPTION_OP;
134
135         ^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL}      ++linenum; /* ignore */
136         ^"%"{LEXOPT}{WS}.*{NL}  ++linenum;      /* ignore */
137
138         ^"%"[^sxaceknopr{}].*   synerr( _( "unrecognized '%' directive" ) );
139
        /* A name at the start of a line begins a definition: remember the
         * name and let <PICKUPDEF> collect the rest of the line.
         */
140         ^{NAME}         {
141                         strcpy( nmstr, yytext );
142                         didadef = false;
143                         BEGIN(PICKUPDEF);
144                         }
145
146         {SCNAME}        RETURNNAME;
147         ^{OPTWS}{NL}    ++linenum; /* allows blank lines in section 1 */
148         {OPTWS}{NL}     ACTION_ECHO; ++linenum; /* maybe end of comment line */
149 }
150
151
152 <COMMENT>{
        /* Inside a C comment.  All text is passed to ACTION_ECHO; the
         * closing delimiter pops back to the state that pushed COMMENT.
         */
153         "*/"            ACTION_ECHO; yy_pop_state();
154         "*"             ACTION_ECHO;
155         [^*\n]+         ACTION_ECHO;
156         [^*\n]*{NL}     ++linenum; ACTION_ECHO;
157 }
158
159 <LINEDIR>{
        /* Rest of a "#line" directive.  A number replaces linenum, a
         * quoted string replaces infilename, and the newline pops back to
         * the state that pushed LINEDIR.
         */
160         \n              yy_pop_state();
161         [[:digit:]]+    linenum = myctoi( yytext );
162
163         \"[^"\n]*\"     {
                        /* Free the old name, copy the text after the
                         * opening quote, then overwrite the closing quote
                         * with the terminator.
                         */
164                         flex_free( (void *) infilename );
165                         infilename = copy_string( yytext + 1 );
166                         infilename[strlen( infilename ) - 1] = '\0';
167                         }
168         .               /* ignore spurious characters */
169 }
170
171 <CODEBLOCK>{
        /* Code in section 1, entered either from an indented line or from
         * a line starting with %{.  Text is passed to ACTION_ECHO.
         */
172         ^"%}".*{NL}     ++linenum; BEGIN(INITIAL);
173
174         {NAME}|{NOT_NAME}|.     ACTION_ECHO;
175
176         {NL}            {
177                         ++linenum;
178                         ACTION_ECHO;
                        /* An indented code line ends at its newline; a %{
                         * block continues until the %} rule above.
                         */
179                         if ( indented_code )
180                                 BEGIN(INITIAL);
181                         }
182 }
183
184
<PICKUPDEF>{
        /* Entered from <INITIAL> after a definition's name has been copied
         * into nmstr; collects the rest of the line as the definition text.
         */
        {WS}            /* separates name and definition */

        {NOT_WS}.*      {
                        /* nmdef holds MAXLINE elements.  Refuse text that
                         * would not fit together with its terminator
                         * instead of letting strcpy() write past the end.
                         */
                        if ( yyleng >= MAXLINE )
                                {
                                synerr( _( "name definition too long" ) );

                                /* Already diagnosed: keep the {NL} rule from
                                 * also reporting an incomplete definition.
                                 */
                                didadef = true;
                                }

                        else
                                {
                                strcpy( (char *) nmdef, yytext );

                                /* Skip trailing whitespace. */
                                for ( i = strlen( (char *) nmdef ) - 1;
                                      i >= 0 && (nmdef[i] == ' ' || nmdef[i] == '\t');
                                      --i )
                                        ;

                                nmdef[i + 1] = '\0';

                                ndinstal( nmstr, nmdef );
                                didadef = true;
                                }
                        }

        {NL}            {
                        /* End of the definition line: a name with no text
                         * after it is an error.
                         */
                        if ( ! didadef )
                                synerr( _( "incomplete name definition" ) );
                        BEGIN(INITIAL);
                        ++linenum;
                        }
}
210
211
212 <OPTION>{
        /* Body of a %option line.  Whitespace resets option_sense to true,
         * each "no" prefix inverts it, and an option name then stores the
         * resulting sense (or its negation) in the matching setting.
         */
213         {NL}            ++linenum; BEGIN(INITIAL);
214         {WS}            option_sense = true;
215
216         "="             return '=';
217
218         no              option_sense = ! option_sense;
219
220         7bit            csize = option_sense ? 128 : 256;
221         8bit            csize = option_sense ? 256 : 128;
222
223         align           long_align = option_sense;
224         always-interactive      {
225                         action_define( "YY_ALWAYS_INTERACTIVE", option_sense );
226                         }
227         array           yytext_is_array = option_sense;
228         backup          backing_up_report = option_sense;
229         batch           interactive = ! option_sense;
230         "c++"           C_plus_plus = option_sense;
231         caseful|case-sensitive          caseins = ! option_sense;
232         caseless|case-insensitive       caseins = option_sense;
233         debug           ddebug = option_sense;
234         default         spprdflt = ! option_sense;
235         ecs             useecs = option_sense;
236         fast            {
237                         useecs = usemecs = false;
238                         use_read = fullspd = true;
239                         }
240         full            {
241                         useecs = usemecs = false;
242                         use_read = fulltbl = true;
243                         }
244         input           ACTION_IFDEF("YY_NO_INPUT", ! option_sense);
245         interactive     interactive = option_sense;
246         lex-compat      lex_compat = option_sense;
247         main            {
248                         action_define( "YY_MAIN", option_sense );
249                         do_yywrap = ! option_sense;
250                         }
251         meta-ecs        usemecs = option_sense;
252         never-interactive       {
253                         action_define( "YY_NEVER_INTERACTIVE", option_sense );
254                         }
255         perf-report     performance_report += option_sense ? 1 : -1;
256         pointer         yytext_is_array = ! option_sense;
257         read            use_read = option_sense;
258         reject          reject_really_used = option_sense;
259         stack           action_define( "YY_STACK_USED", option_sense );
260         stdinit         do_stdinit = option_sense;
261         stdout          use_stdout = option_sense;
262         unput           ACTION_IFDEF("YY_NO_UNPUT", ! option_sense);
263         verbose         printstats = option_sense;
264         warn            nowarn = ! option_sense;
265         yylineno        do_yylineno = option_sense;
266         yymore          yymore_really_used = option_sense;
267         yywrap          do_yywrap = option_sense;
268
269         yy_push_state   ACTION_IFDEF("YY_NO_PUSH_STATE", ! option_sense);
270         yy_pop_state    ACTION_IFDEF("YY_NO_POP_STATE", ! option_sense);
271         yy_top_state    ACTION_IFDEF("YY_NO_TOP_STATE", ! option_sense);
272
273         yy_scan_buffer  ACTION_IFDEF("YY_NO_SCAN_BUFFER", ! option_sense);
274         yy_scan_bytes   ACTION_IFDEF("YY_NO_SCAN_BYTES", ! option_sense);
275         yy_scan_string  ACTION_IFDEF("YY_NO_SCAN_STRING", ! option_sense);
276
        /* Options that take a value are returned to the parser as tokens. */
277         outfile         return OPT_OUTFILE;
278         prefix          return OPT_PREFIX;
279         yyclass         return OPT_YYCLASS;
280
        /* Quoted value: copy the text after the opening quote into nmstr
         * and overwrite the closing quote with the terminator.
         */
281         \"[^"\n]*\"     {
282                         strcpy( nmstr, yytext + 1 );
283                         nmstr[strlen( nmstr ) - 1] = '\0';
284                         return NAME;
285                         }
286
        /* Any word that does not start with "no", or any single
         * character, that was not matched above is an error.
         */
287         (([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|. {
288                         format_synerr( _( "unrecognized %%option: %s" ),
289                                 yytext );
290                         BEGIN(RECOVER);
291                         }
292 }
293
294 <RECOVER>.*{NL}         ++linenum; BEGIN(INITIAL); /* drop the rest of the line after a bad %option */
295
296
297 <SECT2PROLOG>{
        /* Text between the first "%%" and the first rule.  Indented lines
         * and lines inside %{ ... %} are passed to ACTION_ECHO; the first
         * non-indented line outside the braces starts the rules.
         */
298         ^"%{".* ++bracelevel; yyless( 2 );      /* eat only %{ */
299         ^"%}".* --bracelevel; yyless( 2 );      /* eat only %} */
300
301         ^{WS}.* ACTION_ECHO;    /* indented code in prolog */
302
303         ^{NOT_WS}.*     {       /* non-indented code */
304                         if ( bracelevel <= 0 )
305                                 { /* not in %{ ... %} */
306                                 yyless( 0 );    /* put it all back */
307                                 yy_set_bol( 1 );
308                                 mark_prolog();
309                                 BEGIN(SECT2);
310                                 }
311                         else
312                                 ACTION_ECHO;
313                         }
314
315         .*              ACTION_ECHO;
316         {NL}    ++linenum; ACTION_ECHO;
317
318         <<EOF>>         {
319                         mark_prolog();
320                         sectnum = 0;
321                         yyterminate(); /* to stop the parser */
322                         }
323 }
324
325 <SECT2>{
        /* Section 2 (the rules).  Pattern text is returned to the parser
         * token by token; the whitespace or newline that ends a pattern
         * switches to <ACTION> (or <PERCENT_BRACE_ACTION> for %{) so the
         * action text can be collected.
         */
326         ^{OPTWS}{NL}    ++linenum; /* allow blank lines in section 2 */
327
328         ^{OPTWS}"%{"    {
329                         indented_code = false;
330                         doing_codeblock = true;
331                         bracelevel = 1;
332                         BEGIN(PERCENT_BRACE_ACTION);
333                         }
334
335         ^{OPTWS}"<"     BEGIN(SC); return '<';
336         ^{OPTWS}"^"     return '^';
337         \"              BEGIN(QUOTE); return '"';
338         "{"/[[:digit:]] BEGIN(NUM); return '{';
339         "$"/([[:blank:]]|{NL})  return '$';
340
341         {WS}"%{"                {
342                         bracelevel = 1;
343                         BEGIN(PERCENT_BRACE_ACTION);
344
345                         if ( in_rule )
346                                 {
347                                 doing_rule_action = true;
348                                 in_rule = false;
349                                 return '\n';
350                                 }
351                         }
352         {WS}"|".*{NL}   continued_action = true; ++linenum; return '\n';
353
354         ^{WS}"/*"       {
355                         yyless( yyleng - 2 );   /* put back '/', '*' */
356                         bracelevel = 0;
357                         continued_action = false;
358                         BEGIN(ACTION);
359                         }
360
361         ^{WS}           /* allow indented rules */
362
363         {WS}            {
364                         /* This rule is separate from the one below because
365                          * otherwise we get variable trailing context, so
366                          * we can't build the scanner using -{f,F}.
367                          */
368                         bracelevel = 0;
369                         continued_action = false;
370                         BEGIN(ACTION);
371
372                         if ( in_rule )
373                                 {
374                                 doing_rule_action = true;
375                                 in_rule = false;
376                                 return '\n';
377                                 }
378                         }
379
380         {OPTWS}{NL}     {
381                         bracelevel = 0;
382                         continued_action = false;
383                         BEGIN(ACTION);
384                         unput( '\n' );  /* so <ACTION> sees it */
385
386                         if ( in_rule )
387                                 {
388                                 doing_rule_action = true;
389                                 in_rule = false;
390                                 return '\n';
391                                 }
392                         }
393
394         ^{OPTWS}"<<EOF>>"       |
395         "<<EOF>>"       return EOF_OP;
396
        /* A second "%%" ends the rules and starts section 3. */
397         ^"%%".*         {
398                         sectnum = 3;
399                         BEGIN(SECT3);
400                         yyterminate(); /* to stop the parser */
401                         }
402
        /* Character class: the match covers everything up to, but not
         * including, the closing bracket.
         */
403         "["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})*        {
404                         int cclval;
405
406                         strcpy( nmstr, yytext );
407
408                         /* Check to see if we've already encountered this
409                          * ccl.
410                          */
411                         if ( (cclval = ccllookup( (Char *) nmstr )) != 0 )
412                                 {
413                                 if ( input() != ']' )
414                                         synerr( _( "bad character class" ) );
415
416                                 yylval = cclval;
417                                 ++cclreuse;
418                                 return PREVCCL;
419                                 }
420                         else
421                                 {
422                                 /* We fudge a bit.  We know that this ccl will
423                                  * soon be numbered as lastccl + 1 by cclinit.
424                                  */
425                                 cclinstal( (Char *) nmstr, lastccl + 1 );
426
427                                 /* Push back everything but the leading bracket
428                                  * so the ccl can be rescanned.
429                                  */
430                                 yyless( 1 );
431
432                                 BEGIN(FIRSTCCL);
433                                 return '[';
434                                 }
435                         }
436
        /* Reference to a name definition: look the name up and push its
         * text back onto the input so that it is scanned in place.
         */
437         "{"{NAME}"}"    {
438                         register Char *nmdefptr;
439                         Char *ndlookup();
440
441                         strcpy( nmstr, yytext + 1 );
442                         nmstr[yyleng - 2] = '\0';  /* chop trailing brace */
443
444                         if ( (nmdefptr = ndlookup( nmstr )) == 0 )
445                                 format_synerr(
446                                         _( "undefined definition {%s}" ),
447                                                 nmstr );
448
449                         else
450                                 { /* push back name surrounded by ()'s */
451                                 int len = strlen( (char *) nmdefptr );
452
453                                 if ( lex_compat || nmdefptr[0] == '^' ||
454                                      (len > 0 && nmdefptr[len - 1] == '$') )
455                                         { /* don't use ()'s after all */
456                                         PUT_BACK_STRING((char *) nmdefptr, 0);
457
458                                         if ( nmdefptr[0] == '^' )
459                                                 BEGIN(CARETISBOL);
460                                         }
461
462                                 else
463                                         {
                                        /* Characters are pushed back last
                                         * to first, so ')' goes first and
                                         * '(' last.
                                         */
464                                         unput(')');
465                                         PUT_BACK_STRING((char *) nmdefptr, 0);
466                                         unput('(');
467                                         }
468                                 }
469                         }
470
471         [/|*+?.(){}]    return (unsigned char) yytext[0];
472         .               RETURNCHAR;
473 }
474
475
476 <SC>{
        /* Inside a start condition list, entered after a '<' at the start
         * of a rule.  A closing '>' directly followed by '^' goes to
         * <CARETISBOL>; any other closing '>' returns to <SECT2>.
         */
477         [,*]            return (unsigned char) yytext[0];
478         ">"             BEGIN(SECT2); return '>';
479         ">"/^           BEGIN(CARETISBOL); return '>';
480         {SCNAME}        RETURNNAME;
481         .               {
482                         format_synerr( _( "bad <start condition>: %s" ),
483                                 yytext );
484                         }
485 }
486
487 <CARETISBOL>"^"         BEGIN(SECT2); return '^'; /* caret here is returned as the '^' token, not as a CHAR */
488
489
490 <QUOTE>{
        /* Inside a double-quoted string in a pattern: each character is
         * returned as a CHAR until the closing quote.
         */
491         [^"\n]          RETURNCHAR;
492         \"              BEGIN(SECT2); return '"';
493
494         {NL}            {
                        /* Unterminated string: report it and return the
                         * quote token anyway, then resume in <SECT2>.
                         */
495                         synerr( _( "missing quote" ) );
496                         BEGIN(SECT2);
497                         ++linenum;
498                         return '"';
499                         }
500 }
501
502
503 <FIRSTCCL>{
        /* First position after the opening bracket of a character class.
         * A '^' followed by '-' or ']' stays in this state, so that
         * character is still scanned as a first character.
         */
504         "^"/[^-\]\n]    BEGIN(CCL); return '^';
505         "^"/("-"|"]")   return '^';
506         .               BEGIN(CCL); RETURNCHAR;
507 }
508
509 <CCL>{
        /* Remainder of a character class.  '-' is returned as a token only
         * when it is not followed by ']' or a newline; otherwise it falls
         * to the CHAR rule.
         */
510         -/[^\]\n]       return '-';
511         [^\]\n]         RETURNCHAR;
512         "]"             BEGIN(SECT2); return ']';
513         .|{NL}          {
514                         synerr( _( "bad character class" ) );
515                         BEGIN(SECT2);
516                         return ']';
517                         }
518 }
519
520 <FIRSTCCL,CCL>{
        /* Character class expressions; each returns its own token and
         * continues in <CCL>.
         */
521         "[:alnum:]"     BEGIN(CCL); return CCE_ALNUM;
522         "[:alpha:]"     BEGIN(CCL); return CCE_ALPHA;
523         "[:blank:]"     BEGIN(CCL); return CCE_BLANK;
524         "[:cntrl:]"     BEGIN(CCL); return CCE_CNTRL;
525         "[:digit:]"     BEGIN(CCL); return CCE_DIGIT;
526         "[:graph:]"     BEGIN(CCL); return CCE_GRAPH;
527         "[:lower:]"     BEGIN(CCL); return CCE_LOWER;
528         "[:print:]"     BEGIN(CCL); return CCE_PRINT;
529         "[:punct:]"     BEGIN(CCL); return CCE_PUNCT;
530         "[:space:]"     BEGIN(CCL); return CCE_SPACE;
531         "[:upper:]"     BEGIN(CCL); return CCE_UPPER;
532         "[:xdigit:]"    BEGIN(CCL); return CCE_XDIGIT;
        /* Any other expression of that form is reported as an error;
         * CCE_ALNUM is returned in its place.
         */
533         {CCL_EXPR}      {
534                         format_synerr(
535                                 _( "bad character class expression: %s" ),
536                                         yytext );
537                         BEGIN(CCL); return CCE_ALNUM;
538                         }
539 }
540
541 <NUM>{
        /* Inside braces that start with a digit (entered from <SECT2>):
         * numbers and commas are returned until the closing brace.
         */
542         [[:digit:]]+    {
543                         yylval = myctoi( yytext );
544                         return NUMBER;
545                         }
546
547         ","             return ',';
548         "}"             BEGIN(SECT2); return '}';
549
        /* On either error below, the closing brace token is returned
         * anyway and scanning resumes in <SECT2>.
         */
550         .               {
551                         synerr( _( "bad character inside {}'s" ) );
552                         BEGIN(SECT2);
553                         return '}';
554                         }
555
556         {NL}            {
557                         synerr( _( "missing }" ) );
558                         BEGIN(SECT2);
559                         ++linenum;
560                         return '}';
561                         }
562 }
563
564
565 <PERCENT_BRACE_ACTION>{
        /* Text of a %{ ... %} block in section 2.  The nested scopes below
         * name further start conditions for individual rules.
         * NOTE(review): presumably those rules stay active in
         * PERCENT_BRACE_ACTION as well - confirm the nested-scope
         * semantics of the flex version that builds this file.
         */
566         {OPTWS}"%}".*           bracelevel = 0;
567
568         <ACTION>"/*"            ACTION_ECHO; yy_push_state( COMMENT );
569
570         <CODEBLOCK,ACTION>{
571                 "reject"        {
572                         ACTION_ECHO;
573                         CHECK_REJECT(yytext);
574                         }
575                 "yymore"        {
576                         ACTION_ECHO;
577                         CHECK_YYMORE(yytext);
578                         }
579         }
580
581         {NAME}|{NOT_NAME}|.     ACTION_ECHO;
582         {NL}            {
583                         ++linenum;
584                         ACTION_ECHO;
                        /* The block ends at the newline after %} has reset
                         * bracelevel, or at the end of an indented
                         * code-block line.
                         */
585                         if ( bracelevel == 0 ||
586                              (doing_codeblock && indented_code) )
587                                 {
588                                 if ( doing_rule_action )
589                                         add_action( "\tYY_BREAK\n" );
590
591                                 doing_rule_action = doing_codeblock = false;
592                                 BEGIN(SECT2);
593                                 }
594                         }
595 }
596
597
598         /* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
599 <ACTION>{
        /* Text of a rule's action.  Braces adjust bracelevel; the action
         * ends at the first newline seen while bracelevel is 0.
         */
600         "{"             ACTION_ECHO; ++bracelevel;
601         "}"             ACTION_ECHO; --bracelevel;
602         [^[:alpha:]_{}"'/\n]+   ACTION_ECHO;
603         {NAME}          ACTION_ECHO;
604         "'"([^'\\\n]|\\.)*"'"   ACTION_ECHO; /* character constant */
605         \"              ACTION_ECHO; BEGIN(ACTION_STRING);
606         {NL}            {
607                         ++linenum;
608                         ACTION_ECHO;
609                         if ( bracelevel == 0 )
610                                 {
611                                 if ( doing_rule_action )
612                                         add_action( "\tYY_BREAK\n" );
613
614                                 doing_rule_action = false;
615                                 BEGIN(SECT2);
616                                 }
617                         }
618         .               ACTION_ECHO;
619 }
620
621 <ACTION_STRING>{
        /* Inside a string literal within an action.  Everything is passed
         * to ACTION_ECHO, so braces in the string never reach the
         * bracelevel rules of <ACTION>; an unescaped quote returns there.
         */
622         [^"\\\n]+       ACTION_ECHO;
623         \\.             ACTION_ECHO;
624         {NL}            ++linenum; ACTION_ECHO;
625         \"              ACTION_ECHO; BEGIN(ACTION);
626         .               ACTION_ECHO;
627 }
628
629 <COMMENT,ACTION,ACTION_STRING><<EOF>>   {
                        /* End of input while still inside a comment,
                         * action or action string: report it and stop.
                         */
630                         synerr( _( "EOF encountered inside an action" ) );
631                         yyterminate();
632                         }
633
634
635 <SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ}      {
                        /* Escape sequence in a pattern, quoted string or
                         * character class: yylval takes the value returned
                         * by myesc() and the token is a CHAR.
                         */
636                         yylval = myesc( (Char *) yytext );
637
                        /* An escape as the first class character still
                         * counts as that first character.
                         */
638                         if ( YY_START == FIRSTCCL )
639                                 BEGIN(CCL);
640
641                         return CHAR;
642                         }
643
644
645 <SECT3>{
        /* Section 3: every line is handed to ECHO; end of input resets
         * sectnum and terminates the scan.
         */
646         .*(\n?)         ECHO;
647         <<EOF>>         sectnum = 0; yyterminate();
648 }
649
650 <*>.|\n                 format_synerr( _( "bad character: %s" ), yytext ); /* last rule, all start conditions: report anything unmatched */
651
652 %%
653
654
655 int yywrap( void )
656         {
657         if ( --num_input_files > 0 )
658                 {
659                 set_input_file( *++input_files );
660                 return 0;
661                 }
662
663         else
664                 return 1;
665         }
666
667
668 /* set_input_file - open the given file (if NULL, stdin) for scanning */
669
670 void set_input_file( char *file )
671         {
672         if ( file && strcmp( file, "-" ) )
673                 {
674                 infilename = copy_string( file );
675                 yyin = fopen( infilename, "r" );
676
677                 if ( yyin == NULL )
678                         lerrsf( _( "can't open %s" ), file );
679                 }
680
681         else
682                 {
683                 yyin = stdin;
684                 infilename = copy_string( "<stdin>" );
685                 }
686
687         linenum = 1;
688         }
689
690
/* Wrapper routines for accessing the scanner's malloc routines. */

/* flex_alloc - allocate size bytes
 *
 * Returns malloc()'s result unchanged, so the result is NULL when the
 * allocation fails.  malloc() already returns void *, so no cast is
 * needed.
 */
void *flex_alloc( size_t size )
        {
        return malloc( size );
        }
697
/* flex_realloc - resize the block at ptr to size bytes
 *
 * Returns realloc()'s result unchanged: NULL on failure, in which case
 * the block at ptr is left as it was.  realloc() already returns void *,
 * so no cast is needed.
 */
void *flex_realloc( void *ptr, size_t size )
        {
        return realloc( ptr, size );
        }
702
/* flex_free - release a block obtained from flex_alloc() or flex_realloc()
 *
 * A NULL ptr is accepted: free() is defined to do nothing for a null
 * pointer, so no separate check is made here.
 */
void flex_free( void *ptr )
        {
        free( ptr );
        }