Disconnect hostapd from building in base
[dragonfly.git] / contrib / awk / lex.c
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <ctype.h>
29 #include "awk.h"
30 #include "ytab.h"
31
32 extern YYSTYPE  yylval;
33 extern int      infunc;
34
35 int     lineno  = 1;
36 int     bracecnt = 0;
37 int     brackcnt  = 0;
38 int     parencnt = 0;
39
/* One row of the reserved-word table searched by word(). */
typedef struct Keyword {
	const char *word;	/* spelling of the reserved word */
	int sub;		/* value word() stores in yylval.i on a match */
	int type;		/* token type word() returns on a match */
} Keyword;
45
46 Keyword keywords[] ={   /* keep sorted: binary searched */
47         { "BEGIN",      XBEGIN,         XBEGIN },
48         { "END",        XEND,           XEND },
49         { "NF",         VARNF,          VARNF },
50         { "atan2",      FATAN,          BLTIN },
51         { "break",      BREAK,          BREAK },
52         { "close",      CLOSE,          CLOSE },
53         { "continue",   CONTINUE,       CONTINUE },
54         { "cos",        FCOS,           BLTIN },
55         { "delete",     DELETE,         DELETE },
56         { "do",         DO,             DO },
57         { "else",       ELSE,           ELSE },
58         { "exit",       EXIT,           EXIT },
59         { "exp",        FEXP,           BLTIN },
60         { "fflush",     FFLUSH,         BLTIN },
61         { "for",        FOR,            FOR },
62         { "func",       FUNC,           FUNC },
63         { "function",   FUNC,           FUNC },
64         { "getline",    GETLINE,        GETLINE },
65         { "gsub",       GSUB,           GSUB },
66         { "if",         IF,             IF },
67         { "in",         IN,             IN },
68         { "index",      INDEX,          INDEX },
69         { "int",        FINT,           BLTIN },
70         { "length",     FLENGTH,        BLTIN },
71         { "log",        FLOG,           BLTIN },
72         { "match",      MATCHFCN,       MATCHFCN },
73         { "next",       NEXT,           NEXT },
74         { "nextfile",   NEXTFILE,       NEXTFILE },
75         { "print",      PRINT,          PRINT },
76         { "printf",     PRINTF,         PRINTF },
77         { "rand",       FRAND,          BLTIN },
78         { "return",     RETURN,         RETURN },
79         { "sin",        FSIN,           BLTIN },
80         { "split",      SPLIT,          SPLIT },
81         { "sprintf",    SPRINTF,        SPRINTF },
82         { "sqrt",       FSQRT,          BLTIN },
83         { "srand",      FSRAND,         BLTIN },
84         { "sub",        SUB,            SUB },
85         { "substr",     SUBSTR,         SUBSTR },
86         { "system",     FSYSTEM,        BLTIN },
87         { "tolower",    FTOLOWER,       BLTIN },
88         { "toupper",    FTOUPPER,       BLTIN },
89         { "while",      WHILE,          WHILE },
90 };
91
/*
 * RET(x): return token x from the lexer, printing its name first when
 * dbg is set.  Wrapped in do/while(0) so that "RET(x);" is exactly one
 * statement and stays valid as the body of an if that has an else.
 */
#define RET(x)	do { if(dbg)printf("lex %s\n", tokname(x)); return(x); } while (0)
93
/* Return the next input character without consuming it. */
int peek(void)
{
	int next;

	next = input();
	unput(next);	/* push it straight back for the next reader */
	return next;
}
100
/*
 * gettok - read the next raw token from the input.
 *
 * pbuf, psz: address of the caller's token buffer and of its size.  The
 *	buffer may be regrown through adjbuf(); both are written back when a
 *	name or number is returned.
 *
 * Returns 0 at end of input, 'a' when the buffer holds a name, '0' when
 * it holds a number, and otherwise the single character that was read
 * (which is also left in the buffer as a one-character string).
 */
int gettok(char **pbuf, int *psz)	/* get next input token */
{
	int c, retc;
	char *buf = *pbuf;
	int sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return 0;
	buf[0] = c;
	buf[1] = 0;
	if (!isalnum(c) && c != '.' && c != '_')
		return c;

	*bp++ = c;
	if (isalpha(c) || c == '_') {	/* it's a varname */
		for ( ; (c = input()) != 0; ) {
			/*
			 * Keep room for this character AND the terminating NUL:
			 * the loop can end at end of input without coming back
			 * through this check before "*bp = 0" below.
			 */
			if (bp-buf+1 >= sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for name %.10s...", buf );
			if (isalnum(c) || c == '_')
				*bp++ = c;
			else {
				*bp = 0;
				unput(c);
				break;
			}
		}
		*bp = 0;
		retc = 'a';	/* alphanumeric */
	} else {	/* maybe it's a number, but could be . */
		char *rem;
		/* read input until can't be a number */
		for ( ; (c = input()) != 0; ) {
			/* same reservation as above: character plus NUL */
			if (bp-buf+1 >= sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for number %.10s...", buf );
			if (isdigit(c) || c == 'e' || c == 'E'
			  || c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		strtod(buf, &rem);	/* parse the number */
		if (rem == buf) {	/* it wasn't a valid number at all */
			/*
			 * Push the tail back BEFORE truncating: once buf[1] is
			 * zeroed, buf+1 is an empty string and nothing would be
			 * returned to the input.
			 */
			unputstr(buf+1);	/* put rest back for later */
			buf[1] = 0;	/* return one character as token */
			retc = buf[0];	/* character is its own type */
		} else {	/* some prefix was a number */
			unputstr(rem);	/* put rest back for later */
			rem[0] = 0;	/* truncate buf after number part */
			retc = '0';	/* type is number */
		}
	}
	*pbuf = buf;
	*psz = sz;
	return retc;
}
163
/* Helpers defined further down in this file. */
int word(char *);
int string(void);
int regexpr(void);

/* One-shot flags examined at the top of yylex(). */
int sc = 0;	/* 1 => return a } right now */
int reg = 0;	/* 1 => return a REGEXPR now */
169
170 int yylex(void)
171 {
172         int c;
173         static char *buf = 0;
174         static int bufsize = 5; /* BUG: setting this small causes core dump! */
175
176         if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
177                 FATAL( "out of space in yylex" );
178         if (sc) {
179                 sc = 0;
180                 RET('}');
181         }
182         if (reg) {
183                 reg = 0;
184                 return regexpr();
185         }
186         for (;;) {
187                 c = gettok(&buf, &bufsize);
188                 if (c == 0)
189                         return 0;
190                 if (isalpha(c) || c == '_')
191                         return word(buf);
192                 if (isdigit(c)) {
193                         yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
194                         /* should this also have STR set? */
195                         RET(NUMBER);
196                 }
197         
198                 yylval.i = c;
199                 switch (c) {
200                 case '\n':      /* {EOL} */
201                         RET(NL);
202                 case '\r':      /* assume \n is coming */
203                 case ' ':       /* {WS}+ */
204                 case '\t':
205                         break;
206                 case '#':       /* #.* strip comments */
207                         while ((c = input()) != '\n' && c != 0)
208                                 ;
209                         unput(c);
210                         break;
211                 case ';':
212                         RET(';');
213                 case '\\':
214                         if (peek() == '\n') {
215                                 input();
216                         } else if (peek() == '\r') {
217                                 input(); input();       /* \n */
218                                 lineno++;
219                         } else {
220                                 RET(c);
221                         }
222                         break;
223                 case '&':
224                         if (peek() == '&') {
225                                 input(); RET(AND);
226                         } else 
227                                 RET('&');
228                 case '|':
229                         if (peek() == '|') {
230                                 input(); RET(BOR);
231                         } else
232                                 RET('|');
233                 case '!':
234                         if (peek() == '=') {
235                                 input(); yylval.i = NE; RET(NE);
236                         } else if (peek() == '~') {
237                                 input(); yylval.i = NOTMATCH; RET(MATCHOP);
238                         } else
239                                 RET(NOT);
240                 case '~':
241                         yylval.i = MATCH;
242                         RET(MATCHOP);
243                 case '<':
244                         if (peek() == '=') {
245                                 input(); yylval.i = LE; RET(LE);
246                         } else {
247                                 yylval.i = LT; RET(LT);
248                         }
249                 case '=':
250                         if (peek() == '=') {
251                                 input(); yylval.i = EQ; RET(EQ);
252                         } else {
253                                 yylval.i = ASSIGN; RET(ASGNOP);
254                         }
255                 case '>':
256                         if (peek() == '=') {
257                                 input(); yylval.i = GE; RET(GE);
258                         } else if (peek() == '>') {
259                                 input(); yylval.i = APPEND; RET(APPEND);
260                         } else {
261                                 yylval.i = GT; RET(GT);
262                         }
263                 case '+':
264                         if (peek() == '+') {
265                                 input(); yylval.i = INCR; RET(INCR);
266                         } else if (peek() == '=') {
267                                 input(); yylval.i = ADDEQ; RET(ASGNOP);
268                         } else
269                                 RET('+');
270                 case '-':
271                         if (peek() == '-') {
272                                 input(); yylval.i = DECR; RET(DECR);
273                         } else if (peek() == '=') {
274                                 input(); yylval.i = SUBEQ; RET(ASGNOP);
275                         } else
276                                 RET('-');
277                 case '*':
278                         if (peek() == '=') {    /* *= */
279                                 input(); yylval.i = MULTEQ; RET(ASGNOP);
280                         } else if (peek() == '*') {     /* ** or **= */
281                                 input();        /* eat 2nd * */
282                                 if (peek() == '=') {
283                                         input(); yylval.i = POWEQ; RET(ASGNOP);
284                                 } else {
285                                         RET(POWER);
286                                 }
287                         } else
288                                 RET('*');
289                 case '/':
290                         RET('/');
291                 case '%':
292                         if (peek() == '=') {
293                                 input(); yylval.i = MODEQ; RET(ASGNOP);
294                         } else
295                                 RET('%');
296                 case '^':
297                         if (peek() == '=') {
298                                 input(); yylval.i = POWEQ; RET(ASGNOP);
299                         } else
300                                 RET(POWER);
301
302                 case '$':
303                         /* BUG: awkward, if not wrong */
304                         c = gettok(&buf, &bufsize);
305                         if (isalpha(c)) {
306                                 if (strcmp(buf, "NF") == 0) {   /* very special */
307                                         unputstr("(NF)");
308                                         RET(INDIRECT);
309                                 }
310                                 c = peek();
311                                 if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
312                                         unputstr(buf);
313                                         RET(INDIRECT);
314                                 }
315                                 yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
316                                 RET(IVAR);
317                         } else if (c == 0) {    /*  */
318                                 SYNTAX( "unexpected end of input after $" );
319                                 RET(';');
320                         } else {
321                                 unputstr(buf);
322                                 RET(INDIRECT);
323                         }
324         
325                 case '}':
326                         if (--bracecnt < 0)
327                                 SYNTAX( "extra }" );
328                         sc = 1;
329                         RET(';');
330                 case ']':
331                         if (--brackcnt < 0)
332                                 SYNTAX( "extra ]" );
333                         RET(']');
334                 case ')':
335                         if (--parencnt < 0)
336                                 SYNTAX( "extra )" );
337                         RET(')');
338                 case '{':
339                         bracecnt++;
340                         RET('{');
341                 case '[':
342                         brackcnt++;
343                         RET('[');
344                 case '(':
345                         parencnt++;
346                         RET('(');
347         
348                 case '"':
349                         return string();        /* BUG: should be like tran.c ? */
350         
351                 default:
352                         RET(c);
353                 }
354         }
355 }
356
357 int string(void)
358 {
359         int c, n;
360         char *s, *bp;
361         static char *buf = 0;
362         static int bufsz = 500;
363
364         if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
365                 FATAL("out of space for strings");
366         for (bp = buf; (c = input()) != '"'; ) {
367                 if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
368                         FATAL("out of space for string %.10s...", buf);
369                 switch (c) {
370                 case '\n':
371                 case '\r':
372                 case 0:
373                         SYNTAX( "non-terminated string %.10s...", buf );
374                         lineno++;
375                         if (c == 0)     /* hopeless */
376                                 FATAL( "giving up" );
377                         break;
378                 case '\\':
379                         c = input();
380                         switch (c) {
381                         case '"': *bp++ = '"'; break;
382                         case 'n': *bp++ = '\n'; break;  
383                         case 't': *bp++ = '\t'; break;
384                         case 'f': *bp++ = '\f'; break;
385                         case 'r': *bp++ = '\r'; break;
386                         case 'b': *bp++ = '\b'; break;
387                         case 'v': *bp++ = '\v'; break;
388                         case 'a': *bp++ = '\007'; break;
389                         case '\\': *bp++ = '\\'; break;
390
391                         case '0': case '1': case '2': /* octal: \d \dd \ddd */
392                         case '3': case '4': case '5': case '6': case '7':
393                                 n = c - '0';
394                                 if ((c = peek()) >= '0' && c < '8') {
395                                         n = 8 * n + input() - '0';
396                                         if ((c = peek()) >= '0' && c < '8')
397                                                 n = 8 * n + input() - '0';
398                                 }
399                                 *bp++ = n;
400                                 break;
401
402                         case 'x':       /* hex  \x0-9a-fA-F + */
403                             {   char xbuf[100], *px;
404                                 for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
405                                         if (isdigit(c)
406                                          || (c >= 'a' && c <= 'f')
407                                          || (c >= 'A' && c <= 'F'))
408                                                 *px++ = c;
409                                         else
410                                                 break;
411                                 }
412                                 *px = 0;
413                                 unput(c);
414                                 sscanf(xbuf, "%x", (unsigned int *) &n);
415                                 *bp++ = n;
416                                 break;
417                             }
418
419                         default: 
420                                 *bp++ = c;
421                                 break;
422                         }
423                         break;
424                 default:
425                         *bp++ = c;
426                         break;
427                 }
428         }
429         *bp = 0; 
430         s = tostring(buf);
431         *bp++ = ' '; *bp++ = 0;
432         yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
433         RET(STRING);
434 }
435
436
437 int binsearch(char *w, Keyword *kp, int n)
438 {
439         int cond, low, mid, high;
440
441         low = 0;
442         high = n - 1;
443         while (low <= high) {
444                 mid = (low + high) / 2;
445                 if ((cond = strcmp(w, kp[mid].word)) < 0)
446                         high = mid - 1;
447                 else if (cond > 0)
448                         low = mid + 1;
449                 else
450                         return mid;
451         }
452         return -1;
453 }
454
455 int word(char *w) 
456 {
457         Keyword *kp;
458         int c, n;
459
460         n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
461 /* BUG: this ought to be inside the if; in theory could fault (daniel barrett) */
462         kp = keywords + n;
463         if (n != -1) {  /* found in table */
464                 yylval.i = kp->sub;
465                 switch (kp->type) {     /* special handling */
466                 case BLTIN:
467                         if (kp->sub == FSYSTEM && safe)
468                                 SYNTAX( "system is unsafe" );
469                         RET(kp->type);
470                 case FUNC:
471                         if (infunc)
472                                 SYNTAX( "illegal nested function" );
473                         RET(kp->type);
474                 case RETURN:
475                         if (!infunc)
476                                 SYNTAX( "return not in function" );
477                         RET(kp->type);
478                 case VARNF:
479                         yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
480                         RET(VARNF);
481                 default:
482                         RET(kp->type);
483                 }
484         }
485         c = peek();     /* look for '(' */
486         if (c != '(' && infunc && (n=isarg(w)) >= 0) {
487                 yylval.i = n;
488                 RET(ARG);
489         } else {
490                 yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
491                 if (c == '(') {
492                         RET(CALL);
493                 } else {
494                         RET(VAR);
495                 }
496         }
497 }
498
499 void startreg(void)     /* next call to yylex will return a regular expression */
500 {
501         reg = 1;
502 }
503
504 int regexpr(void)
505 {
506         int c;
507         static char *buf = 0;
508         static int bufsz = 500;
509         char *bp;
510
511         if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
512                 FATAL("out of space for rex expr");
513         bp = buf;
514         for ( ; (c = input()) != '/' && c != 0; ) {
515                 if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
516                         FATAL("out of space for reg expr %.10s...", buf);
517                 if (c == '\n') {
518                         SYNTAX( "newline in regular expression %.10s...", buf ); 
519                         unput('\n');
520                         break;
521                 } else if (c == '\\') {
522                         *bp++ = '\\'; 
523                         *bp++ = input();
524                 } else {
525                         *bp++ = c;
526                 }
527         }
528         *bp = 0;
529         if (c == 0)
530                 SYNTAX("non-terminated regular expression %.10s...", buf);
531         yylval.s = tostring(buf);
532         unput('/');
533         RET(REGEXPR);
534 }
535
536 /* low-level lexical stuff, sort of inherited from lex */
537
char ebuf[300];			/* circular record of characters handed out by input() */
char *ep = ebuf;		/* next slot of ebuf to be written */
char yysbuf[100];		/* pushback buffer */
char *yysptr = yysbuf;		/* one past the most recently pushed-back character */
FILE *yyin = 0;
543
544 int input(void) /* get next lexical input character */
545 {
546         int c;
547         extern char *lexprog;
548
549         if (yysptr > yysbuf)
550                 c = (uschar)*--yysptr;
551         else if (lexprog != NULL) {     /* awk '...' */
552                 if ((c = (uschar)*lexprog) != 0)
553                         lexprog++;
554         } else                          /* awk -f ... */
555                 c = pgetc();
556         if (c == '\n')
557                 lineno++;
558         else if (c == EOF)
559                 c = 0;
560         if (ep >= ebuf + sizeof ebuf)
561                 ep = ebuf;
562         return *ep++ = c;
563 }
564
565 void unput(int c)       /* put lexical character back on input */
566 {
567         if (c == '\n')
568                 lineno--;
569         if (yysptr >= yysbuf + sizeof(yysbuf))
570                 FATAL("pushed back too much: %.20s...", yysbuf);
571         *yysptr++ = c;
572         if (--ep < ebuf)
573                 ep = ebuf + sizeof(ebuf) - 1;
574 }
575
/*
 * unputstr - put the string s back on the input.
 *
 * Characters are pushed last-to-first so that they are read again in
 * their original order.  An empty string pushes nothing.
 */
void unputstr(const char *s)
{
	size_t n;

	/* count down in size_t: no unsigned-to-int wraparound for "" */
	for (n = strlen(s); n > 0; n--)
		unput(s[n-1]);
}