Merge from vendor branch TNFTP:
[dragonfly.git] / usr.bin / window / scanner.c
1 /*
2  * Copyright (c) 1983, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Edward Wang at The University of California, Berkeley.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by the University of
19  *      California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  * @(#)scanner.c        8.1 (Berkeley) 6/6/93
37  * $FreeBSD: src/usr.bin/window/scanner.c,v 1.2.14.2 2001/05/17 09:46:29 obrien Exp $
38  * $DragonFly: src/usr.bin/window/scanner.c,v 1.2 2003/06/17 04:29:34 dillon Exp $
39  */
40
41 #include <ctype.h>
42 #include "value.h"
43 #include "token.h"
44 #include "context.h"
45 #include "string.h"
46 #include "mystring.h"
47
48 s_getc()
49 {
50         register c;
51
52         switch (cx.x_type) {
53         case X_FILE:
54                 c = getc(cx.x_fp);
55                 if (cx.x_bol && c != EOF) {
56                         cx.x_bol = 0;
57                         cx.x_lineno++;
58                 }
59                 if (c == '\n')
60                         cx.x_bol = 1;
61                 return c;
62         case X_BUF:
63                 if (*cx.x_bufp != 0)
64                         return *cx.x_bufp++ & 0xff;
65                 else
66                         return EOF;
67         }
68         /*NOTREACHED*/
69 }
70
71 s_ungetc(c)
72 {
73         if (c == EOF)
74                 return EOF;
75         switch (cx.x_type) {
76         case X_FILE:
77                 cx.x_bol = 0;
78                 return ungetc(c, cx.x_fp);
79         case X_BUF:
80                 if (cx.x_bufp > cx.x_buf)
81                         return *--cx.x_bufp = c;
82                 else
83                         return EOF;
84         }
85         /*NOTREACHED*/
86 }
87
88 s_gettok()
89 {
90         char buf[100];
91         register char *p = buf;
92         register c;
93         register state = 0;
94
95 loop:
96         c = s_getc();
97         switch (state) {
98         case 0:
99                 switch (c) {
100                 case ' ':
101                 case '\t':
102                         break;
103                 case '\n':
104                 case ';':
105                         cx.x_token = T_EOL;
106                         state = -1;
107                         break;
108                 case '#':
109                         state = 1;
110                         break;
111                 case EOF:
112                         cx.x_token = T_EOF;
113                         state = -1;
114                         break;
115                 case '"':
116                         state = 3;
117                         break;
118                 case '\'':
119                         state = 4;
120                         break;
121                 case '\\':
122                         switch (c = s_gettok1()) {
123                         case -1:
124                                 break;
125                         case -2:
126                                 state = 0;
127                                 break;
128                         default:
129                                 *p++ = c;
130                                 state = 2;
131                         }
132                         break;
133                 case '0':
134                         cx.x_val.v_num = 0;
135                         state = 10;
136                         break;
137                 case '1': case '2': case '3': case '4':
138                 case '5': case '6': case '7': case '8': case '9':
139                         cx.x_val.v_num = c - '0';
140                         state = 11;
141                         break;
142                 case '>':
143                         state = 20;
144                         break;
145                 case '<':
146                         state = 21;
147                         break;
148                 case '=':
149                         state = 22;
150                         break;
151                 case '!':
152                         state = 23;
153                         break;
154                 case '&':
155                         state = 24;
156                         break;
157                 case '|':
158                         state = 25;
159                         break;
160                 case '$':
161                         state = 26;
162                         break;
163                 case '~':
164                         cx.x_token = T_COMP;
165                         state = -1;
166                         break;
167                 case '+':
168                         cx.x_token = T_PLUS;
169                         state = -1;
170                         break;
171                 case '-':
172                         cx.x_token = T_MINUS;
173                         state = -1;
174                         break;
175                 case '*':
176                         cx.x_token = T_MUL;
177                         state = -1;
178                         break;
179                 case '/':
180                         cx.x_token = T_DIV;
181                         state = -1;
182                         break;
183                 case '%':
184                         cx.x_token = T_MOD;
185                         state = -1;
186                         break;
187                 case '^':
188                         cx.x_token = T_XOR;
189                         state = -1;
190                         break;
191                 case '(':
192                         cx.x_token = T_LP;
193                         state = -1;
194                         break;
195                 case ')':
196                         cx.x_token = T_RP;
197                         state = -1;
198                         break;
199                 case ',':
200                         cx.x_token = T_COMMA;
201                         state = -1;
202                         break;
203                 case '?':
204                         cx.x_token = T_QUEST;
205                         state = -1;
206                         break;
207                 case ':':
208                         cx.x_token = T_COLON;
209                         state = -1;
210                         break;
211                 case '[':
212                         cx.x_token = T_LB;
213                         state = -1;
214                         break;
215                 case ']':
216                         cx.x_token = T_RB;
217                         state = -1;
218                         break;
219                 default:
220                         if (isalpha(c) || c == '_' || c == '.') {
221                                 *p++ = c;
222                                 state = 2;
223                                 break;
224                         }
225                         cx.x_val.v_num = c;
226                         cx.x_token = T_CHAR;
227                         state = -1;
228                         break;
229                 }
230                 break;
231         case 1:                         /* got # */
232                 if (c == '\n' || c == EOF) {
233                         (void) s_ungetc(c);
234                         state = 0;
235                 }
236                 break;
237         case 2:                         /* unquoted string */
238                 switch (c) {
239                 case '"':
240                         state = 3;
241                         break;
242                 case '\'':
243                         state = 4;
244                         break;
245                 case '\\':
246                         switch (c = s_gettok1()) {
247                         case -2:
248                                 (void) s_ungetc(' ');
249                         case -1:
250                                 break;
251                         default:
252                                 if (p < buf + sizeof buf - 1)
253                                         *p++ = c;
254                         }
255                         break;
256                 default:
257                         if (isalnum(c) || c == '_' || c == '.') {
258                                 if (p < buf + sizeof buf - 1)
259                                         *p++ = c;
260                                 break;
261                         }
262                         (void) s_ungetc(c);
263                 case EOF:
264                         *p = 0;
265                         cx.x_token = T_STR;
266                         switch (*buf) {
267                         case 'i':
268                                 if (buf[1] == 'f' && buf[2] == 0)
269                                         cx.x_token = T_IF;
270                                 break;
271                         case 't':
272                                 if (buf[1] == 'h' && buf[2] == 'e'
273                                     && buf[3] == 'n' && buf[4] == 0)
274                                         cx.x_token = T_THEN;
275                                 break;
276                         case 'e':
277                                 if (buf[1] == 'n' && buf[2] == 'd'
278                                     && buf[3] == 'i' && buf[4] == 'f'
279                                     && buf[5] == 0)
280                                         cx.x_token = T_ENDIF;
281                                 else if (buf[1] == 'l' && buf[2] == 's')
282                                         if (buf[3] == 'i' && buf[4] == 'f'
283                                             && buf[5] == 0)
284                                                 cx.x_token = T_ELSIF;
285                                         else if (buf[3] == 'e' && buf[4] == 0)
286                                                 cx.x_token = T_ELSE;
287                                 break;
288                         }
289                         if (cx.x_token == T_STR
290                             && (cx.x_val.v_str = str_cpy(buf)) == 0) {
291                                 p_memerror();
292                                 cx.x_token = T_EOF;
293                         }
294                         state = -1;
295                         break;
296                 }
297                 break;
298         case 3:                         /* " quoted string */
299                 switch (c) {
300                 case '\n':
301                         (void) s_ungetc(c);
302                 case EOF:
303                 case '"':
304                         state = 2;
305                         break;
306                 case '\\':
307                         switch (c = s_gettok1()) {
308                         case -1:
309                         case -2:        /* newlines are invisible */
310                                 break;
311                         default:
312                                 if (p < buf + sizeof buf - 1)
313                                         *p++ = c;
314                         }
315                         break;
316                 default:
317                         if (p < buf + sizeof buf - 1)
318                                 *p++ = c;
319                         break;
320                 }
321                 break;
322         case 4:                         /* ' quoted string */
323                 switch (c) {
324                 case '\n':
325                         (void) s_ungetc(c);
326                 case EOF:
327                 case '\'':
328                         state = 2;
329                         break;
330                 case '\\':
331                         switch (c = s_gettok1()) {
332                         case -1:
333                         case -2:        /* newlines are invisible */
334                                 break;
335                         default:
336                                 if (p < buf + sizeof buf - 1)
337                                         *p++ = c;
338                         }
339                         break;
340                 default:
341                         if (p < buf + sizeof buf - 1)
342                                 *p++ = c;
343                         break;
344                 }
345                 break;
346         case 10:                        /* got 0 */
347                 switch (c) {
348                 case 'x':
349                 case 'X':
350                         cx.x_val.v_num = 0;
351                         state = 12;
352                         break;
353                 case '0': case '1': case '2': case '3': case '4':
354                 case '5': case '6': case '7':
355                         cx.x_val.v_num = c - '0';
356                         state = 13;
357                         break;
358                 case '8': case '9':
359                         cx.x_val.v_num = c - '0';
360                         state = 11;
361                         break;
362                 default:
363                         (void) s_ungetc(c);
364                         state = -1;
365                         cx.x_token = T_NUM;
366                 }
367                 break;
368         case 11:                        /* decimal number */
369                 switch (c) {
370                 case '0': case '1': case '2': case '3': case '4':
371                 case '5': case '6': case '7': case '8': case '9':
372                         cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0';
373                         break;
374                 default:
375                         (void) s_ungetc(c);
376                         state = -1;
377                         cx.x_token = T_NUM;
378                 }
379                 break;
380         case 12:                        /* hex number */
381                 switch (c) {
382                 case '0': case '1': case '2': case '3': case '4':
383                 case '5': case '6': case '7': case '8': case '9':
384                         cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0';
385                         break;
386                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
387                         cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10;
388                         break;
389                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
390                         cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10;
391                         break;
392                 default:
393                         (void) s_ungetc(c);
394                         state = -1;
395                         cx.x_token = T_NUM;
396                 }
397                 break;
398         case 13:                        /* octal number */
399                 switch (c) {
400                 case '0': case '1': case '2': case '3': case '4':
401                 case '5': case '6': case '7':
402                         cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0';
403                         break;
404                 default:
405                         (void) s_ungetc(c);
406                         state = -1;
407                         cx.x_token = T_NUM;
408                 }
409                 break;
410         case 20:                        /* got > */
411                 switch (c) {
412                 case '=':
413                         cx.x_token = T_GE;
414                         state = -1;
415                         break;
416                 case '>':
417                         cx.x_token = T_RS;
418                         state = -1;
419                         break;
420                 default:
421                         (void) s_ungetc(c);
422                         cx.x_token = T_GT;
423                         state = -1;
424                 }
425                 break;
426         case 21:                        /* got < */
427                 switch (c) {
428                 case '=':
429                         cx.x_token = T_LE;
430                         state = -1;
431                         break;
432                 case '<':
433                         cx.x_token = T_LS;
434                         state = -1;
435                         break;
436                 default:
437                         (void) s_ungetc(c);
438                         cx.x_token = T_LT;
439                         state = -1;
440                 }
441                 break;
442         case 22:                        /* got = */
443                 switch (c) {
444                 case '=':
445                         cx.x_token = T_EQ;
446                         state = -1;
447                         break;
448                 default:
449                         (void) s_ungetc(c);
450                         cx.x_token = T_ASSIGN;
451                         state = -1;
452                 }
453                 break;
454         case 23:                        /* got ! */
455                 switch (c) {
456                 case '=':
457                         cx.x_token = T_NE;
458                         state = -1;
459                         break;
460                 default:
461                         (void) s_ungetc(c);
462                         cx.x_token = T_NOT;
463                         state = -1;
464                 }
465                 break;
466         case 24:                        /* got & */
467                 switch (c) {
468                 case '&':
469                         cx.x_token = T_ANDAND;
470                         state = -1;
471                         break;
472                 default:
473                         (void) s_ungetc(c);
474                         cx.x_token = T_AND;
475                         state = -1;
476                 }
477                 break;
478         case 25:                        /* got | */
479                 switch (c) {
480                 case '|':
481                         cx.x_token = T_OROR;
482                         state = -1;
483                         break;
484                 default:
485                         (void) s_ungetc(c);
486                         cx.x_token = T_OR;
487                         state = -1;
488                 }
489                 break;
490         case 26:                        /* got $ */
491                 switch (c) {
492                 case '?':
493                         cx.x_token = T_DQ;
494                         state = -1;
495                         break;
496                 default:
497                         (void) s_ungetc(c);
498                         cx.x_token = T_DOLLAR;
499                         state = -1;
500                 }
501                 break;
502         default:
503                 abort();
504         }
505         if (state >= 0)
506                 goto loop;
507         return cx.x_token;
508 }
509
/*
 * Read the remainder of a backslash escape; the backslash itself has
 * already been consumed by the caller.
 *
 * Returns:
 *	-1	if end of input follows the backslash
 *	-2	if a newline follows the backslash
 *	the control character for \b \f \n \r \t
 *	the value of one to three octal digits for \N, \NN, \NNN
 *	the character itself for anything else
 *
 * The character that ends a short octal escape is pushed back.
 */
int
s_gettok1(void)
{
	int c, n, ndigits;

	c = s_getc();			/* got \ */
	switch (c) {
	case EOF:
		return -1;
	case '\n':
		return -2;
	case 'b':
		return '\b';
	case 'f':
		return '\f';
	case 'n':
		return '\n';
	case 'r':
		return '\r';
	case 't':
		return '\t';
	default:
		break;
	}
	if (c < '0' || c > '7')
		return c;		/* any other character stands for itself */

	/* octal escape: the first digit is in hand, accept up to two more */
	n = c - '0';
	for (ndigits = 1; ndigits < 3; ndigits++) {
		c = s_getc();
		if (c < '0' || c > '7') {
			(void) s_ungetc(c);
			break;
		}
		n = n * 8 + c - '0';
	}
	return n;
}