2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980, 1993
4 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * @(#)lexi.c 8.1 (Berkeley) 6/6/93
36 * $FreeBSD: src/usr.bin/indent/lexi.c,v 1.21 2010/04/15 21:41:07 avg Exp $
40 * Here we have the token scanner for indent. It scans off one token and puts
41 * it in the global variable "token". It returns a code, indicating the type
50 #include "indent_globs.h"
51 #include "indent_codes.h"
62 struct templ specials[1000] =
98 { /* this is used to facilitate the decision of
99 * what type (alphanumeric, operator) each
101 0, 0, 0, 0, 0, 0, 0, 0,
102 0, 0, 0, 0, 0, 0, 0, 0,
103 0, 0, 0, 0, 0, 0, 0, 0,
104 0, 0, 0, 0, 0, 0, 0, 0,
105 0, 3, 0, 0, 1, 3, 3, 0,
106 0, 0, 3, 3, 0, 3, 0, 3,
107 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 0, 0, 3, 3, 3, 3,
109 0, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 0, 0, 0, 3, 1,
113 0, 1, 1, 1, 1, 1, 1, 1,
114 1, 1, 1, 1, 1, 1, 1, 1,
115 1, 1, 1, 1, 1, 1, 1, 1,
116 1, 1, 1, 0, 3, 0, 3, 0
122 int unary_delim; /* this is set to 1 if the current token
123 * forces a following operator to be unary */
124 static int last_code; /* the last token type returned */
125 static int l_struct; /* set to 1 if the last token was 'struct' */
126 int code; /* internal code to be returned */
127 char qchar; /* the delimiter character for a string */
129 e_token = s_token; /* point to start of place to save token */
131 ps.col_1 = ps.last_nl; /* tell world that this token started in
132 * column 1 iff the last thing scanned was nl */
135 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
136 ps.col_1 = false; /* leading blanks imply token is not in column
138 if (++buf_ptr >= buf_end)
142 /* Scan an alphanumeric token */
143 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
145 * we have a character or number
147 const char *j; /* used for searching thru list of
152 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
156 if (*buf_ptr == '0' &&
157 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
158 *e_token++ = *buf_ptr++;
159 *e_token++ = *buf_ptr++;
160 while (isxdigit(*buf_ptr)) {
162 *e_token++ = *buf_ptr++;
167 if (*buf_ptr == '.') {
174 *e_token++ = *buf_ptr++;
175 if (!isdigit(*buf_ptr) && *buf_ptr != '.') {
176 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
182 *e_token++ = *buf_ptr++;
183 if (*buf_ptr == '+' || *buf_ptr == '-')
184 *e_token++ = *buf_ptr++;
189 if (!(seensfx & 1) &&
190 (*buf_ptr == 'U' || *buf_ptr == 'u')) {
192 *e_token++ = *buf_ptr++;
196 if (!(seensfx & 2) &&
197 (*buf_ptr == 'L' || *buf_ptr == 'l')) {
199 if (buf_ptr[1] == buf_ptr[0])
200 *e_token++ = *buf_ptr++;
201 *e_token++ = *buf_ptr++;
209 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) {
210 /* fill_buffer() terminates buffer with newline */
211 if (*buf_ptr == BACKSLASH) {
212 if (*(buf_ptr + 1) == '\n') {
214 if (buf_ptr >= buf_end)
221 *e_token++ = *buf_ptr++;
222 if (buf_ptr >= buf_end)
226 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
227 if (++buf_ptr >= buf_end)
230 ps.its_a_keyword = false;
231 ps.sizeof_keyword = false;
232 if (l_struct && !ps.p_l_follow) {
233 /* if last token was 'struct' and we're not
234 * in parentheses, then this token
235 * should be treated as a declaration */
241 ps.last_u_d = l_struct; /* Operator after identifier is binary
242 * unless last token was 'struct' */
244 last_code = ident; /* Remember that this is the code we will
248 const char *q = s_token;
249 size_t q_len = strlen(q);
250 /* Check whether the token ends in "_t" */
252 (strcmp(q + q_len - 2, "_t") == 0)) {
253 ps.its_a_keyword = true;
255 goto found_auto_typedef;
260 * This loop will check if the token is a keyword.
262 for (p = specials; (j = p->rwd) != NULL; p++) {
263 const char *q = s_token; /* point at scanned token */
264 if (*j++ != *q++ || *j++ != *q++)
265 continue; /* This test depends on the fact that
266 * identifiers are always at least 1 character
267 * long (ie. the first two bytes of the
268 * identifier are always meaningful) */
270 break; /* If it's a one-character identifier */
273 goto found_keyword; /* I wish that C had a multi-level
276 if (p->rwd) { /* we have a keyword */
278 ps.its_a_keyword = true;
281 case 1: /* it is a switch */
283 case 2: /* a case or default */
286 case 3: /* a "struct" */
288 * Next time around, we will want to know that we have had a
294 case 4: /* one of the declaration keywords */
297 ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.sizeof_mask;
298 break; /* inside parens: cast, param list or sizeof */
303 case 5: /* if, while, for */
306 case 6: /* do, else */
310 ps.sizeof_keyword = true;
311 default: /* all others are treated like any other
314 } /* end of switch */
315 } /* end of if (p->rwd) */
316 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
319 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
321 strncpy(ps.procname, token, sizeof ps.procname - 1);
322 ps.in_parameter_declaration = 1;
327 * The following hack attempts to guess whether or not the current
328 * token is in fact a declaration keyword -- one that has been
331 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
334 && (ps.last_token == rparen || ps.last_token == semicolon ||
335 ps.last_token == decl ||
336 ps.last_token == lbrace || ps.last_token == rbrace)) {
337 ps.its_a_keyword = true;
342 if (last_code == decl) /* if this is a declared variable, then
343 * following sign is unary */
344 ps.last_u_d = true; /* will make "int a -1" work */
346 return (ident); /* the ident is not in the list */
347 } /* end of processing for an alphanumeric token */
349 /* Scan a non-alphanumeric token */
351 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
354 if (++buf_ptr >= buf_end)
359 unary_delim = ps.last_u_d;
360 ps.last_nl = true; /* remember that we just had a newline */
361 code = (had_eof ? 0 : newline);
364 * if data has been exhausted, the newline is a dummy, and we should
365 * return code to stop
369 case '\'': /* start of quoted character */
370 case '"': /* start of string */
376 e_token = chfont(&bodyf, &stringf, e_token);
378 do { /* copy the string */
379 while (1) { /* move one character or [/<char>]<char> */
380 if (*buf_ptr == '\n') {
381 diag2(1, "Unterminated literal");
384 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
385 * since CHECK_SIZE guarantees that there
386 * are at least 5 entries left */
387 *e_token = *buf_ptr++;
388 if (buf_ptr >= buf_end)
390 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
391 if (*buf_ptr == '\n') /* check for escaped newline */
394 *++e_token = BACKSLASH;
395 if (*buf_ptr == BACKSLASH)
396 *++e_token = BACKSLASH;
398 *++e_token = *buf_ptr++;
399 ++e_token; /* we must increment this again because we
400 * copied two chars */
401 if (buf_ptr >= buf_end)
405 break; /* we copied one character */
406 } /* end of while (1) */
407 } while (*e_token++ != qchar);
409 e_token = chfont(&stringf, &bodyf, e_token - 1);
429 unary_delim = ps.last_u_d;
452 * if (ps.in_or_st) ps.block_init = 1;
454 /* ? code = ps.block_init ? lparen : lbrace; */
460 /* ? code = ps.block_init ? rparen : rbrace; */
464 case 014: /* a form feed */
465 unary_delim = ps.last_u_d;
466 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
482 case '+': /* check for -, +, --, ++ */
483 code = (ps.last_u_d ? unary_op : binary_op);
486 if (*buf_ptr == token[0]) {
487 /* check for doubled character */
488 *e_token++ = *buf_ptr++;
489 /* buffer overflow will be checked at end of loop */
490 if (last_code == ident || last_code == rparen) {
491 code = (ps.last_u_d ? unary_op : postop);
492 /* check for following ++ or -- */
496 else if (*buf_ptr == '=')
497 /* check for operator += */
498 *e_token++ = *buf_ptr++;
499 else if (*buf_ptr == '>') {
500 /* check for operator -> */
501 *e_token++ = *buf_ptr++;
502 if (!pointer_as_binop) {
505 ps.want_blank = false;
508 break; /* buffer overflow will be checked at end of
515 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
516 e_token[-1] = *buf_ptr++;
517 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
518 *e_token++ = *buf_ptr++;
519 *e_token++ = '='; /* Flip =+ to += */
523 if (*buf_ptr == '=') {/* == */
524 *e_token++ = '='; /* append the second '=' of "==" */
532 /* can drop thru!!! */
536 case '!': /* ops like <, <<, <=, !=, etc */
537 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
538 *e_token++ = *buf_ptr;
539 if (++buf_ptr >= buf_end)
543 *e_token++ = *buf_ptr++;
544 code = (ps.last_u_d ? unary_op : binary_op);
549 if (token[0] == '/' && *buf_ptr == '*') {
550 /* it is start of comment */
553 if (++buf_ptr >= buf_end)
557 unary_delim = ps.last_u_d;
560 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
562 * handle ||, &&, etc, and also things as in int *****i
564 *e_token++ = *buf_ptr;
565 if (++buf_ptr >= buf_end)
568 code = (ps.last_u_d ? unary_op : binary_op);
572 } /* end of switch */
573 if (code != newline) {
577 if (buf_ptr >= buf_end) /* check for input buffer empty */
579 ps.last_u_d = unary_delim;
580 *e_token = '\0'; /* null terminate the token */
585 * Add the given keyword to the keyword table, using val as the keyword type
588 addkey(char *key, int val)
590 struct templ *p = specials;
592 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
596 if (p >= specials + sizeof specials / sizeof specials[0])
597 return; /* For now, table overflows are silently