Remove advertising header from all userland binaries.
[dragonfly.git] / usr.bin / ctags / C.c
1 /*
2  * Copyright (c) 1987, 1993, 1994
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * @(#)C.c      8.4 (Berkeley) 4/2/94
30  * $FreeBSD: src/usr.bin/ctags/C.c,v 1.3.2.2 2002/07/30 00:55:07 tjr Exp $
31  * $DragonFly: src/usr.bin/ctags/C.c,v 1.2 2003/06/17 04:29:25 dillon Exp $
32  */
33
34 #include <limits.h>
35 #include <stdio.h>
36 #include <string.h>
37
38 #include "ctags.h"
39
40 static int      func_entry(void);
41 static void     hash_entry(void);
42 static void     skip_string(int);
43 static int      str_entry(int);
44
45 /*
46  * c_entries --
47  *      read .c and .h files and call appropriate routines
48  */
49 void
50 c_entries(void)
51 {
52         int     c;                      /* current character */
53         int     level;                  /* brace level */
54         int     token;                  /* if reading a token */
55         int     t_def;                  /* if reading a typedef */
56         int     t_level;                /* typedef's brace level */
57         char    *sp;                    /* buffer pointer */
58         char    tok[MAXTOKEN];          /* token buffer */
59
60         lineftell = ftell(inf);
61         sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
62         while (GETC(!=, EOF)) {
63                 switch (c) {
64                 /*
65                  * Here's where it DOESN'T handle: {
66                  *      foo(a)
67                  *      {
68                  *      #ifdef notdef
69                  *              }
70                  *      #endif
71                  *              if (a)
72                  *                      puts("hello, world");
73                  *      }
74                  */
75                 case '{':
76                         ++level;
77                         goto endtok;
78                 case '}':
79                         /*
80                          * if level goes below zero, try and fix
81                          * it, even though we've already messed up
82                          */
83                         if (--level < 0)
84                                 level = 0;
85                         goto endtok;
86
87                 case '\n':
88                         SETLINE;
89                         /*
90                          * the above 3 cases are similar in that they
91                          * are special characters that also end tokens.
92                          */
93         endtok:                 if (sp > tok) {
94                                 *sp = EOS;
95                                 token = YES;
96                                 sp = tok;
97                         }
98                         else
99                                 token = NO;
100                         continue;
101
102                 /*
103                  * We ignore quoted strings and character constants
104                  * completely.
105                  */
106                 case '"':
107                 case '\'':
108                         (void)skip_string(c);
109                         break;
110
111                 /*
112                  * comments can be fun; note the state is unchanged after
113                  * return, in case we found:
114                  *      "foo() XX comment XX { int bar; }"
115                  */
116                 case '/':
117                         if (GETC(==, '*') || c == '/') {
118                                 skip_comment(c);
119                                 continue;
120                         }
121                         (void)ungetc(c, inf);
122                         c = '/';
123                         goto storec;
124
125                 /* hash marks flag #define's. */
126                 case '#':
127                         if (sp == tok) {
128                                 hash_entry();
129                                 break;
130                         }
131                         goto storec;
132
133                 /*
134                  * if we have a current token, parenthesis on
135                  * level zero indicates a function.
136                  */
137                 case '(':
138                         if (!level && token) {
139                                 int     curline;
140
141                                 if (sp != tok)
142                                         *sp = EOS;
143                                 /*
144                                  * grab the line immediately, we may
145                                  * already be wrong, for example,
146                                  *      foo\n
147                                  *      (arg1,
148                                  */
149                                 getline();
150                                 curline = lineno;
151                                 if (func_entry()) {
152                                         ++level;
153                                         pfnote(tok, curline);
154                                 }
155                                 break;
156                         }
157                         goto storec;
158
159                 /*
160                  * semi-colons indicate the end of a typedef; if we find a
161                  * typedef we search for the next semi-colon of the same
162                  * level as the typedef.  Ignoring "structs", they are
163                  * tricky, since you can find:
164                  *
165                  *      "typedef long time_t;"
166                  *      "typedef unsigned int u_int;"
167                  *      "typedef unsigned int u_int [10];"
168                  *
169                  * If looking at a typedef, we save a copy of the last token
170                  * found.  Then, when we find the ';' we take the current
171                  * token if it starts with a valid token name, else we take
172                  * the one we saved.  There's probably some reasonable
173                  * alternative to this...
174                  */
175                 case ';':
176                         if (t_def && level == t_level) {
177                                 t_def = NO;
178                                 getline();
179                                 if (sp != tok)
180                                         *sp = EOS;
181                                 pfnote(tok, lineno);
182                                 break;
183                         }
184                         goto storec;
185
186                 /*
187                  * store characters until one that can't be part of a token
188                  * comes along; check the current token against certain
189                  * reserved words.
190                  */
191                 default:
192                         /* ignore whitespace */
193                         if (c == ' ' || c == '\t') {
194                                 int save = c;
195                                 while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
196                                         ;
197                                 if (c == EOF)
198                                         return;
199                                 (void)ungetc(c, inf);
200                                 c = save;
201                         }
202         storec:         if (!intoken(c)) {
203                                 if (sp == tok)
204                                         break;
205                                 *sp = EOS;
206                                 if (tflag) {
207                                         /* no typedefs inside typedefs */
208                                         if (!t_def &&
209                                                    !memcmp(tok, "typedef",8)) {
210                                                 t_def = YES;
211                                                 t_level = level;
212                                                 break;
213                                         }
214                                         /* catch "typedef struct" */
215                                         if ((!t_def || t_level < level)
216                                             && (!memcmp(tok, "struct", 7)
217                                             || !memcmp(tok, "union", 6)
218                                             || !memcmp(tok, "enum", 5))) {
219                                                 /*
220                                                  * get line immediately;
221                                                  * may change before '{'
222                                                  */
223                                                 getline();
224                                                 if (str_entry(c))
225                                                         ++level;
226                                                 break;
227                                                 /* } */
228                                         }
229                                 }
230                                 sp = tok;
231                         }
232                         else if (sp != tok || begtoken(c)) {
233                                 if (sp == tok + sizeof tok - 1)
234                                         /* Too long -- truncate it */
235                                         *sp = EOS;
236                                 else 
237                                         *sp++ = c;
238                                 token = YES;
239                         }
240                         continue;
241                 }
242
243                 sp = tok;
244                 token = NO;
245         }
246 }
247
248 /*
249  * func_entry --
250  *      handle a function reference
251  */
252 static int
253 func_entry(void)
254 {
255         int     c;                      /* current character */
256         int     level = 0;              /* for matching '()' */
257
258         /*
259          * Find the end of the assumed function declaration.
260          * Note that ANSI C functions can have type definitions so keep
261          * track of the parentheses nesting level.
262          */
263         while (GETC(!=, EOF)) {
264                 switch (c) {
265                 case '\'':
266                 case '"':
267                         /* skip strings and character constants */
268                         skip_string(c);
269                         break;
270                 case '/':
271                         /* skip comments */
272                         if (GETC(==, '*') || c == '/')
273                                 skip_comment(c);
274                         break;
275                 case '(':
276                         level++;
277                         break;
278                 case ')':
279                         if (level == 0)
280                                 goto fnd;
281                         level--;
282                         break;
283                 case '\n':
284                         SETLINE;
285                 }
286         }
287         return (NO);
288 fnd:
289         /*
290          * we assume that the character after a function's right paren
291          * is a token character if it's a function and a non-token
292          * character if it's a declaration.  Comments don't count...
293          */
294         for (;;) {
295                 while (GETC(!=, EOF) && iswhite(c))
296                         if (c == '\n')
297                                 SETLINE;
298                 if (intoken(c) || c == '{')
299                         break;
300                 if (c == '/' && (GETC(==, '*') || c == '/'))
301                         skip_comment(c);
302                 else {                          /* don't ever "read" '/' */
303                         (void)ungetc(c, inf);
304                         return (NO);
305                 }
306         }
307         if (c != '{')
308                 (void)skip_key('{');
309         return (YES);
310 }
311
312 /*
313  * hash_entry --
314  *      handle a line starting with a '#'
315  */
316 static void
317 hash_entry(void)
318 {
319         int     c;                      /* character read */
320         int     curline;                /* line started on */
321         char    *sp;                    /* buffer pointer */
322         char    tok[MAXTOKEN];          /* storage buffer */
323
324         /* ignore leading whitespace */
325         while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
326                 ;
327         (void)ungetc(c, inf);
328
329         curline = lineno;
330         for (sp = tok;;) {              /* get next token */
331                 if (GETC(==, EOF))
332                         return;
333                 if (iswhite(c))
334                         break;
335                 if (sp == tok + sizeof tok - 1)
336                         /* Too long -- truncate it */
337                         *sp = EOS;
338                 else 
339                         *sp++ = c;
340         }
341         *sp = EOS;
342         if (memcmp(tok, "define", 6))   /* only interested in #define's */
343                 goto skip;
344         for (;;) {                      /* this doesn't handle "#define \n" */
345                 if (GETC(==, EOF))
346                         return;
347                 if (!iswhite(c))
348                         break;
349         }
350         for (sp = tok;;) {              /* get next token */
351                 if (sp == tok + sizeof tok - 1)
352                         /* Too long -- truncate it */
353                         *sp = EOS;
354                 else 
355                         *sp++ = c;
356                 if (GETC(==, EOF))
357                         return;
358                 /*
359                  * this is where it DOESN'T handle
360                  * "#define \n"
361                  */
362                 if (!intoken(c))
363                         break;
364         }
365         *sp = EOS;
366         if (dflag || c == '(') {        /* only want macros */
367                 getline();
368                 pfnote(tok, curline);
369         }
370 skip:   if (c == '\n') {                /* get rid of rest of define */
371                 SETLINE
372                 if (*(sp - 1) != '\\')
373                         return;
374         }
375         (void)skip_key('\n');
376 }
377
378 /*
379  * str_entry --
380  *      handle a struct, union or enum entry
381  */
382 static int
383 str_entry(int c) /* c is current character */
384 {
385         int     curline;                /* line started on */
386         char    *sp;                    /* buffer pointer */
387         char    tok[LINE_MAX];          /* storage buffer */
388
389         curline = lineno;
390         while (iswhite(c))
391                 if (GETC(==, EOF))
392                         return (NO);
393         if (c == '{')           /* it was "struct {" */
394                 return (YES);
395         for (sp = tok;;) {              /* get next token */
396                 if (sp == tok + sizeof tok - 1)
397                         /* Too long -- truncate it */
398                         *sp = EOS;
399                 else 
400                         *sp++ = c;
401                 if (GETC(==, EOF))
402                         return (NO);
403                 if (!intoken(c))
404                         break;
405         }
406         switch (c) {
407                 case '{':               /* it was "struct foo{" */
408                         --sp;
409                         break;
410                 case '\n':              /* it was "struct foo\n" */
411                         SETLINE;
412                         /*FALLTHROUGH*/
413                 default:                /* probably "struct foo " */
414                         while (GETC(!=, EOF))
415                                 if (!iswhite(c))
416                                         break;
417                         if (c != '{') {
418                                 (void)ungetc(c, inf);
419                                 return (NO);
420                         }
421         }
422         *sp = EOS;
423         pfnote(tok, curline);
424         return (YES);
425 }
426
427 /*
428  * skip_comment --
429  *      skip over comment
430  */
431 void
432 skip_comment(int t) /* t is comment character */
433 {
434         int     c;                      /* character read */
435         int     star;                   /* '*' flag */
436
437         for (star = 0; GETC(!=, EOF);)
438                 switch(c) {
439                 /* comments don't nest, nor can they be escaped. */
440                 case '*':
441                         star = YES;
442                         break;
443                 case '/':
444                         if (star && t == '*')
445                                 return;
446                         break;
447                 case '\n':
448                         if (t == '/')
449                                 return;
450                         SETLINE;
451                         /*FALLTHROUGH*/
452                 default:
453                         star = NO;
454                         break;
455                 }
456 }
457
458 /*
459  * skip_string --
460  *      skip to the end of a string or character constant.
461  */
462 void
463 skip_string(int key)
464 {
465         int     c,
466                 skip;
467
468         for (skip = NO; GETC(!=, EOF); )
469                 switch (c) {
470                 case '\\':              /* a backslash escapes anything */
471                         skip = !skip;   /* we toggle in case it's "\\" */
472                         break;
473                 case '\n':
474                         SETLINE;
475                         /*FALLTHROUGH*/
476                 default:
477                         if (c == key && !skip)
478                                 return;
479                         skip = NO;
480                 }
481 }
482
483 /*
484  * skip_key --
485  *      skip to next char "key"
486  */
487 int
488 skip_key(int key)
489 {
490         int     c,
491                 skip,
492                 retval;
493
494         for (skip = retval = NO; GETC(!=, EOF);)
495                 switch(c) {
496                 case '\\':              /* a backslash escapes anything */
497                         skip = !skip;   /* we toggle in case it's "\\" */
498                         break;
499                 case ';':               /* special case for yacc; if one */
500                 case '|':               /* of these chars occurs, we may */
501                         retval = YES;   /* have moved out of the rule */
502                         break;          /* not used by C */
503                 case '\'':
504                 case '"':
505                         /* skip strings and character constants */
506                         skip_string(c);
507                         break;
508                 case '/':
509                         /* skip comments */
510                         if (GETC(==, '*') || c == '/') {
511                                 skip_comment(c);
512                                 break;
513                         }
514                         (void)ungetc(c, inf);
515                         c = '/';
516                         goto norm;
517                 case '\n':
518                         SETLINE;
519                         /*FALLTHROUGH*/
520                 default:
521                 norm:
522                         if (c == key && !skip)
523                                 return (retval);
524                         skip = NO;
525                 }
526         return (retval);
527 }