Initial import from FreeBSD RELENG_4:
[dragonfly.git] / usr.bin / ctags / C.c
1 /*
2  * Copyright (c) 1987, 1993, 1994
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *      This product includes software developed by the University of
16  *      California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33
34 #if 0
35 #ifndef lint
36 static char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94";
37 #endif
38 #endif
39
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD: src/usr.bin/ctags/C.c,v 1.3.2.2 2002/07/30 00:55:07 tjr Exp $");
42
43 #include <limits.h>
44 #include <stdio.h>
45 #include <string.h>
46
47 #include "ctags.h"
48
/* Forward declarations for the helpers defined later in this file. */
static int      func_entry(void);       /* decide whether "name(" is a definition */
static void     hash_entry(void);       /* handle a line starting with '#' */
static void     skip_string(int);       /* skip a string or character constant */
static int      str_entry(int);         /* handle a struct, union or enum */
53
/*
 * c_entries --
 *      read .c and .h files and call appropriate routines
 *
 *      Reads the input one character at a time with GETC until EOF,
 *      tracking the brace nesting level and collecting identifier-like
 *      characters into a token buffer.  It hands off to func_entry() for
 *      "token(" at brace level zero, to hash_entry() for a '#' seen while
 *      no token is being collected, and, when tflag is set, to str_entry()
 *      for struct/union/enum and to pfnote() for typedef names.
 *
 *      Within the switch, "continue" keeps the token state as it is, while
 *      "break" falls to the bottom of the loop, which discards the current
 *      token (sp = tok, token = NO).
 */
void
c_entries(void)
{
        int     c;                      /* current character */
        int     level;                  /* brace level */
        int     token;                  /* if reading a token */
        int     t_def;                  /* if reading a typedef */
        int     t_level;                /* typedef's brace level */
        char    *sp;                    /* buffer pointer */
        char    tok[MAXTOKEN];          /* token buffer */

        /* reset per-file state: start at line 1, outside any braces */
        lineftell = ftell(inf);
        sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
        while (GETC(!=, EOF)) {
                switch (c) {
                /*
                 * Here's where it DOESN'T handle: {
                 *      foo(a)
                 *      {
                 *      #ifdef notdef
                 *              }
                 *      #endif
                 *              if (a)
                 *                      puts("hello, world");
                 *      }
                 */
                case '{':
                        ++level;
                        goto endtok;
                case '}':
                        /*
                         * if level goes below zero, try and fix
                         * it, even though we've already messed up
                         */
                        if (--level < 0)
                                level = 0;
                        goto endtok;

                case '\n':
                        SETLINE;
                        /*
                         * the above 3 cases are similar in that they
                         * are special characters that also end tokens.
                         */
                        /*
                         * If characters were collected, terminate them and
                         * remember that a token is pending.  The buffer
                         * pointer is rewound but the text stays in tok so
                         * the '(' case below can still use it.
                         */
        endtok:                 if (sp > tok) {
                                *sp = EOS;
                                token = YES;
                                sp = tok;
                        }
                        else
                                token = NO;
                        continue;

                /*
                 * We ignore quoted strings and character constants
                 * completely.
                 */
                case '"':
                case '\'':
                        (void)skip_string(c);
                        break;

                /*
                 * comments can be fun; note the state is unchanged after
                 * return, in case we found:
                 *      "foo() XX comment XX { int bar; }"
                 */
                case '/':
                        if (GETC(==, '*') || c == '/') {
                                skip_comment(c);
                                continue;
                        }
                        /*
                         * Not a comment: push the look-ahead character
                         * back and treat the '/' as an ordinary character.
                         */
                        (void)ungetc(c, inf);
                        c = '/';
                        goto storec;

                /* hash marks flag #define's. */
                case '#':
                        /* only when no token is being collected */
                        if (sp == tok) {
                                hash_entry();
                                break;
                        }
                        goto storec;

                /*
                 * if we have a current token, parenthesis on
                 * level zero indicates a function.
                 */
                case '(':
                        if (!level && token) {
                                int     curline;

                                if (sp != tok)
                                        *sp = EOS;
                                /*
                                 * grab the line immediately, we may
                                 * already be wrong, for example,
                                 *      foo\n
                                 *      (arg1,
                                 */
                                getline();
                                curline = lineno;
                                /*
                                 * func_entry() returns YES only after the
                                 * opening '{' of the body has been consumed,
                                 * so account for it in the brace level here.
                                 */
                                if (func_entry()) {
                                        ++level;
                                        pfnote(tok, curline);
                                }
                                break;
                        }
                        goto storec;

                /*
                 * semi-colons indicate the end of a typedef; if we find a
                 * typedef we search for the next semi-colon of the same
                 * level as the typedef.  Ignoring "structs", they are
                 * tricky, since you can find:
                 *
                 *      "typedef long time_t;"
                 *      "typedef unsigned int u_int;"
                 *      "typedef unsigned int u_int [10];"
                 *
                 * If looking at a typedef, we save a copy of the last token
                 * found.  Then, when we find the ';' we take the current
                 * token if it starts with a valid token name, else we take
                 * the one we saved.  There's probably some reasonable
                 * alternative to this...
                 *
                 * NOTE(review): no separate copy is made below; the
                 * "saved" token appears to be whatever text is still in
                 * tok after sp was rewound by an earlier terminator.
                 */
                case ';':
                        if (t_def && level == t_level) {
                                t_def = NO;
                                getline();
                                if (sp != tok)
                                        *sp = EOS;
                                pfnote(tok, lineno);
                                break;
                        }
                        goto storec;

                /*
                 * store characters until one that can't be part of a token
                 * comes along; check the current token against certain
                 * reserved words.
                 */
                default:
                        /* ignore whitespace */
                        if (c == ' ' || c == '\t') {
                                int save = c;
                                /* collapse a run of blanks/tabs into one */
                                while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
                                        ;
                                if (c == EOF)
                                        return;
                                (void)ungetc(c, inf);
                                c = save;
                        }
                        /*
                         * A non-token character terminates whatever has
                         * been collected so far.
                         */
        storec:         if (!intoken(c)) {
                                /* nothing collected: drop the token state */
                                if (sp == tok)
                                        break;
                                *sp = EOS;
                                if (tflag) {
                                        /* no typedefs inside typedefs */
                                        /*
                                         * the lengths passed to memcmp
                                         * include the terminating NUL, so
                                         * these are exact matches
                                         */
                                        if (!t_def &&
                                                   !memcmp(tok, "typedef",8)) {
                                                t_def = YES;
                                                t_level = level;
                                                break;
                                        }
                                        /* catch "typedef struct" */
                                        if ((!t_def || t_level < level)
                                            && (!memcmp(tok, "struct", 7)
                                            || !memcmp(tok, "union", 6)
                                            || !memcmp(tok, "enum", 5))) {
                                                /*
                                                 * get line immediately;
                                                 * may change before '{'
                                                 */
                                                getline();
                                                /*
                                                 * str_entry() returns YES
                                                 * once it has consumed the
                                                 * opening '{'
                                                 */
                                                if (str_entry(c))
                                                        ++level;
                                                break;
                                                /* } */
                                        }
                                }
                                /*
                                 * rewind for the next token; "token" is
                                 * left as it was so a following '(' can
                                 * still see the text in tok
                                 */
                                sp = tok;
                        }
                        /*
                         * Token character: store it if a token is already
                         * in progress or this character may begin one.
                         */
                        else if (sp != tok || begtoken(c)) {
                                if (sp == tok + sizeof tok - 1)
                                        /* Too long -- truncate it */
                                        *sp = EOS;
                                else 
                                        *sp++ = c;
                                token = YES;
                        }
                        continue;
                }

                /* reached via "break" above: forget the current token */
                sp = tok;
                token = NO;
        }
}
256
257 /*
258  * func_entry --
259  *      handle a function reference
260  */
261 static int
262 func_entry(void)
263 {
264         int     c;                      /* current character */
265         int     level = 0;              /* for matching '()' */
266
267         /*
268          * Find the end of the assumed function declaration.
269          * Note that ANSI C functions can have type definitions so keep
270          * track of the parentheses nesting level.
271          */
272         while (GETC(!=, EOF)) {
273                 switch (c) {
274                 case '\'':
275                 case '"':
276                         /* skip strings and character constants */
277                         skip_string(c);
278                         break;
279                 case '/':
280                         /* skip comments */
281                         if (GETC(==, '*') || c == '/')
282                                 skip_comment(c);
283                         break;
284                 case '(':
285                         level++;
286                         break;
287                 case ')':
288                         if (level == 0)
289                                 goto fnd;
290                         level--;
291                         break;
292                 case '\n':
293                         SETLINE;
294                 }
295         }
296         return (NO);
297 fnd:
298         /*
299          * we assume that the character after a function's right paren
300          * is a token character if it's a function and a non-token
301          * character if it's a declaration.  Comments don't count...
302          */
303         for (;;) {
304                 while (GETC(!=, EOF) && iswhite(c))
305                         if (c == '\n')
306                                 SETLINE;
307                 if (intoken(c) || c == '{')
308                         break;
309                 if (c == '/' && (GETC(==, '*') || c == '/'))
310                         skip_comment(c);
311                 else {                          /* don't ever "read" '/' */
312                         (void)ungetc(c, inf);
313                         return (NO);
314                 }
315         }
316         if (c != '{')
317                 (void)skip_key('{');
318         return (YES);
319 }
320
321 /*
322  * hash_entry --
323  *      handle a line starting with a '#'
324  */
325 static void
326 hash_entry(void)
327 {
328         int     c;                      /* character read */
329         int     curline;                /* line started on */
330         char    *sp;                    /* buffer pointer */
331         char    tok[MAXTOKEN];          /* storage buffer */
332
333         /* ignore leading whitespace */
334         while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
335                 ;
336         (void)ungetc(c, inf);
337
338         curline = lineno;
339         for (sp = tok;;) {              /* get next token */
340                 if (GETC(==, EOF))
341                         return;
342                 if (iswhite(c))
343                         break;
344                 if (sp == tok + sizeof tok - 1)
345                         /* Too long -- truncate it */
346                         *sp = EOS;
347                 else 
348                         *sp++ = c;
349         }
350         *sp = EOS;
351         if (memcmp(tok, "define", 6))   /* only interested in #define's */
352                 goto skip;
353         for (;;) {                      /* this doesn't handle "#define \n" */
354                 if (GETC(==, EOF))
355                         return;
356                 if (!iswhite(c))
357                         break;
358         }
359         for (sp = tok;;) {              /* get next token */
360                 if (sp == tok + sizeof tok - 1)
361                         /* Too long -- truncate it */
362                         *sp = EOS;
363                 else 
364                         *sp++ = c;
365                 if (GETC(==, EOF))
366                         return;
367                 /*
368                  * this is where it DOESN'T handle
369                  * "#define \n"
370                  */
371                 if (!intoken(c))
372                         break;
373         }
374         *sp = EOS;
375         if (dflag || c == '(') {        /* only want macros */
376                 getline();
377                 pfnote(tok, curline);
378         }
379 skip:   if (c == '\n') {                /* get rid of rest of define */
380                 SETLINE
381                 if (*(sp - 1) != '\\')
382                         return;
383         }
384         (void)skip_key('\n');
385 }
386
387 /*
388  * str_entry --
389  *      handle a struct, union or enum entry
390  */
391 static int
392 str_entry(int c) /* c is current character */
393 {
394         int     curline;                /* line started on */
395         char    *sp;                    /* buffer pointer */
396         char    tok[LINE_MAX];          /* storage buffer */
397
398         curline = lineno;
399         while (iswhite(c))
400                 if (GETC(==, EOF))
401                         return (NO);
402         if (c == '{')           /* it was "struct {" */
403                 return (YES);
404         for (sp = tok;;) {              /* get next token */
405                 if (sp == tok + sizeof tok - 1)
406                         /* Too long -- truncate it */
407                         *sp = EOS;
408                 else 
409                         *sp++ = c;
410                 if (GETC(==, EOF))
411                         return (NO);
412                 if (!intoken(c))
413                         break;
414         }
415         switch (c) {
416                 case '{':               /* it was "struct foo{" */
417                         --sp;
418                         break;
419                 case '\n':              /* it was "struct foo\n" */
420                         SETLINE;
421                         /*FALLTHROUGH*/
422                 default:                /* probably "struct foo " */
423                         while (GETC(!=, EOF))
424                                 if (!iswhite(c))
425                                         break;
426                         if (c != '{') {
427                                 (void)ungetc(c, inf);
428                                 return (NO);
429                         }
430         }
431         *sp = EOS;
432         pfnote(tok, curline);
433         return (YES);
434 }
435
436 /*
437  * skip_comment --
438  *      skip over comment
439  */
440 void
441 skip_comment(int t) /* t is comment character */
442 {
443         int     c;                      /* character read */
444         int     star;                   /* '*' flag */
445
446         for (star = 0; GETC(!=, EOF);)
447                 switch(c) {
448                 /* comments don't nest, nor can they be escaped. */
449                 case '*':
450                         star = YES;
451                         break;
452                 case '/':
453                         if (star && t == '*')
454                                 return;
455                         break;
456                 case '\n':
457                         if (t == '/')
458                                 return;
459                         SETLINE;
460                         /*FALLTHROUGH*/
461                 default:
462                         star = NO;
463                         break;
464                 }
465 }
466
467 /*
468  * skip_string --
469  *      skip to the end of a string or character constant.
470  */
471 void
472 skip_string(int key)
473 {
474         int     c,
475                 skip;
476
477         for (skip = NO; GETC(!=, EOF); )
478                 switch (c) {
479                 case '\\':              /* a backslash escapes anything */
480                         skip = !skip;   /* we toggle in case it's "\\" */
481                         break;
482                 case '\n':
483                         SETLINE;
484                         /*FALLTHROUGH*/
485                 default:
486                         if (c == key && !skip)
487                                 return;
488                         skip = NO;
489                 }
490 }
491
492 /*
493  * skip_key --
494  *      skip to next char "key"
495  */
496 int
497 skip_key(int key)
498 {
499         int     c,
500                 skip,
501                 retval;
502
503         for (skip = retval = NO; GETC(!=, EOF);)
504                 switch(c) {
505                 case '\\':              /* a backslash escapes anything */
506                         skip = !skip;   /* we toggle in case it's "\\" */
507                         break;
508                 case ';':               /* special case for yacc; if one */
509                 case '|':               /* of these chars occurs, we may */
510                         retval = YES;   /* have moved out of the rule */
511                         break;          /* not used by C */
512                 case '\'':
513                 case '"':
514                         /* skip strings and character constants */
515                         skip_string(c);
516                         break;
517                 case '/':
518                         /* skip comments */
519                         if (GETC(==, '*') || c == '/') {
520                                 skip_comment(c);
521                                 break;
522                         }
523                         (void)ungetc(c, inf);
524                         c = '/';
525                         goto norm;
526                 case '\n':
527                         SETLINE;
528                         /*FALLTHROUGH*/
529                 default:
530                 norm:
531                         if (c == key && !skip)
532                                 return (retval);
533                         skip = NO;
534                 }
535         return (retval);
536 }