c5f0bb80fcc36ce1107d6ca685ef7273ae6ac67b
[dragonfly.git] / usr.bin / ctags / C.c
1 /*
2  * Copyright (c) 1987, 1993, 1994
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *      This product includes software developed by the University of
16  *      California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  * @(#)C.c      8.4 (Berkeley) 4/2/94
34  * $FreeBSD: src/usr.bin/ctags/C.c,v 1.3.2.2 2002/07/30 00:55:07 tjr Exp $
35  * $DragonFly: src/usr.bin/ctags/C.c,v 1.2 2003/06/17 04:29:25 dillon Exp $
36  */
37
38 #include <limits.h>
39 #include <stdio.h>
40 #include <string.h>
41
42 #include "ctags.h"
43
44 static int      func_entry(void);
45 static void     hash_entry(void);
46 static void     skip_string(int);
47 static int      str_entry(int);
48
49 /*
50  * c_entries --
51  *      read .c and .h files and call appropriate routines
52  */
53 void
54 c_entries(void)
55 {
56         int     c;                      /* current character */
57         int     level;                  /* brace level */
58         int     token;                  /* if reading a token */
59         int     t_def;                  /* if reading a typedef */
60         int     t_level;                /* typedef's brace level */
61         char    *sp;                    /* buffer pointer */
62         char    tok[MAXTOKEN];          /* token buffer */
63
64         lineftell = ftell(inf);
65         sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
66         while (GETC(!=, EOF)) {
67                 switch (c) {
68                 /*
69                  * Here's where it DOESN'T handle: {
70                  *      foo(a)
71                  *      {
72                  *      #ifdef notdef
73                  *              }
74                  *      #endif
75                  *              if (a)
76                  *                      puts("hello, world");
77                  *      }
78                  */
79                 case '{':
80                         ++level;
81                         goto endtok;
82                 case '}':
83                         /*
84                          * if level goes below zero, try and fix
85                          * it, even though we've already messed up
86                          */
87                         if (--level < 0)
88                                 level = 0;
89                         goto endtok;
90
91                 case '\n':
92                         SETLINE;
93                         /*
94                          * the above 3 cases are similar in that they
95                          * are special characters that also end tokens.
96                          */
97         endtok:                 if (sp > tok) {
98                                 *sp = EOS;
99                                 token = YES;
100                                 sp = tok;
101                         }
102                         else
103                                 token = NO;
104                         continue;
105
106                 /*
107                  * We ignore quoted strings and character constants
108                  * completely.
109                  */
110                 case '"':
111                 case '\'':
112                         (void)skip_string(c);
113                         break;
114
115                 /*
116                  * comments can be fun; note the state is unchanged after
117                  * return, in case we found:
118                  *      "foo() XX comment XX { int bar; }"
119                  */
120                 case '/':
121                         if (GETC(==, '*') || c == '/') {
122                                 skip_comment(c);
123                                 continue;
124                         }
125                         (void)ungetc(c, inf);
126                         c = '/';
127                         goto storec;
128
129                 /* hash marks flag #define's. */
130                 case '#':
131                         if (sp == tok) {
132                                 hash_entry();
133                                 break;
134                         }
135                         goto storec;
136
137                 /*
138                  * if we have a current token, parenthesis on
139                  * level zero indicates a function.
140                  */
141                 case '(':
142                         if (!level && token) {
143                                 int     curline;
144
145                                 if (sp != tok)
146                                         *sp = EOS;
147                                 /*
148                                  * grab the line immediately, we may
149                                  * already be wrong, for example,
150                                  *      foo\n
151                                  *      (arg1,
152                                  */
153                                 getline();
154                                 curline = lineno;
155                                 if (func_entry()) {
156                                         ++level;
157                                         pfnote(tok, curline);
158                                 }
159                                 break;
160                         }
161                         goto storec;
162
163                 /*
164                  * semi-colons indicate the end of a typedef; if we find a
165                  * typedef we search for the next semi-colon of the same
166                  * level as the typedef.  Ignoring "structs", they are
167                  * tricky, since you can find:
168                  *
169                  *      "typedef long time_t;"
170                  *      "typedef unsigned int u_int;"
171                  *      "typedef unsigned int u_int [10];"
172                  *
173                  * If looking at a typedef, we save a copy of the last token
174                  * found.  Then, when we find the ';' we take the current
175                  * token if it starts with a valid token name, else we take
176                  * the one we saved.  There's probably some reasonable
177                  * alternative to this...
178                  */
179                 case ';':
180                         if (t_def && level == t_level) {
181                                 t_def = NO;
182                                 getline();
183                                 if (sp != tok)
184                                         *sp = EOS;
185                                 pfnote(tok, lineno);
186                                 break;
187                         }
188                         goto storec;
189
190                 /*
191                  * store characters until one that can't be part of a token
192                  * comes along; check the current token against certain
193                  * reserved words.
194                  */
195                 default:
196                         /* ignore whitespace */
197                         if (c == ' ' || c == '\t') {
198                                 int save = c;
199                                 while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
200                                         ;
201                                 if (c == EOF)
202                                         return;
203                                 (void)ungetc(c, inf);
204                                 c = save;
205                         }
206         storec:         if (!intoken(c)) {
207                                 if (sp == tok)
208                                         break;
209                                 *sp = EOS;
210                                 if (tflag) {
211                                         /* no typedefs inside typedefs */
212                                         if (!t_def &&
213                                                    !memcmp(tok, "typedef",8)) {
214                                                 t_def = YES;
215                                                 t_level = level;
216                                                 break;
217                                         }
218                                         /* catch "typedef struct" */
219                                         if ((!t_def || t_level < level)
220                                             && (!memcmp(tok, "struct", 7)
221                                             || !memcmp(tok, "union", 6)
222                                             || !memcmp(tok, "enum", 5))) {
223                                                 /*
224                                                  * get line immediately;
225                                                  * may change before '{'
226                                                  */
227                                                 getline();
228                                                 if (str_entry(c))
229                                                         ++level;
230                                                 break;
231                                                 /* } */
232                                         }
233                                 }
234                                 sp = tok;
235                         }
236                         else if (sp != tok || begtoken(c)) {
237                                 if (sp == tok + sizeof tok - 1)
238                                         /* Too long -- truncate it */
239                                         *sp = EOS;
240                                 else 
241                                         *sp++ = c;
242                                 token = YES;
243                         }
244                         continue;
245                 }
246
247                 sp = tok;
248                 token = NO;
249         }
250 }
251
252 /*
253  * func_entry --
254  *      handle a function reference
255  */
256 static int
257 func_entry(void)
258 {
259         int     c;                      /* current character */
260         int     level = 0;              /* for matching '()' */
261
262         /*
263          * Find the end of the assumed function declaration.
264          * Note that ANSI C functions can have type definitions so keep
265          * track of the parentheses nesting level.
266          */
267         while (GETC(!=, EOF)) {
268                 switch (c) {
269                 case '\'':
270                 case '"':
271                         /* skip strings and character constants */
272                         skip_string(c);
273                         break;
274                 case '/':
275                         /* skip comments */
276                         if (GETC(==, '*') || c == '/')
277                                 skip_comment(c);
278                         break;
279                 case '(':
280                         level++;
281                         break;
282                 case ')':
283                         if (level == 0)
284                                 goto fnd;
285                         level--;
286                         break;
287                 case '\n':
288                         SETLINE;
289                 }
290         }
291         return (NO);
292 fnd:
293         /*
294          * we assume that the character after a function's right paren
295          * is a token character if it's a function and a non-token
296          * character if it's a declaration.  Comments don't count...
297          */
298         for (;;) {
299                 while (GETC(!=, EOF) && iswhite(c))
300                         if (c == '\n')
301                                 SETLINE;
302                 if (intoken(c) || c == '{')
303                         break;
304                 if (c == '/' && (GETC(==, '*') || c == '/'))
305                         skip_comment(c);
306                 else {                          /* don't ever "read" '/' */
307                         (void)ungetc(c, inf);
308                         return (NO);
309                 }
310         }
311         if (c != '{')
312                 (void)skip_key('{');
313         return (YES);
314 }
315
316 /*
317  * hash_entry --
318  *      handle a line starting with a '#'
319  */
320 static void
321 hash_entry(void)
322 {
323         int     c;                      /* character read */
324         int     curline;                /* line started on */
325         char    *sp;                    /* buffer pointer */
326         char    tok[MAXTOKEN];          /* storage buffer */
327
328         /* ignore leading whitespace */
329         while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
330                 ;
331         (void)ungetc(c, inf);
332
333         curline = lineno;
334         for (sp = tok;;) {              /* get next token */
335                 if (GETC(==, EOF))
336                         return;
337                 if (iswhite(c))
338                         break;
339                 if (sp == tok + sizeof tok - 1)
340                         /* Too long -- truncate it */
341                         *sp = EOS;
342                 else 
343                         *sp++ = c;
344         }
345         *sp = EOS;
346         if (memcmp(tok, "define", 6))   /* only interested in #define's */
347                 goto skip;
348         for (;;) {                      /* this doesn't handle "#define \n" */
349                 if (GETC(==, EOF))
350                         return;
351                 if (!iswhite(c))
352                         break;
353         }
354         for (sp = tok;;) {              /* get next token */
355                 if (sp == tok + sizeof tok - 1)
356                         /* Too long -- truncate it */
357                         *sp = EOS;
358                 else 
359                         *sp++ = c;
360                 if (GETC(==, EOF))
361                         return;
362                 /*
363                  * this is where it DOESN'T handle
364                  * "#define \n"
365                  */
366                 if (!intoken(c))
367                         break;
368         }
369         *sp = EOS;
370         if (dflag || c == '(') {        /* only want macros */
371                 getline();
372                 pfnote(tok, curline);
373         }
374 skip:   if (c == '\n') {                /* get rid of rest of define */
375                 SETLINE
376                 if (*(sp - 1) != '\\')
377                         return;
378         }
379         (void)skip_key('\n');
380 }
381
382 /*
383  * str_entry --
384  *      handle a struct, union or enum entry
385  */
386 static int
387 str_entry(int c) /* c is current character */
388 {
389         int     curline;                /* line started on */
390         char    *sp;                    /* buffer pointer */
391         char    tok[LINE_MAX];          /* storage buffer */
392
393         curline = lineno;
394         while (iswhite(c))
395                 if (GETC(==, EOF))
396                         return (NO);
397         if (c == '{')           /* it was "struct {" */
398                 return (YES);
399         for (sp = tok;;) {              /* get next token */
400                 if (sp == tok + sizeof tok - 1)
401                         /* Too long -- truncate it */
402                         *sp = EOS;
403                 else 
404                         *sp++ = c;
405                 if (GETC(==, EOF))
406                         return (NO);
407                 if (!intoken(c))
408                         break;
409         }
410         switch (c) {
411                 case '{':               /* it was "struct foo{" */
412                         --sp;
413                         break;
414                 case '\n':              /* it was "struct foo\n" */
415                         SETLINE;
416                         /*FALLTHROUGH*/
417                 default:                /* probably "struct foo " */
418                         while (GETC(!=, EOF))
419                                 if (!iswhite(c))
420                                         break;
421                         if (c != '{') {
422                                 (void)ungetc(c, inf);
423                                 return (NO);
424                         }
425         }
426         *sp = EOS;
427         pfnote(tok, curline);
428         return (YES);
429 }
430
431 /*
432  * skip_comment --
433  *      skip over comment
434  */
435 void
436 skip_comment(int t) /* t is comment character */
437 {
438         int     c;                      /* character read */
439         int     star;                   /* '*' flag */
440
441         for (star = 0; GETC(!=, EOF);)
442                 switch(c) {
443                 /* comments don't nest, nor can they be escaped. */
444                 case '*':
445                         star = YES;
446                         break;
447                 case '/':
448                         if (star && t == '*')
449                                 return;
450                         break;
451                 case '\n':
452                         if (t == '/')
453                                 return;
454                         SETLINE;
455                         /*FALLTHROUGH*/
456                 default:
457                         star = NO;
458                         break;
459                 }
460 }
461
462 /*
463  * skip_string --
464  *      skip to the end of a string or character constant.
465  */
466 void
467 skip_string(int key)
468 {
469         int     c,
470                 skip;
471
472         for (skip = NO; GETC(!=, EOF); )
473                 switch (c) {
474                 case '\\':              /* a backslash escapes anything */
475                         skip = !skip;   /* we toggle in case it's "\\" */
476                         break;
477                 case '\n':
478                         SETLINE;
479                         /*FALLTHROUGH*/
480                 default:
481                         if (c == key && !skip)
482                                 return;
483                         skip = NO;
484                 }
485 }
486
487 /*
488  * skip_key --
489  *      skip to next char "key"
490  */
491 int
492 skip_key(int key)
493 {
494         int     c,
495                 skip,
496                 retval;
497
498         for (skip = retval = NO; GETC(!=, EOF);)
499                 switch(c) {
500                 case '\\':              /* a backslash escapes anything */
501                         skip = !skip;   /* we toggle in case it's "\\" */
502                         break;
503                 case ';':               /* special case for yacc; if one */
504                 case '|':               /* of these chars occurs, we may */
505                         retval = YES;   /* have moved out of the rule */
506                         break;          /* not used by C */
507                 case '\'':
508                 case '"':
509                         /* skip strings and character constants */
510                         skip_string(c);
511                         break;
512                 case '/':
513                         /* skip comments */
514                         if (GETC(==, '*') || c == '/') {
515                                 skip_comment(c);
516                                 break;
517                         }
518                         (void)ungetc(c, inf);
519                         c = '/';
520                         goto norm;
521                 case '\n':
522                         SETLINE;
523                         /*FALLTHROUGH*/
524                 default:
525                 norm:
526                         if (c == key && !skip)
527                                 return (retval);
528                         skip = NO;
529                 }
530         return (retval);
531 }