Rename getline to get_line to avoid a collision with getline(3).
[dragonfly.git] / usr.bin / ctags / C.c
1 /*
2  * Copyright (c) 1987, 1993, 1994
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * @(#)C.c      8.4 (Berkeley) 4/2/94
30  * $FreeBSD: head/usr.bin/ctags/C.c 299355 2016-05-10 11:11:23Z bapt $
31  */
32
33 #include <limits.h>
34 #include <stdio.h>
35 #include <string.h>
36
37 #include "ctags.h"
38
/* Forward declarations for the scanner helpers defined later in this file. */
static int      func_entry(void);       /* handle a function reference */
static void     hash_entry(void);       /* handle a line starting with '#' */
static void     skip_string(int);       /* skip a string or character constant */
static int      str_entry(int);         /* handle a struct, union or enum entry */
43
/*
 * c_entries --
 *      read .c and .h files and call appropriate routines
 *
 *      Scans the input stream `inf' one character at a time, tracking the
 *      brace nesting level and collecting the current identifier in a
 *      local token buffer.  Depending on the character seen it hands off
 *      to hash_entry() for '#' seen with no token in progress, to
 *      func_entry() for a '(' that follows a token at brace level zero
 *      and, when tflag is set, to str_entry() for the struct/union/enum
 *      keywords.  With tflag set, the name of a typedef is recorded at the
 *      ';' found on the brace level the typedef started on.  Tags are
 *      reported through pfnote().
 */
void
c_entries(void)
{
        int     c;                      /* current character */
        int     level;                  /* brace level */
        int     token;                  /* if reading a token */
        int     t_def;                  /* if reading a typedef */
        int     t_level;                /* typedef's brace level */
        char    *sp;                    /* buffer pointer */
        char    tok[MAXTOKEN];          /* token buffer */

        /* remember the file offset at which the first line starts */
        lineftell = ftell(inf);
        sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
        while (GETC(!=, EOF)) {
                switch (c) {
                /*
                 * Here's where it DOESN'T handle: {
                 *      foo(a)
                 *      {
                 *      #ifdef notdef
                 *              }
                 *      #endif
                 *              if (a)
                 *                      puts("hello, world");
                 *      }
                 */
                case '{':
                        ++level;
                        goto endtok;
                case '}':
                        /*
                         * if level goes below zero, try and fix
                         * it, even though we've already messed up
                         */
                        if (--level < 0)
                                level = 0;
                        goto endtok;

                case '\n':
                        SETLINE;
                        /*
                         * the above 3 cases are similar in that they
                         * are special characters that also end tokens.
                         *
                         * If characters were collected, terminate them and
                         * keep the finished token in tok (sp is rewound but
                         * the text stays); otherwise note that there is no
                         * current token.
                         */
        endtok:                 if (sp > tok) {
                                *sp = EOS;
                                token = YES;
                                sp = tok;
                        }
                        else
                                token = NO;
                        continue;

                /*
                 * We ignore quoted strings and character constants
                 * completely.
                 */
                case '"':
                case '\'':
                        skip_string(c);
                        break;

                /*
                 * comments can be fun; note the state is unchanged after
                 * return, in case we found:
                 *      "foo() XX comment XX { int bar; }"
                 */
                case '/':
                        if (GETC(==, '*') || c == '/') {
                                skip_comment(c);
                                continue;
                        }
                        /*
                         * not a comment: push the look-ahead character back
                         * and treat the '/' as an ordinary character.
                         */
                        ungetc(c, inf);
                        c = '/';
                        goto storec;

                /* hash marks flag #define's. */
                case '#':
                        /* only when no token is currently being collected */
                        if (sp == tok) {
                                hash_entry();
                                break;
                        }
                        goto storec;

                /*
                 * if we have a current token, parenthesis on
                 * level zero indicates a function.
                 */
                case '(':
                        if (!level && token) {
                                int     curline;

                                if (sp != tok)
                                        *sp = EOS;
                                /*
                                 * grab the line immediately, we may
                                 * already be wrong, for example,
                                 *      foo\n
                                 *      (arg1,
                                 */
                                get_line();
                                curline = lineno;
                                /*
                                 * on YES the brace level is bumped and the
                                 * tag is recorded against the line number
                                 * saved before func_entry() read ahead.
                                 */
                                if (func_entry()) {
                                        ++level;
                                        pfnote(tok, curline);
                                }
                                break;
                        }
                        goto storec;

                /*
                 * semi-colons indicate the end of a typedef; if we find a
                 * typedef we search for the next semi-colon of the same
                 * level as the typedef.  Ignoring "structs", they are
                 * tricky, since you can find:
                 *
                 *      "typedef long time_t;"
                 *      "typedef unsigned int u_int;"
                 *      "typedef unsigned int u_int [10];"
                 *
                 * If looking at a typedef, we save a copy of the last token
                 * found.  Then, when we find the ';' we take the current
                 * token if it starts with a valid token name, else we take
                 * the one we saved.  There's probably some reasonable
                 * alternative to this...
                 */
                case ';':
                        if (t_def && level == t_level) {
                                t_def = NO;
                                get_line();
                                if (sp != tok)
                                        *sp = EOS;
                                pfnote(tok, lineno);
                                break;
                        }
                        goto storec;

                /*
                 * store characters until one that can't be part of a token
                 * comes along; check the current token against certain
                 * reserved words.
                 */
                default:
                        /*
                         * ignore whitespace: swallow the rest of a run of
                         * blanks/tabs, push back the character that ended
                         * it, and carry on with the first blank of the run.
                         */
                        if (c == ' ' || c == '\t') {
                                int save = c;
                                while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
                                        ;
                                if (c == EOF)
                                        return;
                                ungetc(c, inf);
                                c = save;
                        }
        storec:         if (!intoken(c)) {
                                /* a non-token character with nothing collected */
                                if (sp == tok)
                                        break;
                                *sp = EOS;
                                if (tflag) {
                                        /*
                                         * the keyword comparisons below
                                         * include the terminating EOS, so
                                         * only the exact word matches.
                                         */
                                        /* no typedefs inside typedefs */
                                        if (!t_def &&
                                                   !memcmp(tok, "typedef",8)) {
                                                t_def = YES;
                                                t_level = level;
                                                break;
                                        }
                                        /* catch "typedef struct" */
                                        if ((!t_def || t_level < level)
                                            && (!memcmp(tok, "struct", 7)
                                            || !memcmp(tok, "union", 6)
                                            || !memcmp(tok, "enum", 5))) {
                                                /*
                                                 * get line immediately;
                                                 * may change before '{'
                                                 */
                                                get_line();
                                                if (str_entry(c))
                                                        ++level;
                                                break;
                                                /* } */
                                        }
                                }
                                /* token finished; its text stays in tok */
                                sp = tok;
                        }
                        else if (sp != tok || begtoken(c)) {
                                if (sp == tok + sizeof tok - 1)
                                        /* Too long -- truncate it */
                                        *sp = EOS;
                                else
                                        *sp++ = c;
                                token = YES;
                        }
                        continue;
                }

                /*
                 * reached only by a `break' out of the switch: drop any
                 * partially collected token and forget the current one.
                 */
                sp = tok;
                token = NO;
        }
}
246
247 /*
248  * func_entry --
249  *      handle a function reference
250  */
251 static int
252 func_entry(void)
253 {
254         int     c;                      /* current character */
255         int     level = 0;              /* for matching '()' */
256
257         /*
258          * Find the end of the assumed function declaration.
259          * Note that ANSI C functions can have type definitions so keep
260          * track of the parentheses nesting level.
261          */
262         while (GETC(!=, EOF)) {
263                 switch (c) {
264                 case '\'':
265                 case '"':
266                         /* skip strings and character constants */
267                         skip_string(c);
268                         break;
269                 case '/':
270                         /* skip comments */
271                         if (GETC(==, '*') || c == '/')
272                                 skip_comment(c);
273                         break;
274                 case '(':
275                         level++;
276                         break;
277                 case ')':
278                         if (level == 0)
279                                 goto fnd;
280                         level--;
281                         break;
282                 case '\n':
283                         SETLINE;
284                 }
285         }
286         return (NO);
287 fnd:
288         /*
289          * we assume that the character after a function's right paren
290          * is a token character if it's a function and a non-token
291          * character if it's a declaration.  Comments don't count...
292          */
293         for (;;) {
294                 while (GETC(!=, EOF) && iswhite(c))
295                         if (c == '\n')
296                                 SETLINE;
297                 if (intoken(c) || c == '{')
298                         break;
299                 if (c == '/' && (GETC(==, '*') || c == '/'))
300                         skip_comment(c);
301                 else {                          /* don't ever "read" '/' */
302                         ungetc(c, inf);
303                         return (NO);
304                 }
305         }
306         if (c != '{')
307                 skip_key('{');
308         return (YES);
309 }
310
311 /*
312  * hash_entry --
313  *      handle a line starting with a '#'
314  */
315 static void
316 hash_entry(void)
317 {
318         int     c;                      /* character read */
319         int     curline;                /* line started on */
320         char    *sp;                    /* buffer pointer */
321         char    tok[MAXTOKEN];          /* storage buffer */
322
323         /* ignore leading whitespace */
324         while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
325                 ;
326         ungetc(c, inf);
327
328         curline = lineno;
329         for (sp = tok;;) {              /* get next token */
330                 if (GETC(==, EOF))
331                         return;
332                 if (iswhite(c))
333                         break;
334                 if (sp == tok + sizeof tok - 1)
335                         /* Too long -- truncate it */
336                         *sp = EOS;
337                 else 
338                         *sp++ = c;
339         }
340         *sp = EOS;
341         if (memcmp(tok, "define", 6))   /* only interested in #define's */
342                 goto skip;
343         for (;;) {                      /* this doesn't handle "#define \n" */
344                 if (GETC(==, EOF))
345                         return;
346                 if (!iswhite(c))
347                         break;
348         }
349         for (sp = tok;;) {              /* get next token */
350                 if (sp == tok + sizeof tok - 1)
351                         /* Too long -- truncate it */
352                         *sp = EOS;
353                 else 
354                         *sp++ = c;
355                 if (GETC(==, EOF))
356                         return;
357                 /*
358                  * this is where it DOESN'T handle
359                  * "#define \n"
360                  */
361                 if (!intoken(c))
362                         break;
363         }
364         *sp = EOS;
365         if (dflag || c == '(') {        /* only want macros */
366                 get_line();
367                 pfnote(tok, curline);
368         }
369 skip:   if (c == '\n') {                /* get rid of rest of define */
370                 SETLINE
371                 if (*(sp - 1) != '\\')
372                         return;
373         }
374         skip_key('\n');
375 }
376
377 /*
378  * str_entry --
379  *      handle a struct, union or enum entry
380  */
381 static int
382 str_entry(int c) /* c is current character */
383 {
384         int     curline;                /* line started on */
385         char    *sp;                    /* buffer pointer */
386         char    tok[LINE_MAX];          /* storage buffer */
387
388         curline = lineno;
389         while (iswhite(c))
390                 if (GETC(==, EOF))
391                         return (NO);
392         if (c == '{')           /* it was "struct {" */
393                 return (YES);
394         for (sp = tok;;) {              /* get next token */
395                 if (sp == tok + sizeof tok - 1)
396                         /* Too long -- truncate it */
397                         *sp = EOS;
398                 else 
399                         *sp++ = c;
400                 if (GETC(==, EOF))
401                         return (NO);
402                 if (!intoken(c))
403                         break;
404         }
405         switch (c) {
406                 case '{':               /* it was "struct foo{" */
407                         --sp;
408                         break;
409                 case '\n':              /* it was "struct foo\n" */
410                         SETLINE;
411                         /*FALLTHROUGH*/
412                 default:                /* probably "struct foo " */
413                         while (GETC(!=, EOF))
414                                 if (!iswhite(c))
415                                         break;
416                         if (c != '{') {
417                                 ungetc(c, inf);
418                                 return (NO);
419                         }
420         }
421         *sp = EOS;
422         pfnote(tok, curline);
423         return (YES);
424 }
425
426 /*
427  * skip_comment --
428  *      skip over comment
429  */
430 void
431 skip_comment(int t) /* t is comment character */
432 {
433         int     c;                      /* character read */
434         int     star;                   /* '*' flag */
435
436         for (star = 0; GETC(!=, EOF);)
437                 switch(c) {
438                 /* comments don't nest, nor can they be escaped. */
439                 case '*':
440                         star = YES;
441                         break;
442                 case '/':
443                         if (star && t == '*')
444                                 return;
445                         break;
446                 case '\n':
447                         if (t == '/')
448                                 return;
449                         SETLINE;
450                         /*FALLTHROUGH*/
451                 default:
452                         star = NO;
453                         break;
454                 }
455 }
456
457 /*
458  * skip_string --
459  *      skip to the end of a string or character constant.
460  */
461 void
462 skip_string(int key)
463 {
464         int     c,
465                 skip;
466
467         for (skip = NO; GETC(!=, EOF); )
468                 switch (c) {
469                 case '\\':              /* a backslash escapes anything */
470                         skip = !skip;   /* we toggle in case it's "\\" */
471                         break;
472                 case '\n':
473                         SETLINE;
474                         /*FALLTHROUGH*/
475                 default:
476                         if (c == key && !skip)
477                                 return;
478                         skip = NO;
479                 }
480 }
481
482 /*
483  * skip_key --
484  *      skip to next char "key"
485  */
486 int
487 skip_key(int key)
488 {
489         int     c,
490                 skip,
491                 retval;
492
493         for (skip = retval = NO; GETC(!=, EOF);)
494                 switch(c) {
495                 case '\\':              /* a backslash escapes anything */
496                         skip = !skip;   /* we toggle in case it's "\\" */
497                         break;
498                 case ';':               /* special case for yacc; if one */
499                 case '|':               /* of these chars occurs, we may */
500                         retval = YES;   /* have moved out of the rule */
501                         break;          /* not used by C */
502                 case '\'':
503                 case '"':
504                         /* skip strings and character constants */
505                         skip_string(c);
506                         break;
507                 case '/':
508                         /* skip comments */
509                         if (GETC(==, '*') || c == '/') {
510                                 skip_comment(c);
511                                 break;
512                         }
513                         ungetc(c, inf);
514                         c = '/';
515                         goto norm;
516                 case '\n':
517                         SETLINE;
518                         /*FALLTHROUGH*/
519                 default:
520                 norm:
521                         if (c == key && !skip)
522                                 return (retval);
523                         skip = NO;
524                 }
525         return (retval);
526 }