Style(9):
[dragonfly.git] / usr.bin / checknr / checknr.c
1 /*
2  * Copyright (c) 1980, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *      This product includes software developed by the University of
16  *      California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  * @(#) Copyright (c) 1980, 1993 The Regents of the University of California.  All rights reserved.
34  * @(#)checknr.c        8.1 (Berkeley) 6/6/93
35  *
36  * $DragonFly: src/usr.bin/checknr/checknr.c,v 1.6 2005/03/01 22:32:14 cpressey Exp $
37  */
38
39 /*
40  * checknr: check an nroff/troff input file for matching macro calls.
41  * we also attempt to match size and font changes, but only the embedded
42  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
43  * later but for now think of these restrictions as contributions to
44  * structured typesetting.
45  */
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <ctype.h>
50
#define MAXSTK  100     /* number of entries in the matching stack */
#define MAXBR   100     /* number of slots in the bracket-pair table */
#define MAXCMDS 500     /* number of slots in the known-command table */

/* Forward declarations of the file-local routines, in alphabetical order. */
static void     addcmd(char *myline);
static void     addmac(const char *mac);
static int      binsrch(const char *mac);
static void     checkknown(const char *mac);
static void     chkcmd(const char *mac);
static void     complain(int i);
static int      eq(const char *s1, const char *s2);
static void     nomatch(const char *mac);
static void     pe(int mylineno);
static void     process(FILE *f);
static void     prop(int i);
static void     usage(void);
67
68 /*
69  * The stack on which we remember what we've seen so far.
70  */
71 struct stkstr {
72         int opno;       /* number of opening bracket */
73         int pl;         /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
74         int parm;       /* parm to size, font, etc */
75         int lno;        /* line number the thing came in in */
76 } stk[MAXSTK];
77 int stktop;
78
/*
 * The kinds of opening and closing brackets.
 *
 * Each entry names an opening macro and the macro that closes it.  The
 * table ends at the first entry whose opbr is null; the -a option in
 * main() appends new pairs starting at that entry.  chkcmd() walks the
 * table from the top and stops at the first entry that matches, so the
 * DS/DE and FS/FE rows repeated under the -ms package are never the
 * ones selected.
 */
struct brstr {
        char *opbr;     /* name of the opening macro */
        char *clbr;     /* name of the macro that closes it */
} br[MAXBR] = {
        /* A few bare bones troff commands */
        /* SZ and FT are the indexes of these two entries; keep them first */
#define SZ      0
        {"sz",  "sz"},  /* also \s */
#define FT      1
        {"ft",  "ft"},  /* also \f */
        /* the -mm package */
        {"AL",  "LE"},
        {"AS",  "AE"},
        {"BL",  "LE"},
        {"BS",  "BE"},
        {"DF",  "DE"},
        {"DL",  "LE"},
        {"DS",  "DE"},
        {"FS",  "FE"},
        {"ML",  "LE"},
        {"NS",  "NE"},
        {"RL",  "LE"},
        {"VL",  "LE"},
        /* the -ms package */
        {"AB",  "AE"},
        {"BD",  "DE"},
        {"CD",  "DE"},
        {"DS",  "DE"},
        {"FS",  "FE"},
        {"ID",  "DE"},
        {"KF",  "KE"},
        {"KS",  "KE"},
        {"LD",  "DE"},
        {"LG",  "NL"},
        {"QS",  "QE"},
        {"RS",  "RE"},
        {"SM",  "NL"},
        {"XA",  "XE"},
        {"XS",  "XE"},
        /* The -me package */
        {"(b",  ")b"},
        {"(c",  ")c"},
        {"(d",  ")d"},
        {"(f",  ")f"},
        {"(l",  ")l"},
        {"(q",  ")q"},
        {"(x",  ")x"},
        {"(z",  ")z"},
        /* Things needed by preprocessors */
        {"EQ",  "EN"},
        {"TS",  "TE"},
        /* Refer */
        {"[",   "]"},
        /* terminator: a null opbr marks the first free slot */
        {0,     0}
};
136
/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 *
 * binsrch() does a binary search on this table comparing only the first
 * two characters of each name, so the entries must stay in ascending
 * order of those characters.  addmac() inserts new names at the position
 * binsrch() reports.  The initial list ends with a null pointer, which
 * main() uses to count the entries into ncmds.
 */
char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
"yr", 0
};
174
/* State describing the input currently being read. */
int     lineno;         /* number of the line held in line[] */
char    line[256];      /* text of the current input line */
char    *cfilename;     /* name of the file being processed */
int     nfiles;         /* how many files were named on the command line */

/* Option flags set from the command line. */
int     fflag;          /* -f given: do not check \f */
int     sflag;          /* -s given: do not check \s */

/* Bookkeeping for the knowncmds table. */
int     ncmds;          /* number of entries in knowncmds */
int     slot;           /* insertion point left behind by binsrch */
183
184 int
185 main(int argc, char **argv)
186 {
187         FILE *f;
188         int i;
189         char *cp;
190         char b1[4];
191
192         /* Figure out how many known commands there are */
193         while (knowncmds[ncmds])
194                 ncmds++;
195         while (argc > 1 && argv[1][0] == '-') {
196                 switch(argv[1][1]) {
197
198                 /* -a: add pairs of macros */
199                 case 'a':
200                         i = strlen(argv[1]) - 2;
201                         if (i % 6 != 0)
202                                 usage();
203                         /* look for empty macro slots */
204                         for (i = 0; br[i].opbr; i++)
205                                 ;
206                         for (cp = argv[1] + 3; cp[-1]; cp += 6) {
207                                 br[i].opbr = malloc(3);
208                                 strncpy(br[i].opbr, cp, 2);
209                                 br[i].clbr = malloc(3);
210                                 strncpy(br[i].clbr, cp + 3, 2);
211                                 /*
212                                  * known pairs are also known cmds
213                                  */
214                                 addmac(br[i].opbr);
215                                 addmac(br[i].clbr);
216                                 i++;
217                         }
218                         break;
219
220                 /* -c: add known commands */
221                 case 'c':
222                         i = strlen(argv[1]) - 2;
223                         if (i % 3 != 0)
224                                 usage();
225                         for (cp = argv[1] + 3; cp[-1]; cp += 3) {
226                                 if (cp[2] && cp[2] != '.')
227                                         usage();
228                                 strncpy(b1, cp, 2);
229                                 b1[2] = '\0';
230                                 addmac(b1);
231                         }
232                         break;
233
234                 /* -f: ignore font changes */
235                 case 'f':
236                         fflag = 1;
237                         break;
238
239                 /* -s: ignore size changes */
240                 case 's':
241                         sflag = 1;
242                         break;
243                 default:
244                         usage();
245                 }
246                 argc--; argv++;
247         }
248
249         nfiles = argc - 1;
250
251         if (nfiles > 0) {
252                 for (i = 1; i < argc; i++) {
253                         cfilename = argv[i];
254                         f = fopen(cfilename, "r");
255                         if (f == NULL)
256                                 perror(cfilename);
257                         else
258                                 process(f);
259                 }
260         } else {
261                 cfilename = "stdin";
262                 process(stdin);
263         }
264         exit(0);
265 }
266
/* Write the command synopsis to standard error and exit with status 1. */
static void
usage(void)
{
        static const char synopsis[] =
            "usage: checknr [-sf] [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] file\n";

        fputs(synopsis, stderr);
        exit(1);
}
275
276 static void
277 process(FILE *f)
278 {
279         int i, n;
280         char mac[5];    /* The current macro or nroff command */
281         int pl;
282
283         stktop = -1;
284         for (lineno = 1; fgets(line, sizeof(line), f); lineno++) {
285                 if (line[0] == '.') {
286                         /*
287                          * find and isolate the macro/command name.
288                          */
289                         strncpy(mac, line + 1, 4);
290                         if (isspace(mac[0])) {
291                                 pe(lineno);
292                                 printf("Empty command\n");
293                         } else if (isspace(mac[1])) {
294                                 mac[1] = 0;
295                         } else if (isspace(mac[2])) {
296                                 mac[2] = 0;
297                         } else if (mac[0] != '\\' || mac[1] != '\"') {
298                                 pe(lineno);
299                                 printf("Command too long\n");
300                         }
301
302                         /*
303                          * Is it a known command?
304                          */
305                         checkknown(mac);
306
307                         /*
308                          * Should we add it?
309                          */
310                         if (eq(mac, "de"))
311                                 addcmd(line);
312
313                         chkcmd(mac);
314                 }
315
316                 /*
317                  * At this point we process the line looking
318                  * for \s and \f.
319                  */
320                 for (i = 0; line[i]; i++) {
321                         if (line[i] == '\\' && (i == 0 || line[i-1] != '\\')) {
322                                 if (!sflag && line[++i] == 's') {
323                                         pl = line[++i];
324                                         if (isdigit(pl)) {
325                                                 n = pl - '0';
326                                                 pl = ' ';
327                                         } else
328                                                 n = 0;
329                                         while (isdigit(line[++i]))
330                                                 n = 10 * n + line[i] - '0';
331                                         i--;
332                                         if (n == 0) {
333                                                 if (stk[stktop].opno == SZ) {
334                                                         stktop--;
335                                                 } else {
336                                                         pe(lineno);
337                                                         printf("unmatched \\s0\n");
338                                                 }
339                                         } else {
340                                                 stk[++stktop].opno = SZ;
341                                                 stk[stktop].pl = pl;
342                                                 stk[stktop].parm = n;
343                                                 stk[stktop].lno = lineno;
344                                         }
345                                 } else if (!fflag && line[i] == 'f') {
346                                         n = line[++i];
347                                         if (n == 'P') {
348                                                 if (stk[stktop].opno == FT) {
349                                                         stktop--;
350                                                 } else {
351                                                         pe(lineno);
352                                                         printf("unmatched \\fP\n");
353                                                 }
354                                         } else {
355                                                 stk[++stktop].opno = FT;
356                                                 stk[stktop].pl = 1;
357                                                 stk[stktop].parm = n;
358                                                 stk[stktop].lno = lineno;
359                                         }
360                                 }
361                         }
362                 }
363         }
364         /*
365          * We've hit the end and look at all this stuff that hasn't been
366          * matched yet!  Complain, complain.
367          */
368         for (i = stktop; i >= 0; i--) {
369                 complain(i);
370         }
371 }
372
373 static void
374 complain(int i)
375 {
376         pe(stk[i].lno);
377         printf("Unmatched ");
378         prop(i);
379         printf("\n");
380 }
381
382 static void
383 prop(int i)
384 {
385         if (stk[i].pl == 0)
386                 printf(".%s", br[stk[i].opno].opbr);
387         else switch(stk[i].opno) {
388         case SZ:
389                 printf("\\s%c%d", stk[i].pl, stk[i].parm);
390                 break;
391         case FT:
392                 printf("\\f%c", stk[i].parm);
393                 break;
394         default:
395                 printf("Bug: stk[%d].opno = %d = .%s, .%s",
396                         i, stk[i].opno, br[stk[i].opno].opbr,
397                         br[stk[i].opno].clbr);
398         }
399 }
400
401 static void
402 chkcmd(const char *mac)
403 {
404         int i;
405
406         /*
407          * Check to see if it matches top of stack.
408          */
409         if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
410                 stktop--;       /* OK. Pop & forget */
411         else {
412                 /* No. Maybe it's an opener */
413                 for (i = 0; br[i].opbr; i++) {
414                         if (eq(mac, br[i].opbr)) {
415                                 /* Found. Push it. */
416                                 stktop++;
417                                 stk[stktop].opno = i;
418                                 stk[stktop].pl = 0;
419                                 stk[stktop].parm = 0;
420                                 stk[stktop].lno = lineno;
421                                 break;
422                         }
423                         /*
424                          * Maybe it's an unmatched closer.
425                          * NOTE: this depends on the fact
426                          * that none of the closers can be
427                          * openers too.
428                          */
429                         if (eq(mac, br[i].clbr)) {
430                                 nomatch(mac);
431                                 break;
432                         }
433                 }
434         }
435 }
436
437 static void
438 nomatch(const char *mac)
439 {
440         int i, j;
441
442         /*
443          * Look for a match further down on stack
444          * If we find one, it suggests that the stuff in
445          * between is supposed to match itself.
446          */
447         for (j = stktop; j >= 0; j--) {
448                 if (eq(mac, br[stk[j].opno].clbr)) {
449                         /* Found.  Make a good diagnostic. */
450                         if (j == stktop - 2) {
451                                 /*
452                                  * Check for special case \fx..\fR and don't
453                                  * complain.
454                                  */
455                                 if (stk[j + 1].opno == FT &&
456                                     stk[j + 1].parm != 'R' &&
457                                     stk[j + 2].opno == FT &&
458                                     stk[j + 2].parm == 'R') {
459                                         stktop = j - 1;
460                                         return;
461                                 }
462                                 /*
463                                  * We have two unmatched frobs.  Chances are
464                                  * they were intended to match, so we mention
465                                  * them together.
466                                  */
467                                 pe(stk[j + 1].lno);
468                                 prop(j + 1);
469                                 printf(" does not match %d: ", stk[j + 2].lno);
470                                 prop(j + 2);
471                                 printf("\n");
472                         } else {
473                                 for (i = j + 1; i <= stktop; i++) {
474                                         complain(i);
475                                 }
476                         }
477                         stktop = j - 1;
478                         return;
479                 }
480         }
481         /* Didn't find one.  Throw this away. */
482         pe(lineno);
483         printf("Unmatched .%s\n", mac);
484 }
485
/* eq: return 1 when s1 and s2 hold the same string, 0 otherwise */
static int
eq(const char *s1, const char *s2)
{
        int differs;

        differs = strcmp(s1, s2);
        return (differs ? 0 : 1);
}
492
493 /* print the first part of an error message, given the line number */
494 static void
495 pe(int mylineno)
496 {
497         if (nfiles > 1)
498                 printf("%s: ", cfilename);
499         printf("%d: ", mylineno);
500 }
501
502 static void
503 checkknown(const char *mac)
504 {
505         if (eq(mac, "."))
506                 return;
507         if (binsrch(mac) >= 0)
508                 return;
509         if (mac[0] == '\\' && mac[1] == '"')    /* comments */
510                 return;
511
512         pe(lineno);
513         printf("Unknown command: .%s\n", mac);
514 }
515
516 /*
517  * We have a .de xx line in "line".  Add xx to the list of known commands.
518  */
519 static void
520 addcmd(char *myline)
521 {
522         char *mac;
523
524         /* grab the macro being defined */
525         mac = myline + 4;
526         while (isspace(*mac))
527                 mac++;
528         if (*mac == 0) {
529                 pe(lineno);
530                 printf("illegal define: %s\n", myline);
531                 return;
532         }
533         mac[2] = 0;
534         if (isspace(mac[1]) || mac[1] == '\\')
535                 mac[1] = 0;
536         if (ncmds >= MAXCMDS) {
537                 printf("Only %d known commands allowed\n", MAXCMDS);
538                 exit(1);
539         }
540         addmac(mac);
541 }
542
543 /*
544  * Add mac to the list.  We should really have some kind of tree
545  * structure here but this is a quick-and-dirty job and I just don't
546  * have time to mess with it.  (I wonder if this will come back to haunt
547  * me someday?)  Anyway, I claim that .de is fairly rare in user
548  * nroff programs, and the loop below is pretty fast.
549  */
550 static void
551 addmac(const char *mac)
552 {
553         char **src, **dest, **loc;
554
555         if (binsrch(mac) >= 0) {        /* it's OK to redefine something */
556 #ifdef DEBUG
557                 printf("binsrch(%s) -> already in table\n", mac);
558 #endif DEBUG
559                 return;
560         }
561         /* binsrch sets slot as a side effect */
562 #ifdef DEBUG
563         printf("binsrch(%s) -> %d\n", mac, slot);
564 #endif
565         loc = &knowncmds[slot];
566         src = &knowncmds[ncmds - 1];
567         dest = src + 1;
568         while (dest > loc)
569                 *dest-- = *src--;
570         *loc = malloc(3);
571         strcpy(*loc, mac);
572         ncmds++;
573 #ifdef DEBUG
574         printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2],
575                 knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1],
576                 knowncmds[slot+2], ncmds);
577 #endif
578 }
579
580 /*
581  * Do a binary search in knowncmds for mac.
582  * If found, return the index.  If not, return -1.
583  */
584 static int
585 binsrch(const char *mac)
586 {
587         const char *p;  /* pointer to current cmd in list */
588         int d;          /* difference if any */
589         int mid;        /* mid point in binary search */
590         int top, bot;   /* boundaries of bin search, inclusive */
591
592         top = ncmds - 1;
593         bot = 0;
594         while (top >= bot) {
595                 mid = (top + bot) / 2;
596                 p = knowncmds[mid];
597                 d = p[0] - mac[0];
598                 if (d == 0)
599                         d = p[1] - mac[1];
600                 if (d == 0)
601                         return mid;
602                 if (d < 0)
603                         bot = mid + 1;
604                 else
605                         top = mid - 1;
606         }
607         slot = bot;     /* place it would have gone */
608         return -1;
609 }