This should have been part of the previous commit message to var.c
[dragonfly.git] / usr.bin / checknr / checknr.c
1 /*
2  * Copyright (c) 1980, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *      This product includes software developed by the University of
16  *      California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  * @(#) Copyright (c) 1980, 1993 The Regents of the University of California.  All rights reserved.
34  * @(#)checknr.c        8.1 (Berkeley) 6/6/93
35  *
36  * $DragonFly: src/usr.bin/checknr/checknr.c,v 1.7 2005/03/01 22:50:20 cpressey Exp $
37  */
38
39 /*
40  * checknr: check an nroff/troff input file for matching macro calls.
41  * we also attempt to match size and font changes, but only the embedded
42  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
43  * later but for now think of these restrictions as contributions to
44  * structured typesetting.
45  */
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <ctype.h>
50
/* Fixed capacities of the static tables below. */
#define MAXSTK	100	/* depth of the bracket stack */
#define MAXBR	100	/* bracket (opener/closer) pairs that fit in br[] */
#define MAXCMDS	500	/* command names that fit in knowncmds[] */

/* File-local helpers, listed alphabetically. */
static void	addcmd(char *myline);
static void	addmac(const char *mac);
static int	binsrch(const char *mac, int *slot_ptr);
static void	checkknown(const char *mac);
static void	chkcmd(const char *mac);
static void	complain(int i);
static int	eq(const char *s1, const char *s2);
static void	nomatch(const char *mac);
static void	pe(int mylineno);
static void	process(FILE *f);
static void	prop(int i);
static void	usage(void);
67
68 /*
69  * The stack on which we remember what we've seen so far.
70  */
71 struct stkstr {
72         int opno;       /* number of opening bracket */
73         int pl;         /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
74         int parm;       /* parm to size, font, etc */
75         int lno;        /* line number the thing came in in */
76 } stk[MAXSTK];
77 int stktop;
78
/*
 * Table of opener/closer macro pairs, terminated by a {0, 0} entry.
 * Entries SZ and FT double as the stack tags for the inline \s and \f
 * escapes.  The -a option appends further pairs after the last used slot.
 */
#define SZ	0	/* br[] index used for size changes (also \s) */
#define FT	1	/* br[] index used for font changes (also \f) */

struct brstr {
	char *opbr;	/* opening macro name */
	char *clbr;	/* closing macro name */
} br[MAXBR] = {
	/* A few bare bones troff commands */
	{"sz", "sz"}, {"ft", "ft"},
	/* the -mm package */
	{"AL", "LE"}, {"AS", "AE"}, {"BL", "LE"}, {"BS", "BE"},
	{"DF", "DE"}, {"DL", "LE"}, {"DS", "DE"}, {"FS", "FE"},
	{"ML", "LE"}, {"NS", "NE"}, {"RL", "LE"}, {"VL", "LE"},
	/* the -ms package */
	{"AB", "AE"}, {"BD", "DE"}, {"CD", "DE"}, {"DS", "DE"},
	{"FS", "FE"}, {"ID", "DE"}, {"KF", "KE"}, {"KS", "KE"},
	{"LD", "DE"}, {"LG", "NL"}, {"QS", "QE"}, {"RS", "RE"},
	{"SM", "NL"}, {"XA", "XE"}, {"XS", "XE"},
	/* The -me package */
	{"(b", ")b"}, {"(c", ")c"}, {"(d", ")d"}, {"(f", ")f"},
	{"(l", ")l"}, {"(q", ")q"}, {"(x", ")x"}, {"(z", ")z"},
	/* Things needed by preprocessors */
	{"EQ", "EN"}, {"TS", "TE"},
	/* Refer */
	{"[", "]"},
	/* end marker */
	{0, 0}
};
136
137 /*
138  * All commands known to nroff, plus macro packages.
139  * Used so we can complain about unrecognized commands.
140  */
141 char *knowncmds[MAXCMDS] = {
142 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
143 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
144 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
145 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
146 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
147 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
148 "D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
149 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
150 "FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
151 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
152 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
153 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
154 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
155 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
156 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
157 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
158 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
159 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
160 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
161 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
162 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
163 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
164 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
165 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
166 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
167 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
168 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
169 "q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
170 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
171 "ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
172 "yr", 0
173 };
174
/* Program-wide state shared by the checking routines. */
int	lineno;		/* line number currently being read */
char	line[256];	/* text of the line currently being checked */
char	*cfilename;	/* name of the file being processed */
int	nfiles;		/* how many files were named on the command line */
int	fflag;		/* set by -f: do not check \f */
int	sflag;		/* set by -s: do not check \s */
int	ncmds;		/* number of entries in use in knowncmds */
182
183 int
184 main(int argc, char **argv)
185 {
186         FILE *f;
187         int i;
188         char *cp;
189         char b1[4];
190
191         /* Figure out how many known commands there are */
192         while (knowncmds[ncmds])
193                 ncmds++;
194         while (argc > 1 && argv[1][0] == '-') {
195                 switch(argv[1][1]) {
196
197                 /* -a: add pairs of macros */
198                 case 'a':
199                         i = strlen(argv[1]) - 2;
200                         if (i % 6 != 0)
201                                 usage();
202                         /* look for empty macro slots */
203                         for (i = 0; br[i].opbr; i++)
204                                 ;
205                         for (cp = argv[1] + 3; cp[-1]; cp += 6) {
206                                 br[i].opbr = malloc(3);
207                                 strncpy(br[i].opbr, cp, 2);
208                                 br[i].clbr = malloc(3);
209                                 strncpy(br[i].clbr, cp + 3, 2);
210                                 /*
211                                  * known pairs are also known cmds
212                                  */
213                                 addmac(br[i].opbr);
214                                 addmac(br[i].clbr);
215                                 i++;
216                         }
217                         break;
218
219                 /* -c: add known commands */
220                 case 'c':
221                         i = strlen(argv[1]) - 2;
222                         if (i % 3 != 0)
223                                 usage();
224                         for (cp = argv[1] + 3; cp[-1]; cp += 3) {
225                                 if (cp[2] && cp[2] != '.')
226                                         usage();
227                                 strncpy(b1, cp, 2);
228                                 b1[2] = '\0';
229                                 addmac(b1);
230                         }
231                         break;
232
233                 /* -f: ignore font changes */
234                 case 'f':
235                         fflag = 1;
236                         break;
237
238                 /* -s: ignore size changes */
239                 case 's':
240                         sflag = 1;
241                         break;
242                 default:
243                         usage();
244                 }
245                 argc--; argv++;
246         }
247
248         nfiles = argc - 1;
249
250         if (nfiles > 0) {
251                 for (i = 1; i < argc; i++) {
252                         cfilename = argv[i];
253                         f = fopen(cfilename, "r");
254                         if (f == NULL)
255                                 perror(cfilename);
256                         else
257                                 process(f);
258                 }
259         } else {
260                 cfilename = "stdin";
261                 process(stdin);
262         }
263         exit(0);
264 }
265
/* usage: print the synopsis on stderr and terminate with status 1. */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: checknr [-sf] [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] "
	    "file\n";

	fputs(synopsis, stderr);
	exit(1);
}
274
275 static void
276 process(FILE *f)
277 {
278         int i, n;
279         char mac[5];    /* The current macro or nroff command */
280         int pl;
281
282         stktop = -1;
283         for (lineno = 1; fgets(line, sizeof(line), f); lineno++) {
284                 if (line[0] == '.') {
285                         /*
286                          * find and isolate the macro/command name.
287                          */
288                         strncpy(mac, line + 1, 4);
289                         if (isspace(mac[0])) {
290                                 pe(lineno);
291                                 printf("Empty command\n");
292                         } else if (isspace(mac[1])) {
293                                 mac[1] = 0;
294                         } else if (isspace(mac[2])) {
295                                 mac[2] = 0;
296                         } else if (mac[0] != '\\' || mac[1] != '\"') {
297                                 pe(lineno);
298                                 printf("Command too long\n");
299                         }
300
301                         /*
302                          * Is it a known command?
303                          */
304                         checkknown(mac);
305
306                         /*
307                          * Should we add it?
308                          */
309                         if (eq(mac, "de"))
310                                 addcmd(line);
311
312                         chkcmd(mac);
313                 }
314
315                 /*
316                  * At this point we process the line looking
317                  * for \s and \f.
318                  */
319                 for (i = 0; line[i]; i++) {
320                         if (line[i] == '\\' && (i == 0 || line[i-1] != '\\')) {
321                                 if (!sflag && line[++i] == 's') {
322                                         pl = line[++i];
323                                         if (isdigit(pl)) {
324                                                 n = pl - '0';
325                                                 pl = ' ';
326                                         } else
327                                                 n = 0;
328                                         while (isdigit(line[++i]))
329                                                 n = 10 * n + line[i] - '0';
330                                         i--;
331                                         if (n == 0) {
332                                                 if (stk[stktop].opno == SZ) {
333                                                         stktop--;
334                                                 } else {
335                                                         pe(lineno);
336                                                         printf("unmatched \\s0\n");
337                                                 }
338                                         } else {
339                                                 stk[++stktop].opno = SZ;
340                                                 stk[stktop].pl = pl;
341                                                 stk[stktop].parm = n;
342                                                 stk[stktop].lno = lineno;
343                                         }
344                                 } else if (!fflag && line[i] == 'f') {
345                                         n = line[++i];
346                                         if (n == 'P') {
347                                                 if (stk[stktop].opno == FT) {
348                                                         stktop--;
349                                                 } else {
350                                                         pe(lineno);
351                                                         printf("unmatched \\fP\n");
352                                                 }
353                                         } else {
354                                                 stk[++stktop].opno = FT;
355                                                 stk[stktop].pl = 1;
356                                                 stk[stktop].parm = n;
357                                                 stk[stktop].lno = lineno;
358                                         }
359                                 }
360                         }
361                 }
362         }
363         /*
364          * We've hit the end and look at all this stuff that hasn't been
365          * matched yet!  Complain, complain.
366          */
367         for (i = stktop; i >= 0; i--) {
368                 complain(i);
369         }
370 }
371
372 static void
373 complain(int i)
374 {
375         pe(stk[i].lno);
376         printf("Unmatched ");
377         prop(i);
378         printf("\n");
379 }
380
381 static void
382 prop(int i)
383 {
384         if (stk[i].pl == 0)
385                 printf(".%s", br[stk[i].opno].opbr);
386         else switch(stk[i].opno) {
387         case SZ:
388                 printf("\\s%c%d", stk[i].pl, stk[i].parm);
389                 break;
390         case FT:
391                 printf("\\f%c", stk[i].parm);
392                 break;
393         default:
394                 printf("Bug: stk[%d].opno = %d = .%s, .%s",
395                         i, stk[i].opno, br[stk[i].opno].opbr,
396                         br[stk[i].opno].clbr);
397         }
398 }
399
400 static void
401 chkcmd(const char *mac)
402 {
403         int i;
404
405         /*
406          * Check to see if it matches top of stack.
407          */
408         if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
409                 stktop--;       /* OK. Pop & forget */
410         else {
411                 /* No. Maybe it's an opener */
412                 for (i = 0; br[i].opbr; i++) {
413                         if (eq(mac, br[i].opbr)) {
414                                 /* Found. Push it. */
415                                 stktop++;
416                                 stk[stktop].opno = i;
417                                 stk[stktop].pl = 0;
418                                 stk[stktop].parm = 0;
419                                 stk[stktop].lno = lineno;
420                                 break;
421                         }
422                         /*
423                          * Maybe it's an unmatched closer.
424                          * NOTE: this depends on the fact
425                          * that none of the closers can be
426                          * openers too.
427                          */
428                         if (eq(mac, br[i].clbr)) {
429                                 nomatch(mac);
430                                 break;
431                         }
432                 }
433         }
434 }
435
436 static void
437 nomatch(const char *mac)
438 {
439         int i, j;
440
441         /*
442          * Look for a match further down on stack
443          * If we find one, it suggests that the stuff in
444          * between is supposed to match itself.
445          */
446         for (j = stktop; j >= 0; j--) {
447                 if (eq(mac, br[stk[j].opno].clbr)) {
448                         /* Found.  Make a good diagnostic. */
449                         if (j == stktop - 2) {
450                                 /*
451                                  * Check for special case \fx..\fR and don't
452                                  * complain.
453                                  */
454                                 if (stk[j + 1].opno == FT &&
455                                     stk[j + 1].parm != 'R' &&
456                                     stk[j + 2].opno == FT &&
457                                     stk[j + 2].parm == 'R') {
458                                         stktop = j - 1;
459                                         return;
460                                 }
461                                 /*
462                                  * We have two unmatched frobs.  Chances are
463                                  * they were intended to match, so we mention
464                                  * them together.
465                                  */
466                                 pe(stk[j + 1].lno);
467                                 prop(j + 1);
468                                 printf(" does not match %d: ", stk[j + 2].lno);
469                                 prop(j + 2);
470                                 printf("\n");
471                         } else {
472                                 for (i = j + 1; i <= stktop; i++) {
473                                         complain(i);
474                                 }
475                         }
476                         stktop = j - 1;
477                         return;
478                 }
479         }
480         /* Didn't find one.  Throw this away. */
481         pe(lineno);
482         printf("Unmatched .%s\n", mac);
483 }
484
/* eq: return 1 when s1 and s2 hold the same string, 0 otherwise */
static int
eq(const char *s1, const char *s2)
{
	return !strcmp(s1, s2);
}
491
492 /* print the first part of an error message, given the line number */
493 static void
494 pe(int mylineno)
495 {
496         if (nfiles > 1)
497                 printf("%s: ", cfilename);
498         printf("%d: ", mylineno);
499 }
500
501 static void
502 checkknown(const char *mac)
503 {
504         if (eq(mac, "."))
505                 return;
506         if (binsrch(mac, NULL) >= 0)
507                 return;
508         if (mac[0] == '\\' && mac[1] == '"')    /* comments */
509                 return;
510
511         pe(lineno);
512         printf("Unknown command: .%s\n", mac);
513 }
514
515 /*
516  * We have a .de xx line in "line".  Add xx to the list of known commands.
517  */
518 static void
519 addcmd(char *myline)
520 {
521         char *mac;
522
523         /* grab the macro being defined */
524         mac = myline + 4;
525         while (isspace(*mac))
526                 mac++;
527         if (*mac == 0) {
528                 pe(lineno);
529                 printf("illegal define: %s\n", myline);
530                 return;
531         }
532         mac[2] = 0;
533         if (isspace(mac[1]) || mac[1] == '\\')
534                 mac[1] = 0;
535         if (ncmds >= MAXCMDS) {
536                 printf("Only %d known commands allowed\n", MAXCMDS);
537                 exit(1);
538         }
539         addmac(mac);
540 }
541
542 /*
543  * Add mac to the list.  We should really have some kind of tree
544  * structure here but this is a quick-and-dirty job and I just don't
545  * have time to mess with it.  (I wonder if this will come back to haunt
546  * me someday?)  Anyway, I claim that .de is fairly rare in user
547  * nroff programs, and the loop below is pretty fast.
548  */
549 static void
550 addmac(const char *mac)
551 {
552         char **src, **dest, **loc;
553         int slot;
554
555         if (binsrch(mac, &slot) >= 0) { /* it's OK to redefine something */
556 #ifdef DEBUG
557                 printf("binsrch(%s) -> already in table\n", mac);
558 #endif DEBUG
559                 return;
560         }
561         /* binsrch sets slot as a side effect */
562 #ifdef DEBUG
563         printf("binsrch(%s) -> %d\n", mac, slot);
564 #endif
565         loc = &knowncmds[slot];
566         src = &knowncmds[ncmds - 1];
567         dest = src + 1;
568         while (dest > loc)
569                 *dest-- = *src--;
570         *loc = malloc(3);
571         strcpy(*loc, mac);
572         ncmds++;
573 #ifdef DEBUG
574         printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2],
575                 knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1],
576                 knowncmds[slot+2], ncmds);
577 #endif
578 }
579
580 /*
581  * Do a binary search in knowncmds for mac.
582  * If found, return the index.  If not, return -1.
583  * Also, if not found, and if slot_ptr is not NULL,
584  * set *slot_ptr to where it should have been.
585  */
586 static int
587 binsrch(const char *mac, int *slot_ptr)
588 {
589         const char *p;  /* pointer to current cmd in list */
590         int d;          /* difference if any */
591         int mid;        /* mid point in binary search */
592         int top, bot;   /* boundaries of bin search, inclusive */
593
594         top = ncmds - 1;
595         bot = 0;
596         while (top >= bot) {
597                 mid = (top + bot) / 2;
598                 p = knowncmds[mid];
599                 d = p[0] - mac[0];
600                 if (d == 0)
601                         d = p[1] - mac[1];
602                 if (d == 0)
603                         return mid;
604                 if (d < 0)
605                         bot = mid + 1;
606                 else
607                         top = mid - 1;
608         }
609         if (slot_ptr != NULL)
610                 *slot_ptr = bot;        /* place it would have gone */
611         return -1;
612 }