73634e9f4e39367bfe96d630b6d223e3fd7e3e91
[dragonfly.git] / usr.bin / checknr / checknr.c
1 /*
2  * Copyright (c) 1980, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *      This product includes software developed by the University of
16  *      California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  * @(#) Copyright (c) 1980, 1993 The Regents of the University of California.  All rights reserved.
34  * @(#)checknr.c        8.1 (Berkeley) 6/6/93
35  *
36  * $DragonFly: src/usr.bin/checknr/checknr.c,v 1.11 2005/03/04 02:53:55 cpressey Exp $
37  */
38
39 #include <err.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <ctype.h>
44
#define MAXSTK  100     /* Capacity of stk[], the stack of still-open brackets */
#define MAXBR   100     /* Capacity of br[], the table of opener/closer pairs */
#define MAXCMDS 500     /* Capacity of knowncmds[], the table of known commands */

/* Forward declarations of the file-local helpers defined further down. */
static void     addcmd(char *);                 /* add the macro named on a .de line */
static void     addmac(const char *);           /* insert a name into knowncmds, in order */
static int      binsrch(const char *, int *);   /* binary search of knowncmds */
static void     checkknown(const char *);       /* report a command missing from knowncmds */
static void     chkcmd(const char *);           /* match a command against br[] and stk[] */
static void     complain(int);                  /* report one stack entry as unmatched */
static int      eq(const char *, const char *); /* string equality test */
static void     nomatch(const char *);          /* diagnose a closer that misses the stack top */
static void     pe(int);                        /* print the file/line prefix of a message */
static void     process(FILE *);                /* check one input stream */
static void     prop(int);                      /* print a stack entry as it looked in the input */
static void     usage(void);                    /* print the synopsis and exit */
61
/*
 * The stack on which we remember what we've seen so far.
 * Each entry records one opening construct (a macro from br[], an
 * embedded \s size change or an embedded \f font change) that has not
 * been closed yet.
 */
struct stkstr {
        int opno;       /* index into br[] of the opening bracket (SZ/FT for \s, \f) */
        int pl;         /* '+', '-', ' ' for \s, 1 for \f, 0 for a macro opener */
        int parm;       /* parm to size, font, etc: the size for \s, the font char for \f */
        int lno;        /* input line number on which the opener was seen */
} stk[MAXSTK];
int stktop;             /* index of the top entry in stk[]; -1 when the stack is empty */
72
/*
 * The kinds of opening and closing brackets.
 *
 * Each entry pairs an opening macro name with the macro that closes it.
 * Names are at most two characters plus the terminating NUL.  The unused
 * tail of the array is zero-filled; the first entry with an empty opbr
 * marks the end of the table, and that is also where the -a option
 * appends user-supplied pairs.
 */
struct brstr {
        char opbr[3];   /* name of the opening macro */
        char clbr[3];   /* name of the macro that closes it */
} br[MAXBR] = {
        /* A few bare bones troff commands */
/* SZ and FT are the fixed indices used for embedded \s and \f escapes. */
#define SZ      0
        {"sz",  "sz"},  /* also \s */
#define FT      1
        {"ft",  "ft"},  /* also \f */
        /* the -mm package */
        {"AL",  "LE"},
        {"AS",  "AE"},
        {"BL",  "LE"},
        {"BS",  "BE"},
        {"DF",  "DE"},
        {"DL",  "LE"},
        {"DS",  "DE"},
        {"FS",  "FE"},
        {"ML",  "LE"},
        {"NS",  "NE"},
        {"RL",  "LE"},
        {"VL",  "LE"},
        /* the -ms package (DS/DE and FS/FE repeat entries from -mm above) */
        {"AB",  "AE"},
        {"BD",  "DE"},
        {"CD",  "DE"},
        {"DS",  "DE"},
        {"FS",  "FE"},
        {"ID",  "DE"},
        {"KF",  "KE"},
        {"KS",  "KE"},
        {"LD",  "DE"},
        {"LG",  "NL"},
        {"QS",  "QE"},
        {"RS",  "RE"},
        {"SM",  "NL"},
        {"XA",  "XE"},
        {"XS",  "XE"},
        /* The -me package */
        {"(b",  ")b"},
        {"(c",  ")c"},
        {"(d",  ")d"},
        {"(f",  ")f"},
        {"(l",  ")l"},
        {"(q",  ")q"},
        {"(x",  ")x"},
        {"(z",  ")z"},
        /* Things needed by preprocessors */
        {"EQ",  "EN"},
        {"TS",  "TE"},
        /* Refer */
        {"[",   "]"}
};
129
/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 *
 * The table is searched with a binary search (binsrch) that compares the
 * first two bytes of each name, so the entries must stay sorted in
 * ascending byte order; addmac() inserts new names at the right place.
 * The unused tail of the array is zero-filled, which is how main() counts
 * the initial number of entries.
 */
char knowncmds[MAXCMDS][3] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
"yr"
};
167
/* State shared by the checking routines below. */
int     lineno;         /* current line number in input file (starts at 1) */
char    line[256];      /* the current line, as read by fgets() */
const char *cfilename;  /* name of current file ("stdin" when reading standard input) */
int     nfiles;         /* number of files to process; messages name the file when > 1 */
int     fflag;          /* -f: ignore \f */
int     sflag;          /* -s: ignore \s */
int     ncmds;          /* number of entries currently in use in knowncmds */
175
176 /*
177  * checknr: check an nroff/troff input file for matching macro calls.
178  * we also attempt to match size and font changes, but only the embedded
179  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
180  * later but for now think of these restrictions as contributions to
181  * structured typesetting.
182  */
183 int
184 main(int argc, char **argv)
185 {
186         FILE *f;
187         int i;
188         char *cp;
189         char b1[4];
190
191         /* Figure out how many known commands there are */
192         ncmds = 0;
193         while (ncmds < MAXCMDS && knowncmds[ncmds][0] != '\0')
194                 ncmds++;
195         while (argc > 1 && argv[1][0] == '-') {
196                 switch(argv[1][1]) {
197
198                 /* -a: add pairs of macros */
199                 case 'a':
200                         if ((strlen(argv[1]) - 2) % 6 != 0)
201                                 usage();
202                         /* look for empty macro slots */
203                         i = 0;
204                         while (i < MAXBR && br[i].opbr[0] != '\0')
205                                 i++;
206                         if (i >= MAXBR) {
207                                 errx(1, "Only %d known macro-pairs allowed",
208                                     MAXBR);
209                         }
210                         for (cp = argv[1] + 3; cp[-1]; cp += 6) {
211                                 strncpy(br[i].opbr, cp, 2);
212                                 strncpy(br[i].clbr, cp + 3, 2);
213                                 /*
214                                  * known pairs are also known cmds
215                                  */
216                                 addmac(br[i].opbr);
217                                 addmac(br[i].clbr);
218                                 i++;
219                         }
220                         break;
221
222                 /* -c: add known commands */
223                 case 'c':
224                         i = strlen(argv[1]) - 2;
225                         if (i % 3 != 0)
226                                 usage();
227                         for (cp = argv[1] + 3; cp[-1]; cp += 3) {
228                                 if (cp[2] && cp[2] != '.')
229                                         usage();
230                                 strncpy(b1, cp, 2);
231                                 b1[2] = '\0';
232                                 addmac(b1);
233                         }
234                         break;
235
236                 /* -f: ignore font changes */
237                 case 'f':
238                         fflag = 1;
239                         break;
240
241                 /* -s: ignore size changes */
242                 case 's':
243                         sflag = 1;
244                         break;
245                 default:
246                         usage();
247                 }
248                 argc--; argv++;
249         }
250
251         nfiles = argc - 1;
252
253         if (nfiles > 0) {
254                 for (i = 1; i < argc; i++) {
255                         cfilename = argv[i];
256                         f = fopen(cfilename, "r");
257                         if (f == NULL)
258                                 perror(cfilename);
259                         else
260                                 process(f);
261                 }
262         } else {
263                 cfilename = "stdin";
264                 process(stdin);
265         }
266         exit(0);
267 }
268
/*
 * Print the command synopsis on standard error and exit with status 1.
 * Never returns.
 */
static void
usage(void)
{
        static const char synopsis[] =
            "usage: checknr [-fs] [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] "
            "file\n";

        fputs(synopsis, stderr);
        exit(1);
}
277
278 static void
279 process(FILE *f)
280 {
281         int i, n;
282         char mac[5];    /* The current macro or nroff command */
283         int pl;
284
285         stktop = -1;
286         for (lineno = 1; fgets(line, sizeof(line), f); lineno++) {
287                 if (line[0] == '.') {
288                         /*
289                          * find and isolate the macro/command name.
290                          */
291                         strncpy(mac, line + 1, 4);
292                         if (isspace(mac[0])) {
293                                 pe(lineno);
294                                 printf("Empty command\n");
295                         } else if (isspace(mac[1])) {
296                                 mac[1] = 0;
297                         } else if (isspace(mac[2])) {
298                                 mac[2] = 0;
299                         } else if (mac[0] != '\\' || mac[1] != '\"') {
300                                 pe(lineno);
301                                 printf("Command too long\n");
302                         }
303
304                         /*
305                          * Is it a known command?
306                          */
307                         checkknown(mac);
308
309                         /*
310                          * Should we add it?
311                          */
312                         if (eq(mac, "de"))
313                                 addcmd(line);
314
315                         chkcmd(mac);
316                 }
317
318                 /*
319                  * At this point we process the line looking
320                  * for \s and \f.
321                  */
322                 for (i = 0; line[i]; i++) {
323                         if (line[i] == '\\' && (i == 0 || line[i-1] != '\\')) {
324                                 if (!sflag && line[++i] == 's') {
325                                         pl = line[++i];
326                                         if (isdigit(pl)) {
327                                                 n = pl - '0';
328                                                 pl = ' ';
329                                         } else
330                                                 n = 0;
331                                         while (isdigit(line[++i]))
332                                                 n = 10 * n + line[i] - '0';
333                                         i--;
334                                         if (n == 0) {
335                                                 if (stk[stktop].opno == SZ) {
336                                                         stktop--;
337                                                 } else {
338                                                         pe(lineno);
339                                                         printf("unmatched \\s0\n");
340                                                 }
341                                         } else {
342                                                 stk[++stktop].opno = SZ;
343                                                 stk[stktop].pl = pl;
344                                                 stk[stktop].parm = n;
345                                                 stk[stktop].lno = lineno;
346                                         }
347                                 } else if (!fflag && line[i] == 'f') {
348                                         n = line[++i];
349                                         if (n == 'P') {
350                                                 if (stk[stktop].opno == FT) {
351                                                         stktop--;
352                                                 } else {
353                                                         pe(lineno);
354                                                         printf("unmatched \\fP\n");
355                                                 }
356                                         } else {
357                                                 stk[++stktop].opno = FT;
358                                                 stk[stktop].pl = 1;
359                                                 stk[stktop].parm = n;
360                                                 stk[stktop].lno = lineno;
361                                         }
362                                 }
363                         }
364                 }
365         }
366         /*
367          * We've hit the end and look at all this stuff that hasn't been
368          * matched yet!  Complain, complain.
369          */
370         for (i = stktop; i >= 0; i--) {
371                 complain(i);
372         }
373 }
374
375 static void
376 complain(int i)
377 {
378         pe(stk[i].lno);
379         printf("Unmatched ");
380         prop(i);
381         printf("\n");
382 }
383
384 static void
385 prop(int i)
386 {
387         if (stk[i].pl == 0)
388                 printf(".%s", br[stk[i].opno].opbr);
389         else switch(stk[i].opno) {
390         case SZ:
391                 printf("\\s%c%d", stk[i].pl, stk[i].parm);
392                 break;
393         case FT:
394                 printf("\\f%c", stk[i].parm);
395                 break;
396         default:
397                 printf("Bug: stk[%d].opno = %d = .%s, .%s",
398                         i, stk[i].opno, br[stk[i].opno].opbr,
399                         br[stk[i].opno].clbr);
400         }
401 }
402
403 static void
404 chkcmd(const char *mac)
405 {
406         int i;
407
408         /*
409          * Check to see if it matches top of stack.
410          */
411         if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
412                 stktop--;       /* OK. Pop & forget */
413         else {
414                 /* No. Maybe it's an opener */
415                 for (i = 0; br[i].opbr[0] != '\0'; i++) {
416                         if (eq(mac, br[i].opbr)) {
417                                 /* Found. Push it. */
418                                 stktop++;
419                                 stk[stktop].opno = i;
420                                 stk[stktop].pl = 0;
421                                 stk[stktop].parm = 0;
422                                 stk[stktop].lno = lineno;
423                                 break;
424                         }
425                         /*
426                          * Maybe it's an unmatched closer.
427                          * NOTE: this depends on the fact
428                          * that none of the closers can be
429                          * openers too.
430                          */
431                         if (eq(mac, br[i].clbr)) {
432                                 nomatch(mac);
433                                 break;
434                         }
435                 }
436         }
437 }
438
439 static void
440 nomatch(const char *mac)
441 {
442         int i, j;
443
444         /*
445          * Look for a match further down on stack
446          * If we find one, it suggests that the stuff in
447          * between is supposed to match itself.
448          */
449         for (j = stktop; j >= 0; j--) {
450                 if (eq(mac, br[stk[j].opno].clbr)) {
451                         /* Found.  Make a good diagnostic. */
452                         if (j == stktop - 2) {
453                                 /*
454                                  * Check for special case \fx..\fR and don't
455                                  * complain.
456                                  */
457                                 if (stk[j + 1].opno == FT &&
458                                     stk[j + 1].parm != 'R' &&
459                                     stk[j + 2].opno == FT &&
460                                     stk[j + 2].parm == 'R') {
461                                         stktop = j - 1;
462                                         return;
463                                 }
464                                 /*
465                                  * We have two unmatched frobs.  Chances are
466                                  * they were intended to match, so we mention
467                                  * them together.
468                                  */
469                                 pe(stk[j + 1].lno);
470                                 prop(j + 1);
471                                 printf(" does not match %d: ", stk[j + 2].lno);
472                                 prop(j + 2);
473                                 printf("\n");
474                         } else {
475                                 for (i = j + 1; i <= stktop; i++) {
476                                         complain(i);
477                                 }
478                         }
479                         stktop = j - 1;
480                         return;
481                 }
482         }
483         /* Didn't find one.  Throw this away. */
484         pe(lineno);
485         printf("Unmatched .%s\n", mac);
486 }
487
/* eq: return 1 when the two strings are identical, 0 otherwise */
static int
eq(const char *s1, const char *s2)
{
        return (!strcmp(s1, s2));
}
494
495 /* print the first part of an error message, given the line number */
496 static void
497 pe(int mylineno)
498 {
499         if (nfiles > 1)
500                 printf("%s: ", cfilename);
501         printf("%d: ", mylineno);
502 }
503
504 static void
505 checkknown(const char *mac)
506 {
507         if (eq(mac, "."))
508                 return;
509         if (binsrch(mac, NULL) >= 0)
510                 return;
511         if (mac[0] == '\\' && mac[1] == '"')    /* comments */
512                 return;
513
514         pe(lineno);
515         printf("Unknown command: .%s\n", mac);
516 }
517
518 /*
519  * We have a .de xx line in "line".  Add xx to the list of known commands.
520  */
521 static void
522 addcmd(char *myline)
523 {
524         char *mac;
525
526         /* grab the macro being defined */
527         mac = myline + 4;
528         while (isspace(*mac))
529                 mac++;
530         if (*mac == 0) {
531                 pe(lineno);
532                 printf("illegal define: %s\n", myline);
533                 return;
534         }
535         mac[2] = 0;
536         if (isspace(mac[1]) || mac[1] == '\\')
537                 mac[1] = 0;
538         addmac(mac);
539 }
540
541 /*
542  * Add mac to the list.  We should really have some kind of tree
543  * structure here, but the loop below is reasonably fast.
544  */
545 static void
546 addmac(const char *mac)
547 {
548         int i, slot;
549
550         if (ncmds >= MAXCMDS) {
551                 errx(1, "Only %d known commands allowed", MAXCMDS);
552         }
553
554         /* Don't try to add it if it's already in the table. */
555         if (binsrch(mac, &slot) >= 0) {
556 #ifdef DEBUG
557                 printf("binsrch(%s) -> already in table\n", mac);
558 #endif DEBUG
559                 return;
560         }
561 #ifdef DEBUG
562         printf("binsrch(%s) -> %d\n", mac, slot);
563 #endif
564         for (i = ncmds - 1; i >= slot; i--) {
565                 strncpy(knowncmds[i + 1], knowncmds[i], 2);
566         }
567         strncpy(knowncmds[slot], mac, 2);
568         ncmds++;
569 #ifdef DEBUG
570         printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2],
571                 knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1],
572                 knowncmds[slot+2], ncmds);
573 #endif
574 }
575
576 /*
577  * Do a binary search in knowncmds for mac.
578  * If found, return the index.  If not, return -1.
579  * Also, if not found, and if slot_ptr is not NULL,
580  * set *slot_ptr to where it should have been.
581  */
582 static int
583 binsrch(const char *mac, int *slot_ptr)
584 {
585         const char *p;  /* pointer to current cmd in list */
586         int d;          /* difference if any */
587         int mid;        /* mid point in binary search */
588         int top, bot;   /* boundaries of bin search, inclusive */
589
590         top = ncmds - 1;
591         bot = 0;
592         while (top >= bot) {
593                 mid = (top + bot) / 2;
594                 p = knowncmds[mid];
595                 d = p[0] - mac[0];
596                 if (d == 0)
597                         d = p[1] - mac[1];
598                 if (d == 0)
599                         return mid;
600                 if (d < 0)
601                         bot = mid + 1;
602                 else
603                         top = mid - 1;
604         }
605         if (slot_ptr != NULL)
606                 *slot_ptr = bot;        /* place it would have gone */
607         return -1;
608 }