Correct BSD License clause numbering from 1-2-4 to 1-2-3.
[dragonfly.git] / usr.bin / checknr / checknr.c
1 /*
2  * Copyright (c) 1980, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * @(#) Copyright (c) 1980, 1993 The Regents of the University of California.  All rights reserved.
30  * @(#)checknr.c        8.1 (Berkeley) 6/6/93
31  *
32  * $DragonFly: src/usr.bin/checknr/checknr.c,v 1.13 2008/11/11 01:02:40 pavalos Exp $
33  */
34
35 #include <err.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <ctype.h>
40
41 #define MAXSTK  100     /* Stack size */
42 #define MAXBR   100     /* Max number of bracket pairs known */
43 #define MAXCMDS 500     /* Max number of commands known */
44
45 static void     addcmd(char *);
46 static void     addmac(const char *);
47 static int      binsrch(const char *, int *);
48 static void     checkknown(const char *);
49 static void     chkcmd(const char *);
50 static void     complain(int);
51 static int      eq(const char *, const char *);
52 static void     nomatch(const char *);
53 static void     pe(int);
54 static void     process(FILE *);
55 static void     prop(int);
56 static void     usage(void);
57
58 /*
59  * The stack on which we remember what we've seen so far.
60  */
61 struct stkstr {
62         int opno;       /* index into br[] of the opening bracket */
63         int pl;         /* '+', '-', ' ' for \s, 1 for \f, 0 for a macro opener */
64         int parm;       /* size for \s, font character for \f, 0 for macros */
65         int lno;        /* line number the thing came in on */
66 } stk[MAXSTK];
67 int stktop;             /* index of the newest stk[] entry; -1 when empty */
68
69 /*
70  * The kinds of opening and closing brackets.
71  */
72 struct brstr {
73         char opbr[3];   /* name of the opening macro */
74         char clbr[3];   /* name of the macro that closes it */
75 } br[MAXBR] = {
        /*
         * Slots without an initializer stay zeroed; main() and chkcmd()
         * treat the first entry whose opbr is empty as the end of the
         * table, and main() appends the -a pairs there.
         * SZ and FT are the slots process() uses for \s and \f.
         */
76         /* A few bare bones troff commands */
77 #define SZ      0
78         {"sz",  "sz"},  /* also \s */
79 #define FT      1
80         {"ft",  "ft"},  /* also \f */
81         /* the -mm package */
82         {"AL",  "LE"},
83         {"AS",  "AE"},
84         {"BL",  "LE"},
85         {"BS",  "BE"},
86         {"DF",  "DE"},
87         {"DL",  "LE"},
88         {"DS",  "DE"},
89         {"FS",  "FE"},
90         {"ML",  "LE"},
91         {"NS",  "NE"},
92         {"RL",  "LE"},
93         {"VL",  "LE"},
94         /* the -ms package */
95         {"AB",  "AE"},
96         {"BD",  "DE"},
97         {"CD",  "DE"},
98         {"DS",  "DE"},
99         {"FS",  "FE"},
100         {"ID",  "DE"},
101         {"KF",  "KE"},
102         {"KS",  "KE"},
103         {"LD",  "DE"},
104         {"LG",  "NL"},
105         {"QS",  "QE"},
106         {"RS",  "RE"},
107         {"SM",  "NL"},
108         {"XA",  "XE"},
109         {"XS",  "XE"},
110         /* The -me package */
111         {"(b",  ")b"},
112         {"(c",  ")c"},
113         {"(d",  ")d"},
114         {"(f",  ")f"},
115         {"(l",  ")l"},
116         {"(q",  ")q"},
117         {"(x",  ")x"},
118         {"(z",  ")z"},
119         /* Things needed by preprocessors */
120         {"EQ",  "EN"},
121         {"TS",  "TE"},
122         /* Refer */
123         {"[",   "]"}
124 };
125
126 /*
127  * All commands known to nroff, plus macro packages.
128  * Used so we can complain about unrecognized commands.
129  */
/*
 * The entries must stay in ascending order of their first two
 * characters: binsrch() does a binary search over this array and
 * addmac() inserts new names at the position binsrch() reports.
 * main() counts the entries up to the first empty string to set ncmds.
 */
130 char knowncmds[MAXCMDS][3] = {
131 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
132 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
133 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
134 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
135 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
136 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
137 "D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
138 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
139 "FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
140 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
141 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
142 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
143 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
144 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
145 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
146 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
147 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
148 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
149 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
150 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
151 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
152 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
153 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
154 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
155 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
156 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
157 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
158 "q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
159 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
160 "ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
161 "yr"
162 };
163
164 int     lineno;         /* current line number in input file */
165 char    line[256];      /* the current line, as filled in by fgets() */
166 const char *cfilename;  /* name of current file ("stdin" when reading stdin) */
167 int     nfiles;         /* number of file operands; pe() names the file when > 1 */
168 int     fflag;          /* -f: ignore \f */
169 int     sflag;          /* -s: ignore \s */
170 int     ncmds;          /* number of entries in use in knowncmds */
171
172 /*
173  * checknr: check an nroff/troff input file for matching macro calls.
174  * we also attempt to match size and font changes, but only the embedded
175  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
176  * later but for now think of these restrictions as contributions to
177  * structured typesetting.
178  */
179 int
180 main(int argc, char **argv)
181 {
182         FILE *f;
183         int i;
184         char *cp;
185         char b1[4];
186
187         /* Figure out how many known commands there are */
188         ncmds = 0;
189         while (ncmds < MAXCMDS && knowncmds[ncmds][0] != '\0')
190                 ncmds++;
191         while (argc > 1 && argv[1][0] == '-') {
192                 switch(argv[1][1]) {
193
194                 /* -a: add pairs of macros */
195                 case 'a':
196                         if ((strlen(argv[1]) - 2) % 6 != 0)
197                                 usage();
198                         /* look for empty macro slots */
199                         i = 0;
200                         while (i < MAXBR && br[i].opbr[0] != '\0')
201                                 i++;
202                         if (i >= MAXBR) {
203                                 errx(1, "Only %d known macro-pairs allowed",
204                                     MAXBR);
205                         }
206                         for (cp = argv[1] + 3; cp[-1]; cp += 6) {
207                                 strncpy(br[i].opbr, cp, 2);
208                                 strncpy(br[i].clbr, cp + 3, 2);
209                                 /*
210                                  * known pairs are also known cmds
211                                  */
212                                 addmac(br[i].opbr);
213                                 addmac(br[i].clbr);
214                                 i++;
215                         }
216                         break;
217
218                 /* -c: add known commands */
219                 case 'c':
220                         i = strlen(argv[1]) - 2;
221                         if (i % 3 != 0)
222                                 usage();
223                         for (cp = argv[1] + 3; cp[-1]; cp += 3) {
224                                 if (cp[2] && cp[2] != '.')
225                                         usage();
226                                 strncpy(b1, cp, 2);
227                                 b1[2] = '\0';
228                                 addmac(b1);
229                         }
230                         break;
231
232                 /* -f: ignore font changes */
233                 case 'f':
234                         fflag = 1;
235                         break;
236
237                 /* -s: ignore size changes */
238                 case 's':
239                         sflag = 1;
240                         break;
241                 default:
242                         usage();
243                 }
244                 argc--; argv++;
245         }
246
247         nfiles = argc - 1;
248
249         if (nfiles > 0) {
250                 for (i = 1; i < argc; i++) {
251                         cfilename = argv[i];
252                         f = fopen(cfilename, "r");
253                         if (f == NULL)
254                                 warn("%s", cfilename);
255                         else {
256                                 process(f);
257                                 fclose(f);
258                         }
259                 }
260         } else {
261                 cfilename = "stdin";
262                 process(stdin);
263         }
264         exit(0);
265 }
266
/*
 * usage: write the command synopsis to standard error and exit with
 * status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: checknr [-fs] [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] "
	    "file\n";

	fputs(synopsis, stderr);
	exit(1);
}
275
276 static void
277 process(FILE *f)
278 {
279         int i, n;
280         char mac[5];    /* The current macro or nroff command */
281         int pl;
282
283         stktop = -1;
284         for (lineno = 1; fgets(line, sizeof(line), f); lineno++) {
285                 if (line[0] == '.') {
286                         /*
287                          * find and isolate the macro/command name.
288                          */
289                         strncpy(mac, line + 1, 4);
290                         if (isspace(mac[0])) {
291                                 pe(lineno);
292                                 printf("Empty command\n");
293                         } else if (isspace(mac[1])) {
294                                 mac[1] = 0;
295                         } else if (isspace(mac[2])) {
296                                 mac[2] = 0;
297                         } else if (mac[0] != '\\' || mac[1] != '\"') {
298                                 pe(lineno);
299                                 printf("Command too long\n");
300                         }
301
302                         /*
303                          * Is it a known command?
304                          */
305                         checkknown(mac);
306
307                         /*
308                          * Should we add it?
309                          */
310                         if (eq(mac, "de"))
311                                 addcmd(line);
312
313                         chkcmd(mac);
314                 }
315
316                 /*
317                  * At this point we process the line looking
318                  * for \s and \f.
319                  */
320                 for (i = 0; line[i]; i++) {
321                         if (line[i] == '\\' && (i == 0 || line[i-1] != '\\')) {
322                                 if (!sflag && line[++i] == 's') {
323                                         pl = line[++i];
324                                         if (isdigit(pl)) {
325                                                 n = pl - '0';
326                                                 pl = ' ';
327                                         } else
328                                                 n = 0;
329                                         while (isdigit(line[++i]))
330                                                 n = 10 * n + line[i] - '0';
331                                         i--;
332                                         if (n == 0) {
333                                                 if (stk[stktop].opno == SZ) {
334                                                         stktop--;
335                                                 } else {
336                                                         pe(lineno);
337                                                         printf("unmatched \\s0\n");
338                                                 }
339                                         } else {
340                                                 stk[++stktop].opno = SZ;
341                                                 stk[stktop].pl = pl;
342                                                 stk[stktop].parm = n;
343                                                 stk[stktop].lno = lineno;
344                                         }
345                                 } else if (!fflag && line[i] == 'f') {
346                                         n = line[++i];
347                                         if (n == 'P') {
348                                                 if (stk[stktop].opno == FT) {
349                                                         stktop--;
350                                                 } else {
351                                                         pe(lineno);
352                                                         printf("unmatched \\fP\n");
353                                                 }
354                                         } else {
355                                                 stk[++stktop].opno = FT;
356                                                 stk[stktop].pl = 1;
357                                                 stk[stktop].parm = n;
358                                                 stk[stktop].lno = lineno;
359                                         }
360                                 }
361                         }
362                 }
363         }
364         /*
365          * We've hit the end and look at all this stuff that hasn't been
366          * matched yet!  Complain, complain.
367          */
368         for (i = stktop; i >= 0; i--) {
369                 complain(i);
370         }
371 }
372
373 static void
374 complain(int i)
375 {
376         pe(stk[i].lno);
377         printf("Unmatched ");
378         prop(i);
379         printf("\n");
380 }
381
382 static void
383 prop(int i)
384 {
385         if (stk[i].pl == 0)
386                 printf(".%s", br[stk[i].opno].opbr);
387         else switch(stk[i].opno) {
388         case SZ:
389                 printf("\\s%c%d", stk[i].pl, stk[i].parm);
390                 break;
391         case FT:
392                 printf("\\f%c", stk[i].parm);
393                 break;
394         default:
395                 printf("Bug: stk[%d].opno = %d = .%s, .%s",
396                         i, stk[i].opno, br[stk[i].opno].opbr,
397                         br[stk[i].opno].clbr);
398         }
399 }
400
401 static void
402 chkcmd(const char *mac)
403 {
404         int i;
405
406         /*
407          * Check to see if it matches top of stack.
408          */
409         if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
410                 stktop--;       /* OK. Pop & forget */
411         else {
412                 /* No. Maybe it's an opener */
413                 for (i = 0; br[i].opbr[0] != '\0'; i++) {
414                         if (eq(mac, br[i].opbr)) {
415                                 /* Found. Push it. */
416                                 stktop++;
417                                 stk[stktop].opno = i;
418                                 stk[stktop].pl = 0;
419                                 stk[stktop].parm = 0;
420                                 stk[stktop].lno = lineno;
421                                 break;
422                         }
423                         /*
424                          * Maybe it's an unmatched closer.
425                          * NOTE: this depends on the fact
426                          * that none of the closers can be
427                          * openers too.
428                          */
429                         if (eq(mac, br[i].clbr)) {
430                                 nomatch(mac);
431                                 break;
432                         }
433                 }
434         }
435 }
436
437 static void
438 nomatch(const char *mac)
439 {
440         int i, j;
441
442         /*
443          * Look for a match further down on stack
444          * If we find one, it suggests that the stuff in
445          * between is supposed to match itself.
446          */
447         for (j = stktop; j >= 0; j--) {
448                 if (eq(mac, br[stk[j].opno].clbr)) {
449                         /* Found.  Make a good diagnostic. */
450                         if (j == stktop - 2) {
451                                 /*
452                                  * Check for special case \fx..\fR and don't
453                                  * complain.
454                                  */
455                                 if (stk[j + 1].opno == FT &&
456                                     stk[j + 1].parm != 'R' &&
457                                     stk[j + 2].opno == FT &&
458                                     stk[j + 2].parm == 'R') {
459                                         stktop = j - 1;
460                                         return;
461                                 }
462                                 /*
463                                  * We have two unmatched frobs.  Chances are
464                                  * they were intended to match, so we mention
465                                  * them together.
466                                  */
467                                 pe(stk[j + 1].lno);
468                                 prop(j + 1);
469                                 printf(" does not match %d: ", stk[j + 2].lno);
470                                 prop(j + 2);
471                                 printf("\n");
472                         } else {
473                                 for (i = j + 1; i <= stktop; i++) {
474                                         complain(i);
475                                 }
476                         }
477                         stktop = j - 1;
478                         return;
479                 }
480         }
481         /* Didn't find one.  Throw this away. */
482         pe(lineno);
483         printf("Unmatched .%s\n", mac);
484 }
485
/* eq: return 1 when s1 and s2 hold the same string, 0 otherwise */
static int
eq(const char *s1, const char *s2)
{
	return !strcmp(s1, s2);
}
492
493 /* print the first part of an error message, given the line number */
494 static void
495 pe(int mylineno)
496 {
497         if (nfiles > 1)
498                 printf("%s: ", cfilename);
499         printf("%d: ", mylineno);
500 }
501
502 static void
503 checkknown(const char *mac)
504 {
505         if (eq(mac, "."))
506                 return;
507         if (binsrch(mac, NULL) >= 0)
508                 return;
509         if (mac[0] == '\\' && mac[1] == '"')    /* comments */
510                 return;
511
512         pe(lineno);
513         printf("Unknown command: .%s\n", mac);
514 }
515
516 /*
517  * We have a .de xx line in "line".  Add xx to the list of known commands.
518  */
519 static void
520 addcmd(char *myline)
521 {
522         char *mac;
523
524         /* grab the macro being defined */
525         mac = myline + 4;
526         while (isspace(*mac))
527                 mac++;
528         if (*mac == 0) {
529                 pe(lineno);
530                 printf("illegal define: %s\n", myline);
531                 return;
532         }
533         mac[2] = 0;
534         if (isspace(mac[1]) || mac[1] == '\\')
535                 mac[1] = 0;
536         addmac(mac);
537 }
538
539 /*
540  * Add mac to the list.  We should really have some kind of tree
541  * structure here, but the loop below is reasonably fast.
542  */
543 static void
544 addmac(const char *mac)
545 {
546         int i, slot;
547
548         if (ncmds >= MAXCMDS) {
549                 errx(1, "Only %d known commands allowed", MAXCMDS);
550         }
551
552         /* Don't try to add it if it's already in the table. */
553         if (binsrch(mac, &slot) >= 0) {
554 #ifdef DEBUG
555                 printf("binsrch(%s) -> already in table\n", mac);
556 #endif /* DEBUG */
557                 return;
558         }
559 #ifdef DEBUG
560         printf("binsrch(%s) -> %d\n", mac, slot);
561 #endif
562         for (i = ncmds - 1; i >= slot; i--) {
563                 strncpy(knowncmds[i + 1], knowncmds[i], 2);
564         }
565         strncpy(knowncmds[slot], mac, 2);
566         ncmds++;
567 #ifdef DEBUG
568         printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2],
569                 knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1],
570                 knowncmds[slot+2], ncmds);
571 #endif
572 }
573
574 /*
575  * Do a binary search in knowncmds for mac.
576  * If found, return the index.  If not, return -1.
577  * Also, if not found, and if slot_ptr is not NULL,
578  * set *slot_ptr to where it should have been.
579  */
580 static int
581 binsrch(const char *mac, int *slot_ptr)
582 {
583         const char *p;  /* pointer to current cmd in list */
584         int d;          /* difference if any */
585         int mid;        /* mid point in binary search */
586         int top, bot;   /* boundaries of bin search, inclusive */
587
588         top = ncmds - 1;
589         bot = 0;
590         while (top >= bot) {
591                 mid = (top + bot) / 2;
592                 p = knowncmds[mid];
593                 d = p[0] - mac[0];
594                 if (d == 0)
595                         d = p[1] - mac[1];
596                 if (d == 0)
597                         return mid;
598                 if (d < 0)
599                         bot = mid + 1;
600                 else
601                         top = mid - 1;
602         }
603         if (slot_ptr != NULL)
604                 *slot_ptr = bot;        /* place it would have gone */
605         return -1;
606 }