Initial import from FreeBSD RELENG_4:
[dragonfly.git] / usr.bin / checknr / checknr.c
1 /*
2  * Copyright (c) 1980, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *      This product includes software developed by the University of
16  *      California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33
#ifndef lint
/* Identification strings embedded in the binary (omitted when linting). */
static const char copyright[] =
    "@(#) Copyright (c) 1980, 1993\n"
    "        The Regents of the University of California.  All rights reserved.\n";
static const char sccsid[] = "@(#)checknr.c     8.1 (Berkeley) 6/6/93";
#endif /* not lint */
43
44 /*
45  * checknr: check an nroff/troff input file for matching macro calls.
46  * we also attempt to match size and font changes, but only the embedded
47  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
48  * later but for now think of these restrictions as contributions to
49  * structured typesetting.
50  */
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <string.h>
54 #include <ctype.h>
55
/* Fixed capacities of the static tables below. */
#define MAXSTK  100     /* Stack size */
#define MAXBR   100     /* Max number of bracket pairs known */
#define MAXCMDS 500     /* Max number of commands known */

/*
 * Forward declarations.  Written as plain ISO C prototypes: the old
 * __P(()) wrapper only exists where <sys/cdefs.h> happens to be pulled
 * in by another header, which this file never asks for explicitly.
 */
void addcmd(char *);
void addmac(char *);
int binsrch(char *);
void checkknown(char *);
void chkcmd(char *, char *);
void complain(int);
int eq(char *, char *);
void nomatch(char *);
void pe(int);
void process(FILE *);
void prop(int);
static void usage(void);
72
73 /*
74  * The stack on which we remember what we've seen so far.
75  */
76 struct stkstr {
77         int opno;       /* number of opening bracket */
78         int pl;         /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
79         int parm;       /* parm to size, font, etc */
80         int lno;        /* line number the thing came in in */
81 } stk[MAXSTK];
82 int stktop;
83
/*
 * Table of opening/closing macro pairs.  The first two slots are
 * reserved for size and font changes and are referred to by index
 * (SZ, FT); the list ends at the first entry whose opbr is null.
 */
#define SZ      0       /* index of the size entry (also \s) */
#define FT      1       /* index of the font entry (also \f) */

struct brstr {
        char *opbr;     /* name of the opening macro */
        char *clbr;     /* name of the macro that closes it */
};

struct brstr br[MAXBR] = {
        /* A few bare bones troff commands */
        { "sz", "sz" },         /* SZ */
        { "ft", "ft" },         /* FT */

        /* the -mm package */
        { "AL", "LE" },
        { "AS", "AE" },
        { "BL", "LE" },
        { "BS", "BE" },
        { "DF", "DE" },
        { "DL", "LE" },
        { "DS", "DE" },
        { "FS", "FE" },
        { "ML", "LE" },
        { "NS", "NE" },
        { "RL", "LE" },
        { "VL", "LE" },

        /* the -ms package */
        { "AB", "AE" },
        { "BD", "DE" },
        { "CD", "DE" },
        { "DS", "DE" },
        { "FS", "FE" },
        { "ID", "DE" },
        { "KF", "KE" },
        { "KS", "KE" },
        { "LD", "DE" },
        { "LG", "NL" },
        { "QS", "QE" },
        { "RS", "RE" },
        { "SM", "NL" },
        { "XA", "XE" },
        { "XS", "XE" },

        /* The -me package */
        { "(b", ")b" },
        { "(c", ")c" },
        { "(d", ")d" },
        { "(f", ")f" },
        { "(l", ")l" },
        { "(q", ")q" },
        { "(x", ")x" },
        { "(z", ")z" },

        /* Things needed by preprocessors */
        { "EQ", "EN" },
        { "TS", "TE" },

        /* Refer */
        { "[",  "]" },

        /* end of list */
        { 0,    0 }
};
141
142 /*
143  * All commands known to nroff, plus macro packages.
144  * Used so we can complain about unrecognized commands.
145  */
146 char *knowncmds[MAXCMDS] = {
147 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
148 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
149 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
150 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
151 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
152 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
153 "D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
154 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
155 "FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
156 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
157 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
158 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
159 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
160 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
161 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
162 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
163 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
164 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
165 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
166 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
167 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
168 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
169 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
170 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
171 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
172 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
173 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
174 "q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
175 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
176 "ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
177 "yr", 0
178 };
179
/* Where we are in the input. */
char    *cfilename;     /* name of current file */
char    line[256];      /* the current line */
int     lineno;         /* current line number in input file */
int     nfiles;         /* number of files to process */

/* Command-line switches. */
int     fflag;          /* -f: ignore \f */
int     sflag;          /* -s: ignore \s */

/* Bookkeeping for the knowncmds table. */
int     ncmds;          /* size of knowncmds */
int     slot;           /* slot in knowncmds found by binsrch */
188
189 int
190 main(argc, argv)
191 int argc;
192 char **argv;
193 {
194         FILE *f;
195         int i;
196         char *cp;
197         char b1[4];
198
199         /* Figure out how many known commands there are */
200         while (knowncmds[ncmds])
201                 ncmds++;
202         while (argc > 1 && argv[1][0] == '-') {
203                 switch(argv[1][1]) {
204
205                 /* -a: add pairs of macros */
206                 case 'a':
207                         i = strlen(argv[1]) - 2;
208                         if (i % 6 != 0)
209                                 usage();
210                         /* look for empty macro slots */
211                         for (i=0; br[i].opbr; i++)
212                                 ;
213                         for (cp=argv[1]+3; cp[-1]; cp += 6) {
214                                 br[i].opbr = malloc(3);
215                                 strncpy(br[i].opbr, cp, 2);
216                                 br[i].clbr = malloc(3);
217                                 strncpy(br[i].clbr, cp+3, 2);
218                                 addmac(br[i].opbr);     /* knows pairs are also known cmds */
219                                 addmac(br[i].clbr);
220                                 i++;
221                         }
222                         break;
223
224                 /* -c: add known commands */
225                 case 'c':
226                         i = strlen(argv[1]) - 2;
227                         if (i % 3 != 0)
228                                 usage();
229                         for (cp=argv[1]+3; cp[-1]; cp += 3) {
230                                 if (cp[2] && cp[2] != '.')
231                                         usage();
232                                 strncpy(b1, cp, 2);
233                                 b1[2] = '\0';
234                                 addmac(b1);
235                         }
236                         break;
237
238                 /* -f: ignore font changes */
239                 case 'f':
240                         fflag = 1;
241                         break;
242
243                 /* -s: ignore size changes */
244                 case 's':
245                         sflag = 1;
246                         break;
247                 default:
248                         usage();
249                 }
250                 argc--; argv++;
251         }
252
253         nfiles = argc - 1;
254
255         if (nfiles > 0) {
256                 for (i=1; i<argc; i++) {
257                         cfilename = argv[i];
258                         f = fopen(cfilename, "r");
259                         if (f == NULL)
260                                 perror(cfilename);
261                         else
262                                 process(f);
263                 }
264         } else {
265                 cfilename = "stdin";
266                 process(stdin);
267         }
268         exit(0);
269 }
270
/* usage: print the command synopsis on stderr and exit with status 1. */
static void
usage(void)
{
        static const char synopsis[] =
            "usage: checknr [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [-s] [-f] file\n";

        fputs(synopsis, stderr);
        exit(1);
}
278
279 void
280 process(f)
281 FILE *f;
282 {
283         register int i, n;
284         char mac[5];    /* The current macro or nroff command */
285         int pl;
286
287         stktop = -1;
288         for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
289                 if (line[0] == '.') {
290                         /*
291                          * find and isolate the macro/command name.
292                          */
293                         strncpy(mac, line+1, 4);
294                         if (isspace(mac[0])) {
295                                 pe(lineno);
296                                 printf("Empty command\n");
297                         } else if (isspace(mac[1])) {
298                                 mac[1] = 0;
299                         } else if (isspace(mac[2])) {
300                                 mac[2] = 0;
301                         } else if (mac[0] != '\\' || mac[1] != '\"') {
302                                 pe(lineno);
303                                 printf("Command too long\n");
304                         }
305
306                         /*
307                          * Is it a known command?
308                          */
309                         checkknown(mac);
310
311                         /*
312                          * Should we add it?
313                          */
314                         if (eq(mac, "de"))
315                                 addcmd(line);
316
317                         chkcmd(line, mac);
318                 }
319
320                 /*
321                  * At this point we process the line looking
322                  * for \s and \f.
323                  */
324                 for (i=0; line[i]; i++)
325                         if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
326                                 if (!sflag && line[++i]=='s') {
327                                         pl = line[++i];
328                                         if (isdigit(pl)) {
329                                                 n = pl - '0';
330                                                 pl = ' ';
331                                         } else
332                                                 n = 0;
333                                         while (isdigit(line[++i]))
334                                                 n = 10 * n + line[i] - '0';
335                                         i--;
336                                         if (n == 0) {
337                                                 if (stk[stktop].opno == SZ) {
338                                                         stktop--;
339                                                 } else {
340                                                         pe(lineno);
341                                                         printf("unmatched \\s0\n");
342                                                 }
343                                         } else {
344                                                 stk[++stktop].opno = SZ;
345                                                 stk[stktop].pl = pl;
346                                                 stk[stktop].parm = n;
347                                                 stk[stktop].lno = lineno;
348                                         }
349                                 } else if (!fflag && line[i]=='f') {
350                                         n = line[++i];
351                                         if (n == 'P') {
352                                                 if (stk[stktop].opno == FT) {
353                                                         stktop--;
354                                                 } else {
355                                                         pe(lineno);
356                                                         printf("unmatched \\fP\n");
357                                                 }
358                                         } else {
359                                                 stk[++stktop].opno = FT;
360                                                 stk[stktop].pl = 1;
361                                                 stk[stktop].parm = n;
362                                                 stk[stktop].lno = lineno;
363                                         }
364                                 }
365                         }
366         }
367         /*
368          * We've hit the end and look at all this stuff that hasn't been
369          * matched yet!  Complain, complain.
370          */
371         for (i=stktop; i>=0; i--) {
372                 complain(i);
373         }
374 }
375
376 void
377 complain(i)
378 int i;
379 {
380         pe(stk[i].lno);
381         printf("Unmatched ");
382         prop(i);
383         printf("\n");
384 }
385
386 void
387 prop(i)
388 int i;
389 {
390         if (stk[i].pl == 0)
391                 printf(".%s", br[stk[i].opno].opbr);
392         else switch(stk[i].opno) {
393         case SZ:
394                 printf("\\s%c%d", stk[i].pl, stk[i].parm);
395                 break;
396         case FT:
397                 printf("\\f%c", stk[i].parm);
398                 break;
399         default:
400                 printf("Bug: stk[%d].opno = %d = .%s, .%s",
401                         i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
402         }
403 }
404
405 void
406 chkcmd(line, mac)
407 char *line;
408 char *mac;
409 {
410         register int i;
411
412         /*
413          * Check to see if it matches top of stack.
414          */
415         if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
416                 stktop--;       /* OK. Pop & forget */
417         else {
418                 /* No. Maybe it's an opener */
419                 for (i=0; br[i].opbr; i++) {
420                         if (eq(mac, br[i].opbr)) {
421                                 /* Found. Push it. */
422                                 stktop++;
423                                 stk[stktop].opno = i;
424                                 stk[stktop].pl = 0;
425                                 stk[stktop].parm = 0;
426                                 stk[stktop].lno = lineno;
427                                 break;
428                         }
429                         /*
430                          * Maybe it's an unmatched closer.
431                          * NOTE: this depends on the fact
432                          * that none of the closers can be
433                          * openers too.
434                          */
435                         if (eq(mac, br[i].clbr)) {
436                                 nomatch(mac);
437                                 break;
438                         }
439                 }
440         }
441 }
442
443 void
444 nomatch(mac)
445 char *mac;
446 {
447         register int i, j;
448
449         /*
450          * Look for a match further down on stack
451          * If we find one, it suggests that the stuff in
452          * between is supposed to match itself.
453          */
454         for (j=stktop; j>=0; j--)
455                 if (eq(mac,br[stk[j].opno].clbr)) {
456                         /* Found.  Make a good diagnostic. */
457                         if (j == stktop-2) {
458                                 /*
459                                  * Check for special case \fx..\fR and don't
460                                  * complain.
461                                  */
462                                 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
463                                  && stk[j+2].opno==FT && stk[j+2].parm=='R') {
464                                         stktop = j -1;
465                                         return;
466                                 }
467                                 /*
468                                  * We have two unmatched frobs.  Chances are
469                                  * they were intended to match, so we mention
470                                  * them together.
471                                  */
472                                 pe(stk[j+1].lno);
473                                 prop(j+1);
474                                 printf(" does not match %d: ", stk[j+2].lno);
475                                 prop(j+2);
476                                 printf("\n");
477                         } else for (i=j+1; i <= stktop; i++) {
478                                 complain(i);
479                         }
480                         stktop = j-1;
481                         return;
482                 }
483         /* Didn't find one.  Throw this away. */
484         pe(lineno);
485         printf("Unmatched .%s\n", mac);
486 }
487
/* eq: return 1 when s1 and s2 hold the same string, 0 otherwise */
int
eq(char *s1, char *s2)
{
        if (strcmp(s1, s2) != 0)
                return (0);
        return (1);
}
495
496 /* print the first part of an error message, given the line number */
497 void
498 pe(lineno)
499 int lineno;
500 {
501         if (nfiles > 1)
502                 printf("%s: ", cfilename);
503         printf("%d: ", lineno);
504 }
505
506 void
507 checkknown(mac)
508 char *mac;
509 {
510
511         if (eq(mac, "."))
512                 return;
513         if (binsrch(mac) >= 0)
514                 return;
515         if (mac[0] == '\\' && mac[1] == '"')    /* comments */
516                 return;
517
518         pe(lineno);
519         printf("Unknown command: .%s\n", mac);
520 }
521
522 /*
523  * We have a .de xx line in "line".  Add xx to the list of known commands.
524  */
525 void
526 addcmd(line)
527 char *line;
528 {
529         char *mac;
530
531         /* grab the macro being defined */
532         mac = line+4;
533         while (isspace(*mac))
534                 mac++;
535         if (*mac == 0) {
536                 pe(lineno);
537                 printf("illegal define: %s\n", line);
538                 return;
539         }
540         mac[2] = 0;
541         if (isspace(mac[1]) || mac[1] == '\\')
542                 mac[1] = 0;
543         if (ncmds >= MAXCMDS) {
544                 printf("Only %d known commands allowed\n", MAXCMDS);
545                 exit(1);
546         }
547         addmac(mac);
548 }
549
550 /*
551  * Add mac to the list.  We should really have some kind of tree
552  * structure here but this is a quick-and-dirty job and I just don't
553  * have time to mess with it.  (I wonder if this will come back to haunt
554  * me someday?)  Anyway, I claim that .de is fairly rare in user
555  * nroff programs, and the register loop below is pretty fast.
556  */
557 void
558 addmac(mac)
559 char *mac;
560 {
561         register char **src, **dest, **loc;
562
563         if (binsrch(mac) >= 0){ /* it's OK to redefine something */
564 #ifdef DEBUG
565                 printf("binsrch(%s) -> already in table\n", mac);
566 #endif DEBUG
567                 return;
568         }
569         /* binsrch sets slot as a side effect */
570 #ifdef DEBUG
571 printf("binsrch(%s) -> %d\n", mac, slot);
572 #endif
573         loc = &knowncmds[slot];
574         src = &knowncmds[ncmds-1];
575         dest = src+1;
576         while (dest > loc)
577                 *dest-- = *src--;
578         *loc = malloc(3);
579         strcpy(*loc, mac);
580         ncmds++;
581 #ifdef DEBUG
582 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
583 #endif
584 }
585
586 /*
587  * Do a binary search in knowncmds for mac.
588  * If found, return the index.  If not, return -1.
589  */
590 int
591 binsrch(mac)
592 char *mac;
593 {
594         register char *p;       /* pointer to current cmd in list */
595         register int d;         /* difference if any */
596         register int mid;       /* mid point in binary search */
597         register int top, bot;  /* boundaries of bin search, inclusive */
598
599         top = ncmds-1;
600         bot = 0;
601         while (top >= bot) {
602                 mid = (top+bot)/2;
603                 p = knowncmds[mid];
604                 d = p[0] - mac[0];
605                 if (d == 0)
606                         d = p[1] - mac[1];
607                 if (d == 0)
608                         return mid;
609                 if (d < 0)
610                         bot = mid + 1;
611                 else
612                         top = mid - 1;
613         }
614         slot = bot;     /* place it would have gone */
615         return -1;
616 }