Add the DragonFly cvs id and perform general cleanups on cvs/rcs/sccs ids. Most
[dragonfly.git] / usr.bin / checknr / checknr.c
1 /*
2  * Copyright (c) 1980, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *      This product includes software developed by the University of
16  *      California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  * @(#) Copyright (c) 1980, 1993 The Regents of the University of California.  All rights reserved.
34  * @(#)checknr.c        8.1 (Berkeley) 6/6/93
35  */
36
/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <ctype.h>
48
/* Fixed capacities of the statically sized tables in this file. */
enum {
	MAXSTK = 100,	/* Stack size */
	MAXBR = 100,	/* Max number of bracket pairs known */
	MAXCMDS = 500	/* Max number of commands known */
};
52
/*
 * Forward declarations.  These are plain ISO C prototypes; the pre-ANSI
 * __P(()) wrapper is not defined by any of the standard headers this
 * file includes, so relying on it made the file non-portable.
 */
void addcmd(char *line);
void addmac(char *mac);
int binsrch(char *mac);
void checkknown(char *mac);
void chkcmd(char *line, char *mac);
void complain(int i);
int eq(char *s1, char *s2);
void nomatch(char *mac);
void pe(int lineno);
void process(FILE *f);
void prop(int i);
static void usage(void);
65
66 /*
67  * The stack on which we remember what we've seen so far.
68  */
69 struct stkstr {
70         int opno;       /* number of opening bracket */
71         int pl;         /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
72         int parm;       /* parm to size, font, etc */
73         int lno;        /* line number the thing came in in */
74 } stk[MAXSTK];
75 int stktop;
76
/*
 * The kinds of opening and closing brackets.
 *
 * The first two slots are reserved for size and font changes, and SZ
 * and FT name their indices.  The table ends at the first entry whose
 * opbr is a null pointer.
 */
#define SZ	0
#define FT	1

struct brstr {
	char *opbr;	/* name of the opening request/macro */
	char *clbr;	/* name of the request/macro that closes it */
};

struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },		/* SZ: also \s */
	{ "ft", "ft" },		/* FT: also \f */

	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },

	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },

	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },

	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },

	/* Refer */
	{ "[", "]" },

	/* end marker */
	{ NULL, NULL }
};
134
135 /*
136  * All commands known to nroff, plus macro packages.
137  * Used so we can complain about unrecognized commands.
138  */
139 char *knowncmds[MAXCMDS] = {
140 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
141 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
142 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
143 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
144 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
145 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
146 "D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
147 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
148 "FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
149 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
150 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
151 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
152 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
153 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
154 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
155 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
156 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
157 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
158 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
159 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
160 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
161 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
162 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
163 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
164 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
165 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
166 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
167 "q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
168 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
169 "ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
170 "yr", 0
171 };
172
/* Input state shared by the checking routines. */
char	*cfilename;	/* name of current file */
char	line[256];	/* the current line */
int	lineno;		/* current line number in input file */
int	nfiles;		/* number of files to process */

/* Command-line switches. */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */

/* Bookkeeping for the knowncmds table. */
int	ncmds;		/* size of knowncmds */
int	slot;		/* slot in knowncmds found by binsrch */
181
182 int
183 main(argc, argv)
184 int argc;
185 char **argv;
186 {
187         FILE *f;
188         int i;
189         char *cp;
190         char b1[4];
191
192         /* Figure out how many known commands there are */
193         while (knowncmds[ncmds])
194                 ncmds++;
195         while (argc > 1 && argv[1][0] == '-') {
196                 switch(argv[1][1]) {
197
198                 /* -a: add pairs of macros */
199                 case 'a':
200                         i = strlen(argv[1]) - 2;
201                         if (i % 6 != 0)
202                                 usage();
203                         /* look for empty macro slots */
204                         for (i=0; br[i].opbr; i++)
205                                 ;
206                         for (cp=argv[1]+3; cp[-1]; cp += 6) {
207                                 br[i].opbr = malloc(3);
208                                 strncpy(br[i].opbr, cp, 2);
209                                 br[i].clbr = malloc(3);
210                                 strncpy(br[i].clbr, cp+3, 2);
211                                 addmac(br[i].opbr);     /* knows pairs are also known cmds */
212                                 addmac(br[i].clbr);
213                                 i++;
214                         }
215                         break;
216
217                 /* -c: add known commands */
218                 case 'c':
219                         i = strlen(argv[1]) - 2;
220                         if (i % 3 != 0)
221                                 usage();
222                         for (cp=argv[1]+3; cp[-1]; cp += 3) {
223                                 if (cp[2] && cp[2] != '.')
224                                         usage();
225                                 strncpy(b1, cp, 2);
226                                 b1[2] = '\0';
227                                 addmac(b1);
228                         }
229                         break;
230
231                 /* -f: ignore font changes */
232                 case 'f':
233                         fflag = 1;
234                         break;
235
236                 /* -s: ignore size changes */
237                 case 's':
238                         sflag = 1;
239                         break;
240                 default:
241                         usage();
242                 }
243                 argc--; argv++;
244         }
245
246         nfiles = argc - 1;
247
248         if (nfiles > 0) {
249                 for (i=1; i<argc; i++) {
250                         cfilename = argv[i];
251                         f = fopen(cfilename, "r");
252                         if (f == NULL)
253                                 perror(cfilename);
254                         else
255                                 process(f);
256                 }
257         } else {
258                 cfilename = "stdin";
259                 process(stdin);
260         }
261         exit(0);
262 }
263
/* Write the command synopsis to standard error and exit with status 1. */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: checknr [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [-s] [-f] file\n";

	fprintf(stderr, "%s", synopsis);
	exit(1);
}
271
272 void
273 process(f)
274 FILE *f;
275 {
276         register int i, n;
277         char mac[5];    /* The current macro or nroff command */
278         int pl;
279
280         stktop = -1;
281         for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
282                 if (line[0] == '.') {
283                         /*
284                          * find and isolate the macro/command name.
285                          */
286                         strncpy(mac, line+1, 4);
287                         if (isspace(mac[0])) {
288                                 pe(lineno);
289                                 printf("Empty command\n");
290                         } else if (isspace(mac[1])) {
291                                 mac[1] = 0;
292                         } else if (isspace(mac[2])) {
293                                 mac[2] = 0;
294                         } else if (mac[0] != '\\' || mac[1] != '\"') {
295                                 pe(lineno);
296                                 printf("Command too long\n");
297                         }
298
299                         /*
300                          * Is it a known command?
301                          */
302                         checkknown(mac);
303
304                         /*
305                          * Should we add it?
306                          */
307                         if (eq(mac, "de"))
308                                 addcmd(line);
309
310                         chkcmd(line, mac);
311                 }
312
313                 /*
314                  * At this point we process the line looking
315                  * for \s and \f.
316                  */
317                 for (i=0; line[i]; i++)
318                         if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
319                                 if (!sflag && line[++i]=='s') {
320                                         pl = line[++i];
321                                         if (isdigit(pl)) {
322                                                 n = pl - '0';
323                                                 pl = ' ';
324                                         } else
325                                                 n = 0;
326                                         while (isdigit(line[++i]))
327                                                 n = 10 * n + line[i] - '0';
328                                         i--;
329                                         if (n == 0) {
330                                                 if (stk[stktop].opno == SZ) {
331                                                         stktop--;
332                                                 } else {
333                                                         pe(lineno);
334                                                         printf("unmatched \\s0\n");
335                                                 }
336                                         } else {
337                                                 stk[++stktop].opno = SZ;
338                                                 stk[stktop].pl = pl;
339                                                 stk[stktop].parm = n;
340                                                 stk[stktop].lno = lineno;
341                                         }
342                                 } else if (!fflag && line[i]=='f') {
343                                         n = line[++i];
344                                         if (n == 'P') {
345                                                 if (stk[stktop].opno == FT) {
346                                                         stktop--;
347                                                 } else {
348                                                         pe(lineno);
349                                                         printf("unmatched \\fP\n");
350                                                 }
351                                         } else {
352                                                 stk[++stktop].opno = FT;
353                                                 stk[stktop].pl = 1;
354                                                 stk[stktop].parm = n;
355                                                 stk[stktop].lno = lineno;
356                                         }
357                                 }
358                         }
359         }
360         /*
361          * We've hit the end and look at all this stuff that hasn't been
362          * matched yet!  Complain, complain.
363          */
364         for (i=stktop; i>=0; i--) {
365                 complain(i);
366         }
367 }
368
369 void
370 complain(i)
371 int i;
372 {
373         pe(stk[i].lno);
374         printf("Unmatched ");
375         prop(i);
376         printf("\n");
377 }
378
379 void
380 prop(i)
381 int i;
382 {
383         if (stk[i].pl == 0)
384                 printf(".%s", br[stk[i].opno].opbr);
385         else switch(stk[i].opno) {
386         case SZ:
387                 printf("\\s%c%d", stk[i].pl, stk[i].parm);
388                 break;
389         case FT:
390                 printf("\\f%c", stk[i].parm);
391                 break;
392         default:
393                 printf("Bug: stk[%d].opno = %d = .%s, .%s",
394                         i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
395         }
396 }
397
398 void
399 chkcmd(line, mac)
400 char *line;
401 char *mac;
402 {
403         register int i;
404
405         /*
406          * Check to see if it matches top of stack.
407          */
408         if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
409                 stktop--;       /* OK. Pop & forget */
410         else {
411                 /* No. Maybe it's an opener */
412                 for (i=0; br[i].opbr; i++) {
413                         if (eq(mac, br[i].opbr)) {
414                                 /* Found. Push it. */
415                                 stktop++;
416                                 stk[stktop].opno = i;
417                                 stk[stktop].pl = 0;
418                                 stk[stktop].parm = 0;
419                                 stk[stktop].lno = lineno;
420                                 break;
421                         }
422                         /*
423                          * Maybe it's an unmatched closer.
424                          * NOTE: this depends on the fact
425                          * that none of the closers can be
426                          * openers too.
427                          */
428                         if (eq(mac, br[i].clbr)) {
429                                 nomatch(mac);
430                                 break;
431                         }
432                 }
433         }
434 }
435
436 void
437 nomatch(mac)
438 char *mac;
439 {
440         register int i, j;
441
442         /*
443          * Look for a match further down on stack
444          * If we find one, it suggests that the stuff in
445          * between is supposed to match itself.
446          */
447         for (j=stktop; j>=0; j--)
448                 if (eq(mac,br[stk[j].opno].clbr)) {
449                         /* Found.  Make a good diagnostic. */
450                         if (j == stktop-2) {
451                                 /*
452                                  * Check for special case \fx..\fR and don't
453                                  * complain.
454                                  */
455                                 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
456                                  && stk[j+2].opno==FT && stk[j+2].parm=='R') {
457                                         stktop = j -1;
458                                         return;
459                                 }
460                                 /*
461                                  * We have two unmatched frobs.  Chances are
462                                  * they were intended to match, so we mention
463                                  * them together.
464                                  */
465                                 pe(stk[j+1].lno);
466                                 prop(j+1);
467                                 printf(" does not match %d: ", stk[j+2].lno);
468                                 prop(j+2);
469                                 printf("\n");
470                         } else for (i=j+1; i <= stktop; i++) {
471                                 complain(i);
472                         }
473                         stktop = j-1;
474                         return;
475                 }
476         /* Didn't find one.  Throw this away. */
477         pe(lineno);
478         printf("Unmatched .%s\n", mac);
479 }
480
/* eq: return 1 when s1 and s2 hold the same string, 0 otherwise. */
int
eq(char *s1, char *s2)
{
	return !strcmp(s1, s2);
}
488
489 /* print the first part of an error message, given the line number */
490 void
491 pe(lineno)
492 int lineno;
493 {
494         if (nfiles > 1)
495                 printf("%s: ", cfilename);
496         printf("%d: ", lineno);
497 }
498
499 void
500 checkknown(mac)
501 char *mac;
502 {
503
504         if (eq(mac, "."))
505                 return;
506         if (binsrch(mac) >= 0)
507                 return;
508         if (mac[0] == '\\' && mac[1] == '"')    /* comments */
509                 return;
510
511         pe(lineno);
512         printf("Unknown command: .%s\n", mac);
513 }
514
515 /*
516  * We have a .de xx line in "line".  Add xx to the list of known commands.
517  */
518 void
519 addcmd(line)
520 char *line;
521 {
522         char *mac;
523
524         /* grab the macro being defined */
525         mac = line+4;
526         while (isspace(*mac))
527                 mac++;
528         if (*mac == 0) {
529                 pe(lineno);
530                 printf("illegal define: %s\n", line);
531                 return;
532         }
533         mac[2] = 0;
534         if (isspace(mac[1]) || mac[1] == '\\')
535                 mac[1] = 0;
536         if (ncmds >= MAXCMDS) {
537                 printf("Only %d known commands allowed\n", MAXCMDS);
538                 exit(1);
539         }
540         addmac(mac);
541 }
542
543 /*
544  * Add mac to the list.  We should really have some kind of tree
545  * structure here but this is a quick-and-dirty job and I just don't
546  * have time to mess with it.  (I wonder if this will come back to haunt
547  * me someday?)  Anyway, I claim that .de is fairly rare in user
548  * nroff programs, and the register loop below is pretty fast.
549  */
550 void
551 addmac(mac)
552 char *mac;
553 {
554         register char **src, **dest, **loc;
555
556         if (binsrch(mac) >= 0){ /* it's OK to redefine something */
557 #ifdef DEBUG
558                 printf("binsrch(%s) -> already in table\n", mac);
559 #endif DEBUG
560                 return;
561         }
562         /* binsrch sets slot as a side effect */
563 #ifdef DEBUG
564 printf("binsrch(%s) -> %d\n", mac, slot);
565 #endif
566         loc = &knowncmds[slot];
567         src = &knowncmds[ncmds-1];
568         dest = src+1;
569         while (dest > loc)
570                 *dest-- = *src--;
571         *loc = malloc(3);
572         strcpy(*loc, mac);
573         ncmds++;
574 #ifdef DEBUG
575 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
576 #endif
577 }
578
579 /*
580  * Do a binary search in knowncmds for mac.
581  * If found, return the index.  If not, return -1.
582  */
583 int
584 binsrch(mac)
585 char *mac;
586 {
587         register char *p;       /* pointer to current cmd in list */
588         register int d;         /* difference if any */
589         register int mid;       /* mid point in binary search */
590         register int top, bot;  /* boundaries of bin search, inclusive */
591
592         top = ncmds-1;
593         bot = 0;
594         while (top >= bot) {
595                 mid = (top+bot)/2;
596                 p = knowncmds[mid];
597                 d = p[0] - mac[0];
598                 if (d == 0)
599                         d = p[1] - mac[1];
600                 if (d == 0)
601                         return mid;
602                 if (d < 0)
603                         bot = mid + 1;
604                 else
605                         top = mid - 1;
606         }
607         slot = bot;     /* place it would have gone */
608         return -1;
609 }