Merge from vendor branch DIFFUTILS:
[dragonfly.git] / usr.bin / checknr / checknr.c
1 /*
2  * Copyright (c) 1980, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *      This product includes software developed by the University of
16  *      California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  * @(#) Copyright (c) 1980, 1993 The Regents of the University of California.  All rights reserved.
34  * @(#)checknr.c        8.1 (Berkeley) 6/6/93
35  *
36  * $DragonFly: src/usr.bin/checknr/checknr.c,v 1.4 2003/11/03 19:31:28 eirikn Exp $
37  */
38
39 /*
40  * checknr: check an nroff/troff input file for matching macro calls.
41  * we also attempt to match size and font changes, but only the embedded
42  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
43  * later but for now think of these restrictions as contributions to
44  * structured typesetting.
45  */
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <ctype.h>
50
/* Fixed capacities of the tables used by checknr. */
enum {
        MAXSTK  = 100,  /* depth of the nesting stack */
        MAXBR   = 100,  /* slots in the opener/closer table */
        MAXCMDS = 500   /* slots in the known-command table */
};
54
/* Forward declarations for the functions defined later in this file. */
void addcmd(char *line);
void addmac(char *mac);
int binsrch(char *mac);
void checkknown(char *mac);
void chkcmd(char *line, char *mac);
void complain(int i);
int eq(char *s1, char *s2);
void nomatch(char *mac);
void pe(int lineno);
void process(FILE *f);
void prop(int i);
static void usage(void);
67
68 /*
69  * The stack on which we remember what we've seen so far.
70  */
71 struct stkstr {
72         int opno;       /* number of opening bracket */
73         int pl;         /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
74         int parm;       /* parm to size, font, etc */
75         int lno;        /* line number the thing came in in */
76 } stk[MAXSTK];
77 int stktop;
78
/*
 * A pair of macro names: the one that opens a construct and the one
 * expected to close it.
 */
struct brstr {
        char *opbr;
        char *clbr;
};

/* Slots 0 and 1 are reserved for size and font changes. */
#define SZ      0
#define FT      1

/*
 * Table of known opener/closer pairs.  It ends at the first entry whose
 * opbr is NULL; the unused slots after it are filled in by the -a option.
 */
struct brstr br[MAXBR] = {
        /* bare troff requests; slot SZ also stands for \s, slot FT for \f */
        { "sz", "sz" },
        { "ft", "ft" },

        /* -mm */
        { "AL", "LE" },
        { "AS", "AE" },
        { "BL", "LE" },
        { "BS", "BE" },
        { "DF", "DE" },
        { "DL", "LE" },
        { "DS", "DE" },
        { "FS", "FE" },
        { "ML", "LE" },
        { "NS", "NE" },
        { "RL", "LE" },
        { "VL", "LE" },

        /* -ms */
        { "AB", "AE" },
        { "BD", "DE" },
        { "CD", "DE" },
        { "DS", "DE" },
        { "FS", "FE" },
        { "ID", "DE" },
        { "KF", "KE" },
        { "KS", "KE" },
        { "LD", "DE" },
        { "LG", "NL" },
        { "QS", "QE" },
        { "RS", "RE" },
        { "SM", "NL" },
        { "XA", "XE" },
        { "XS", "XE" },

        /* -me */
        { "(b", ")b" },
        { "(c", ")c" },
        { "(d", ")d" },
        { "(f", ")f" },
        { "(l", ")l" },
        { "(q", ")q" },
        { "(x", ")x" },
        { "(z", ")z" },

        /* preprocessors: eqn and tbl */
        { "EQ", "EN" },
        { "TS", "TE" },

        /* refer */
        { "[",  "]" },

        { NULL, NULL }
};
136
137 /*
138  * All commands known to nroff, plus macro packages.
139  * Used so we can complain about unrecognized commands.
140  */
141 char *knowncmds[MAXCMDS] = {
142 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
143 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
144 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
145 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
146 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
147 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
148 "D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
149 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
150 "FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
151 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
152 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
153 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
154 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
155 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
156 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
157 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
158 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
159 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
160 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
161 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
162 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
163 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
164 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
165 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
166 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
167 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
168 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
169 "q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
170 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
171 "ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
172 "yr", 0
173 };
174
/* State of the input currently being checked. */
char    *cfilename;     /* name of the file being read */
char    line[256];      /* text of the line being examined */
int     lineno;         /* number of that line within the file */
int     nfiles;         /* how many files were named on the command line */

/* Option flags. */
int     fflag;          /* -f given: do not check \f */
int     sflag;          /* -s given: do not check \s */

/* Bookkeeping for the knowncmds table. */
int     ncmds;          /* entries currently in knowncmds */
int     slot;           /* insertion point left behind by a failed binsrch */
183
184 int
185 main(int argc, char **argv)
186 {
187         FILE *f;
188         int i;
189         char *cp;
190         char b1[4];
191
192         /* Figure out how many known commands there are */
193         while (knowncmds[ncmds])
194                 ncmds++;
195         while (argc > 1 && argv[1][0] == '-') {
196                 switch(argv[1][1]) {
197
198                 /* -a: add pairs of macros */
199                 case 'a':
200                         i = strlen(argv[1]) - 2;
201                         if (i % 6 != 0)
202                                 usage();
203                         /* look for empty macro slots */
204                         for (i=0; br[i].opbr; i++)
205                                 ;
206                         for (cp=argv[1]+3; cp[-1]; cp += 6) {
207                                 br[i].opbr = malloc(3);
208                                 strncpy(br[i].opbr, cp, 2);
209                                 br[i].clbr = malloc(3);
210                                 strncpy(br[i].clbr, cp+3, 2);
211                                 addmac(br[i].opbr);     /* knows pairs are also known cmds */
212                                 addmac(br[i].clbr);
213                                 i++;
214                         }
215                         break;
216
217                 /* -c: add known commands */
218                 case 'c':
219                         i = strlen(argv[1]) - 2;
220                         if (i % 3 != 0)
221                                 usage();
222                         for (cp=argv[1]+3; cp[-1]; cp += 3) {
223                                 if (cp[2] && cp[2] != '.')
224                                         usage();
225                                 strncpy(b1, cp, 2);
226                                 b1[2] = '\0';
227                                 addmac(b1);
228                         }
229                         break;
230
231                 /* -f: ignore font changes */
232                 case 'f':
233                         fflag = 1;
234                         break;
235
236                 /* -s: ignore size changes */
237                 case 's':
238                         sflag = 1;
239                         break;
240                 default:
241                         usage();
242                 }
243                 argc--; argv++;
244         }
245
246         nfiles = argc - 1;
247
248         if (nfiles > 0) {
249                 for (i=1; i<argc; i++) {
250                         cfilename = argv[i];
251                         f = fopen(cfilename, "r");
252                         if (f == NULL)
253                                 perror(cfilename);
254                         else
255                                 process(f);
256                 }
257         } else {
258                 cfilename = "stdin";
259                 process(stdin);
260         }
261         exit(0);
262 }
263
/* usage: print the command synopsis on stderr and exit with status 1. */
static void
usage(void)
{
        static const char synopsis[] =
            "usage: checknr [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [-s] [-f] file\n";

        fprintf(stderr, "%s", synopsis);
        exit(1);
}
271
272 void
273 process(FILE *f)
274 {
275         register int i, n;
276         char mac[5];    /* The current macro or nroff command */
277         int pl;
278
279         stktop = -1;
280         for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
281                 if (line[0] == '.') {
282                         /*
283                          * find and isolate the macro/command name.
284                          */
285                         strncpy(mac, line+1, 4);
286                         if (isspace(mac[0])) {
287                                 pe(lineno);
288                                 printf("Empty command\n");
289                         } else if (isspace(mac[1])) {
290                                 mac[1] = 0;
291                         } else if (isspace(mac[2])) {
292                                 mac[2] = 0;
293                         } else if (mac[0] != '\\' || mac[1] != '\"') {
294                                 pe(lineno);
295                                 printf("Command too long\n");
296                         }
297
298                         /*
299                          * Is it a known command?
300                          */
301                         checkknown(mac);
302
303                         /*
304                          * Should we add it?
305                          */
306                         if (eq(mac, "de"))
307                                 addcmd(line);
308
309                         chkcmd(line, mac);
310                 }
311
312                 /*
313                  * At this point we process the line looking
314                  * for \s and \f.
315                  */
316                 for (i=0; line[i]; i++)
317                         if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
318                                 if (!sflag && line[++i]=='s') {
319                                         pl = line[++i];
320                                         if (isdigit(pl)) {
321                                                 n = pl - '0';
322                                                 pl = ' ';
323                                         } else
324                                                 n = 0;
325                                         while (isdigit(line[++i]))
326                                                 n = 10 * n + line[i] - '0';
327                                         i--;
328                                         if (n == 0) {
329                                                 if (stk[stktop].opno == SZ) {
330                                                         stktop--;
331                                                 } else {
332                                                         pe(lineno);
333                                                         printf("unmatched \\s0\n");
334                                                 }
335                                         } else {
336                                                 stk[++stktop].opno = SZ;
337                                                 stk[stktop].pl = pl;
338                                                 stk[stktop].parm = n;
339                                                 stk[stktop].lno = lineno;
340                                         }
341                                 } else if (!fflag && line[i]=='f') {
342                                         n = line[++i];
343                                         if (n == 'P') {
344                                                 if (stk[stktop].opno == FT) {
345                                                         stktop--;
346                                                 } else {
347                                                         pe(lineno);
348                                                         printf("unmatched \\fP\n");
349                                                 }
350                                         } else {
351                                                 stk[++stktop].opno = FT;
352                                                 stk[stktop].pl = 1;
353                                                 stk[stktop].parm = n;
354                                                 stk[stktop].lno = lineno;
355                                         }
356                                 }
357                         }
358         }
359         /*
360          * We've hit the end and look at all this stuff that hasn't been
361          * matched yet!  Complain, complain.
362          */
363         for (i=stktop; i>=0; i--) {
364                 complain(i);
365         }
366 }
367
368 void
369 complain(int i)
370 {
371         pe(stk[i].lno);
372         printf("Unmatched ");
373         prop(i);
374         printf("\n");
375 }
376
377 void
378 prop(int i)
379 {
380         if (stk[i].pl == 0)
381                 printf(".%s", br[stk[i].opno].opbr);
382         else switch(stk[i].opno) {
383         case SZ:
384                 printf("\\s%c%d", stk[i].pl, stk[i].parm);
385                 break;
386         case FT:
387                 printf("\\f%c", stk[i].parm);
388                 break;
389         default:
390                 printf("Bug: stk[%d].opno = %d = .%s, .%s",
391                         i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
392         }
393 }
394
395 void
396 chkcmd(char *line, char *mac)
397 {
398         register int i;
399
400         /*
401          * Check to see if it matches top of stack.
402          */
403         if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
404                 stktop--;       /* OK. Pop & forget */
405         else {
406                 /* No. Maybe it's an opener */
407                 for (i=0; br[i].opbr; i++) {
408                         if (eq(mac, br[i].opbr)) {
409                                 /* Found. Push it. */
410                                 stktop++;
411                                 stk[stktop].opno = i;
412                                 stk[stktop].pl = 0;
413                                 stk[stktop].parm = 0;
414                                 stk[stktop].lno = lineno;
415                                 break;
416                         }
417                         /*
418                          * Maybe it's an unmatched closer.
419                          * NOTE: this depends on the fact
420                          * that none of the closers can be
421                          * openers too.
422                          */
423                         if (eq(mac, br[i].clbr)) {
424                                 nomatch(mac);
425                                 break;
426                         }
427                 }
428         }
429 }
430
431 void
432 nomatch(char *mac)
433 {
434         register int i, j;
435
436         /*
437          * Look for a match further down on stack
438          * If we find one, it suggests that the stuff in
439          * between is supposed to match itself.
440          */
441         for (j=stktop; j>=0; j--)
442                 if (eq(mac,br[stk[j].opno].clbr)) {
443                         /* Found.  Make a good diagnostic. */
444                         if (j == stktop-2) {
445                                 /*
446                                  * Check for special case \fx..\fR and don't
447                                  * complain.
448                                  */
449                                 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
450                                  && stk[j+2].opno==FT && stk[j+2].parm=='R') {
451                                         stktop = j -1;
452                                         return;
453                                 }
454                                 /*
455                                  * We have two unmatched frobs.  Chances are
456                                  * they were intended to match, so we mention
457                                  * them together.
458                                  */
459                                 pe(stk[j+1].lno);
460                                 prop(j+1);
461                                 printf(" does not match %d: ", stk[j+2].lno);
462                                 prop(j+2);
463                                 printf("\n");
464                         } else for (i=j+1; i <= stktop; i++) {
465                                 complain(i);
466                         }
467                         stktop = j-1;
468                         return;
469                 }
470         /* Didn't find one.  Throw this away. */
471         pe(lineno);
472         printf("Unmatched .%s\n", mac);
473 }
474
/* eq: return 1 when s1 and s2 hold the same string, 0 otherwise. */
int
eq(char *s1, char *s2)
{
        if (strcmp(s1, s2) != 0)
                return 0;
        return 1;
}
481
482 /* print the first part of an error message, given the line number */
483 void
484 pe(int lineno)
485 {
486         if (nfiles > 1)
487                 printf("%s: ", cfilename);
488         printf("%d: ", lineno);
489 }
490
491 void
492 checkknown(char *mac)
493 {
494
495         if (eq(mac, "."))
496                 return;
497         if (binsrch(mac) >= 0)
498                 return;
499         if (mac[0] == '\\' && mac[1] == '"')    /* comments */
500                 return;
501
502         pe(lineno);
503         printf("Unknown command: .%s\n", mac);
504 }
505
506 /*
507  * We have a .de xx line in "line".  Add xx to the list of known commands.
508  */
509 void
510 addcmd(char *line)
511 {
512         char *mac;
513
514         /* grab the macro being defined */
515         mac = line+4;
516         while (isspace(*mac))
517                 mac++;
518         if (*mac == 0) {
519                 pe(lineno);
520                 printf("illegal define: %s\n", line);
521                 return;
522         }
523         mac[2] = 0;
524         if (isspace(mac[1]) || mac[1] == '\\')
525                 mac[1] = 0;
526         if (ncmds >= MAXCMDS) {
527                 printf("Only %d known commands allowed\n", MAXCMDS);
528                 exit(1);
529         }
530         addmac(mac);
531 }
532
533 /*
534  * Add mac to the list.  We should really have some kind of tree
535  * structure here but this is a quick-and-dirty job and I just don't
536  * have time to mess with it.  (I wonder if this will come back to haunt
537  * me someday?)  Anyway, I claim that .de is fairly rare in user
538  * nroff programs, and the register loop below is pretty fast.
539  */
540 void
541 addmac(char *mac)
542 {
543         register char **src, **dest, **loc;
544
545         if (binsrch(mac) >= 0){ /* it's OK to redefine something */
546 #ifdef DEBUG
547                 printf("binsrch(%s) -> already in table\n", mac);
548 #endif DEBUG
549                 return;
550         }
551         /* binsrch sets slot as a side effect */
552 #ifdef DEBUG
553 printf("binsrch(%s) -> %d\n", mac, slot);
554 #endif
555         loc = &knowncmds[slot];
556         src = &knowncmds[ncmds-1];
557         dest = src+1;
558         while (dest > loc)
559                 *dest-- = *src--;
560         *loc = malloc(3);
561         strcpy(*loc, mac);
562         ncmds++;
563 #ifdef DEBUG
564 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
565 #endif
566 }
567
568 /*
569  * Do a binary search in knowncmds for mac.
570  * If found, return the index.  If not, return -1.
571  */
572 int
573 binsrch(char *mac)
574 {
575         register char *p;       /* pointer to current cmd in list */
576         register int d;         /* difference if any */
577         register int mid;       /* mid point in binary search */
578         register int top, bot;  /* boundaries of bin search, inclusive */
579
580         top = ncmds-1;
581         bot = 0;
582         while (top >= bot) {
583                 mid = (top+bot)/2;
584                 p = knowncmds[mid];
585                 d = p[0] - mac[0];
586                 if (d == 0)
587                         d = p[1] - mac[1];
588                 if (d == 0)
589                         return mid;
590                 if (d < 0)
591                         bot = mid + 1;
592                 else
593                         top = mid - 1;
594         }
595         slot = bot;     /* place it would have gone */
596         return -1;
597 }