Initial import of binutils 2.22 on the new vendor branch
[dragonfly.git] / usr.bin / checknr / checknr.c
1 /*
2  * Copyright (c) 1980, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *      This product includes software developed by the University of
16  *      California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  * @(#) Copyright (c) 1980, 1993 The Regents of the University of California.  All rights reserved.
34  * @(#)checknr.c        8.1 (Berkeley) 6/6/93
35  *
36  * $DragonFly: src/usr.bin/checknr/checknr.c,v 1.13 2008/11/11 01:02:40 pavalos Exp $
37  */
38
39 #include <err.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <ctype.h>
44
/* Fixed capacities of the statically sized tables below. */
#define MAXSTK  100     /* Stack size */
#define MAXBR   100     /* Max number of bracket pairs known */
#define MAXCMDS 500     /* Max number of commands known */

/* Forward declarations for the file-local helpers. */
static void     addcmd(char *myline);
static void     addmac(const char *mac);
static int      binsrch(const char *mac, int *slot_ptr);
static void     checkknown(const char *mac);
static void     chkcmd(const char *mac);
static void     complain(int i);
static int      eq(const char *s1, const char *s2);
static void     nomatch(const char *mac);
static void     pe(int mylineno);
static void     process(FILE *f);
static void     prop(int i);
static void     usage(void);
61
62 /*
63  * The stack on which we remember what we've seen so far.
64  */
65 struct stkstr {
66         int opno;       /* number of opening bracket */
67         int pl;         /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
68         int parm;       /* parm to size, font, etc */
69         int lno;        /* line number the thing came in on */
70 } stk[MAXSTK];
71 int stktop;
72
/*
 * The kinds of opening and closing brackets.
 *
 * Each name is at most two characters plus the terminating NUL.  The
 * first two entries are addressed by index (SZ and FT) because \s and
 * \f escapes are pushed with those bracket numbers.  Unused trailing
 * slots stay zero-filled; "-a" appends new pairs there.
 */
#define SZ      0       /* index of the size entry, also \s */
#define FT      1       /* index of the font entry, also \f */

struct brstr {
        char opbr[3];
        char clbr[3];
};

struct brstr br[MAXBR] = {
        /* A few bare bones troff commands */
        {"sz",  "sz"},
        {"ft",  "ft"},
        /* the -mm package */
        {"AL",  "LE"},
        {"AS",  "AE"},
        {"BL",  "LE"},
        {"BS",  "BE"},
        {"DF",  "DE"},
        {"DL",  "LE"},
        {"DS",  "DE"},
        {"FS",  "FE"},
        {"ML",  "LE"},
        {"NS",  "NE"},
        {"RL",  "LE"},
        {"VL",  "LE"},
        /* the -ms package */
        {"AB",  "AE"},
        {"BD",  "DE"},
        {"CD",  "DE"},
        {"DS",  "DE"},
        {"FS",  "FE"},
        {"ID",  "DE"},
        {"KF",  "KE"},
        {"KS",  "KE"},
        {"LD",  "DE"},
        {"LG",  "NL"},
        {"QS",  "QE"},
        {"RS",  "RE"},
        {"SM",  "NL"},
        {"XA",  "XE"},
        {"XS",  "XE"},
        /* The -me package */
        {"(b",  ")b"},
        {"(c",  ")c"},
        {"(d",  ")d"},
        {"(f",  ")f"},
        {"(l",  ")l"},
        {"(q",  ")q"},
        {"(x",  ")x"},
        {"(z",  ")z"},
        /* Things needed by preprocessors */
        {"EQ",  "EN"},
        {"TS",  "TE"},
        /* Refer */
        {"[",   "]"}
};
129
130 /*
131  * All commands known to nroff, plus macro packages.
132  * Used so we can complain about unrecognized commands.
133  */
134 char knowncmds[MAXCMDS][3] = {
135 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
136 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
137 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
138 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
139 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
140 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
141 "D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
142 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
143 "FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
144 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
145 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
146 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
147 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
148 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
149 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
150 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
151 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
152 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
153 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
154 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
155 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
156 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
157 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
158 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
159 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
160 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
161 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
162 "q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
163 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
164 "ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
165 "yr"
166 };
167
/* Input state shared by process() and the diagnostic helpers. */
char    line[256];      /* the current line */
int     lineno;         /* current line number in input file */
const char *cfilename;  /* name of current file */
int     nfiles;         /* number of files to process */

/* Command-line switches. */
int     fflag;          /* -f: ignore \f */
int     sflag;          /* -s: ignore \s */

/* Number of slots of knowncmds[] currently in use. */
int     ncmds;          /* size of knowncmds */
175
176 /*
177  * checknr: check an nroff/troff input file for matching macro calls.
178  * we also attempt to match size and font changes, but only the embedded
179  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
180  * later but for now think of these restrictions as contributions to
181  * structured typesetting.
182  */
183 int
184 main(int argc, char **argv)
185 {
186         FILE *f;
187         int i;
188         char *cp;
189         char b1[4];
190
191         /* Figure out how many known commands there are */
192         ncmds = 0;
193         while (ncmds < MAXCMDS && knowncmds[ncmds][0] != '\0')
194                 ncmds++;
195         while (argc > 1 && argv[1][0] == '-') {
196                 switch(argv[1][1]) {
197
198                 /* -a: add pairs of macros */
199                 case 'a':
200                         if ((strlen(argv[1]) - 2) % 6 != 0)
201                                 usage();
202                         /* look for empty macro slots */
203                         i = 0;
204                         while (i < MAXBR && br[i].opbr[0] != '\0')
205                                 i++;
206                         if (i >= MAXBR) {
207                                 errx(1, "Only %d known macro-pairs allowed",
208                                     MAXBR);
209                         }
210                         for (cp = argv[1] + 3; cp[-1]; cp += 6) {
211                                 strncpy(br[i].opbr, cp, 2);
212                                 strncpy(br[i].clbr, cp + 3, 2);
213                                 /*
214                                  * known pairs are also known cmds
215                                  */
216                                 addmac(br[i].opbr);
217                                 addmac(br[i].clbr);
218                                 i++;
219                         }
220                         break;
221
222                 /* -c: add known commands */
223                 case 'c':
224                         i = strlen(argv[1]) - 2;
225                         if (i % 3 != 0)
226                                 usage();
227                         for (cp = argv[1] + 3; cp[-1]; cp += 3) {
228                                 if (cp[2] && cp[2] != '.')
229                                         usage();
230                                 strncpy(b1, cp, 2);
231                                 b1[2] = '\0';
232                                 addmac(b1);
233                         }
234                         break;
235
236                 /* -f: ignore font changes */
237                 case 'f':
238                         fflag = 1;
239                         break;
240
241                 /* -s: ignore size changes */
242                 case 's':
243                         sflag = 1;
244                         break;
245                 default:
246                         usage();
247                 }
248                 argc--; argv++;
249         }
250
251         nfiles = argc - 1;
252
253         if (nfiles > 0) {
254                 for (i = 1; i < argc; i++) {
255                         cfilename = argv[i];
256                         f = fopen(cfilename, "r");
257                         if (f == NULL)
258                                 warn("%s", cfilename);
259                         else {
260                                 process(f);
261                                 fclose(f);
262                         }
263                 }
264         } else {
265                 cfilename = "stdin";
266                 process(stdin);
267         }
268         exit(0);
269 }
270
/* Print the synopsis on stderr and exit with status 1. */
static void
usage(void)
{
        fputs("usage: checknr [-fs] [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] "
            "file\n", stderr);
        exit(1);
}
279
280 static void
281 process(FILE *f)
282 {
283         int i, n;
284         char mac[5];    /* The current macro or nroff command */
285         int pl;
286
287         stktop = -1;
288         for (lineno = 1; fgets(line, sizeof(line), f); lineno++) {
289                 if (line[0] == '.') {
290                         /*
291                          * find and isolate the macro/command name.
292                          */
293                         strncpy(mac, line + 1, 4);
294                         if (isspace(mac[0])) {
295                                 pe(lineno);
296                                 printf("Empty command\n");
297                         } else if (isspace(mac[1])) {
298                                 mac[1] = 0;
299                         } else if (isspace(mac[2])) {
300                                 mac[2] = 0;
301                         } else if (mac[0] != '\\' || mac[1] != '\"') {
302                                 pe(lineno);
303                                 printf("Command too long\n");
304                         }
305
306                         /*
307                          * Is it a known command?
308                          */
309                         checkknown(mac);
310
311                         /*
312                          * Should we add it?
313                          */
314                         if (eq(mac, "de"))
315                                 addcmd(line);
316
317                         chkcmd(mac);
318                 }
319
320                 /*
321                  * At this point we process the line looking
322                  * for \s and \f.
323                  */
324                 for (i = 0; line[i]; i++) {
325                         if (line[i] == '\\' && (i == 0 || line[i-1] != '\\')) {
326                                 if (!sflag && line[++i] == 's') {
327                                         pl = line[++i];
328                                         if (isdigit(pl)) {
329                                                 n = pl - '0';
330                                                 pl = ' ';
331                                         } else
332                                                 n = 0;
333                                         while (isdigit(line[++i]))
334                                                 n = 10 * n + line[i] - '0';
335                                         i--;
336                                         if (n == 0) {
337                                                 if (stk[stktop].opno == SZ) {
338                                                         stktop--;
339                                                 } else {
340                                                         pe(lineno);
341                                                         printf("unmatched \\s0\n");
342                                                 }
343                                         } else {
344                                                 stk[++stktop].opno = SZ;
345                                                 stk[stktop].pl = pl;
346                                                 stk[stktop].parm = n;
347                                                 stk[stktop].lno = lineno;
348                                         }
349                                 } else if (!fflag && line[i] == 'f') {
350                                         n = line[++i];
351                                         if (n == 'P') {
352                                                 if (stk[stktop].opno == FT) {
353                                                         stktop--;
354                                                 } else {
355                                                         pe(lineno);
356                                                         printf("unmatched \\fP\n");
357                                                 }
358                                         } else {
359                                                 stk[++stktop].opno = FT;
360                                                 stk[stktop].pl = 1;
361                                                 stk[stktop].parm = n;
362                                                 stk[stktop].lno = lineno;
363                                         }
364                                 }
365                         }
366                 }
367         }
368         /*
369          * We've hit the end and look at all this stuff that hasn't been
370          * matched yet!  Complain, complain.
371          */
372         for (i = stktop; i >= 0; i--) {
373                 complain(i);
374         }
375 }
376
377 static void
378 complain(int i)
379 {
380         pe(stk[i].lno);
381         printf("Unmatched ");
382         prop(i);
383         printf("\n");
384 }
385
386 static void
387 prop(int i)
388 {
389         if (stk[i].pl == 0)
390                 printf(".%s", br[stk[i].opno].opbr);
391         else switch(stk[i].opno) {
392         case SZ:
393                 printf("\\s%c%d", stk[i].pl, stk[i].parm);
394                 break;
395         case FT:
396                 printf("\\f%c", stk[i].parm);
397                 break;
398         default:
399                 printf("Bug: stk[%d].opno = %d = .%s, .%s",
400                         i, stk[i].opno, br[stk[i].opno].opbr,
401                         br[stk[i].opno].clbr);
402         }
403 }
404
405 static void
406 chkcmd(const char *mac)
407 {
408         int i;
409
410         /*
411          * Check to see if it matches top of stack.
412          */
413         if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
414                 stktop--;       /* OK. Pop & forget */
415         else {
416                 /* No. Maybe it's an opener */
417                 for (i = 0; br[i].opbr[0] != '\0'; i++) {
418                         if (eq(mac, br[i].opbr)) {
419                                 /* Found. Push it. */
420                                 stktop++;
421                                 stk[stktop].opno = i;
422                                 stk[stktop].pl = 0;
423                                 stk[stktop].parm = 0;
424                                 stk[stktop].lno = lineno;
425                                 break;
426                         }
427                         /*
428                          * Maybe it's an unmatched closer.
429                          * NOTE: this depends on the fact
430                          * that none of the closers can be
431                          * openers too.
432                          */
433                         if (eq(mac, br[i].clbr)) {
434                                 nomatch(mac);
435                                 break;
436                         }
437                 }
438         }
439 }
440
441 static void
442 nomatch(const char *mac)
443 {
444         int i, j;
445
446         /*
447          * Look for a match further down on stack
448          * If we find one, it suggests that the stuff in
449          * between is supposed to match itself.
450          */
451         for (j = stktop; j >= 0; j--) {
452                 if (eq(mac, br[stk[j].opno].clbr)) {
453                         /* Found.  Make a good diagnostic. */
454                         if (j == stktop - 2) {
455                                 /*
456                                  * Check for special case \fx..\fR and don't
457                                  * complain.
458                                  */
459                                 if (stk[j + 1].opno == FT &&
460                                     stk[j + 1].parm != 'R' &&
461                                     stk[j + 2].opno == FT &&
462                                     stk[j + 2].parm == 'R') {
463                                         stktop = j - 1;
464                                         return;
465                                 }
466                                 /*
467                                  * We have two unmatched frobs.  Chances are
468                                  * they were intended to match, so we mention
469                                  * them together.
470                                  */
471                                 pe(stk[j + 1].lno);
472                                 prop(j + 1);
473                                 printf(" does not match %d: ", stk[j + 2].lno);
474                                 prop(j + 2);
475                                 printf("\n");
476                         } else {
477                                 for (i = j + 1; i <= stktop; i++) {
478                                         complain(i);
479                                 }
480                         }
481                         stktop = j - 1;
482                         return;
483                 }
484         }
485         /* Didn't find one.  Throw this away. */
486         pe(lineno);
487         printf("Unmatched .%s\n", mac);
488 }
489
/* eq: return 1 when the two NUL-terminated strings are identical, else 0 */
static int
eq(const char *s1, const char *s2)
{
        while (*s1 == *s2) {
                if (*s1 == '\0')
                        return (1);
                s1++;
                s2++;
        }
        return (0);
}
496
497 /* print the first part of an error message, given the line number */
498 static void
499 pe(int mylineno)
500 {
501         if (nfiles > 1)
502                 printf("%s: ", cfilename);
503         printf("%d: ", mylineno);
504 }
505
506 static void
507 checkknown(const char *mac)
508 {
509         if (eq(mac, "."))
510                 return;
511         if (binsrch(mac, NULL) >= 0)
512                 return;
513         if (mac[0] == '\\' && mac[1] == '"')    /* comments */
514                 return;
515
516         pe(lineno);
517         printf("Unknown command: .%s\n", mac);
518 }
519
520 /*
521  * We have a .de xx line in "line".  Add xx to the list of known commands.
522  */
523 static void
524 addcmd(char *myline)
525 {
526         char *mac;
527
528         /* grab the macro being defined */
529         mac = myline + 4;
530         while (isspace(*mac))
531                 mac++;
532         if (*mac == 0) {
533                 pe(lineno);
534                 printf("illegal define: %s\n", myline);
535                 return;
536         }
537         mac[2] = 0;
538         if (isspace(mac[1]) || mac[1] == '\\')
539                 mac[1] = 0;
540         addmac(mac);
541 }
542
543 /*
544  * Add mac to the list.  We should really have some kind of tree
545  * structure here, but the loop below is reasonably fast.
546  */
547 static void
548 addmac(const char *mac)
549 {
550         int i, slot;
551
552         if (ncmds >= MAXCMDS) {
553                 errx(1, "Only %d known commands allowed", MAXCMDS);
554         }
555
556         /* Don't try to add it if it's already in the table. */
557         if (binsrch(mac, &slot) >= 0) {
558 #ifdef DEBUG
559                 printf("binsrch(%s) -> already in table\n", mac);
560 #endif /* DEBUG */
561                 return;
562         }
563 #ifdef DEBUG
564         printf("binsrch(%s) -> %d\n", mac, slot);
565 #endif
566         for (i = ncmds - 1; i >= slot; i--) {
567                 strncpy(knowncmds[i + 1], knowncmds[i], 2);
568         }
569         strncpy(knowncmds[slot], mac, 2);
570         ncmds++;
571 #ifdef DEBUG
572         printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2],
573                 knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1],
574                 knowncmds[slot+2], ncmds);
575 #endif
576 }
577
578 /*
579  * Do a binary search in knowncmds for mac.
580  * If found, return the index.  If not, return -1.
581  * Also, if not found, and if slot_ptr is not NULL,
582  * set *slot_ptr to where it should have been.
583  */
584 static int
585 binsrch(const char *mac, int *slot_ptr)
586 {
587         const char *p;  /* pointer to current cmd in list */
588         int d;          /* difference if any */
589         int mid;        /* mid point in binary search */
590         int top, bot;   /* boundaries of bin search, inclusive */
591
592         top = ncmds - 1;
593         bot = 0;
594         while (top >= bot) {
595                 mid = (top + bot) / 2;
596                 p = knowncmds[mid];
597                 d = p[0] - mac[0];
598                 if (d == 0)
599                         d = p[1] - mac[1];
600                 if (d == 0)
601                         return mid;
602                 if (d < 0)
603                         bot = mid + 1;
604                 else
605                         top = mid - 1;
606         }
607         if (slot_ptr != NULL)
608                 *slot_ptr = bot;        /* place it would have gone */
609         return -1;
610 }