2 * Copyright (c) 1980, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 static const char copyright[] =
36 "@(#) Copyright (c) 1980, 1993\n\
37 The Regents of the University of California. All rights reserved.\n";
41 static const char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93";
45 * checknr: check an nroff/troff input file for matching macro calls.
46 * We also attempt to match size and font changes, but only the embedded
47 * kind. These must end in \s0 and \fP respectively. Maybe more sophistication
48 * later, but for now think of these restrictions as contributions to
49 * structured typesetting.
56 #define MAXSTK 100 /* Stack size: limit for the stack of things seen so far */
57 #define MAXBR 100 /* Max number of bracket (opener/closer) pairs known */
58 #define MAXCMDS 500 /* Max number of commands known; capacity of knowncmds[] */
60 void addcmd __P((char *)); /* NOTE(review): presumably takes a ".de xx" line and registers xx -- confirm */
61 void addmac __P((char *)); /* register a macro name as a known command */
62 int binsrch __P((char *)); /* binary search of knowncmds: index, or -1; sets slot */
63 void checkknown __P((char *)); /* NOTE(review): presumably reports names not found in knowncmds -- confirm */
64 void chkcmd __P((char *, char *)); /* NOTE(review): presumably matches a command against the stack -- confirm */
65 void complain __P((int)); /* NOTE(review): presumably reports one unmatched stack entry -- confirm */
66 int eq __P((char *, char *)); /* are two strings equal? (nonzero when strcmp gives 0) */
67 void nomatch __P((char *)); /* NOTE(review): presumably diagnoses a closer that fails to match -- confirm */
69 void process __P((FILE *)); /* NOTE(review): presumably checks one open input stream -- confirm */
71 static void usage __P((void)); /* NOTE(review): presumably prints the usage line -- confirm */
74 * The stack on which we remember what we've seen so far.
77 int opno; /* number of opening bracket: index into br[], or SZ / FT */
78 int pl; /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
79 int parm; /* parameter: the size for \s, the font character for \f */
80 int lno; /* line number on which the thing appeared */
85 * The kinds of opening and closing brackets.
91 /* A few bare bones troff commands */
93 {"sz", "sz"}, /* also \s */
95 {"ft", "ft"}, /* also \f */
109 /* the -ms package */
125 /* The -me package */
134 /* Things needed by preprocessors */
143 * All commands known to nroff, plus macro packages.
144 * Used so we can complain about unrecognized commands.
146 char *knowncmds[MAXCMDS] = {
147 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
148 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
149 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
150 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
151 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
152 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
153 "D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
154 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
155 "FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
156 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
157 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
158 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
159 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
160 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
161 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
162 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
163 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
164 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
165 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
166 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
167 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
168 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
169 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
170 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
171 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
172 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
173 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
174 "q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
175 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
176 "ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
180 int lineno; /* current line number in input file, counted from 1 */
181 char line[256]; /* the current line, as read by fgets */
182 char *cfilename; /* name of current file; printed at the start of error messages */
183 int nfiles; /* number of files to process */
184 int fflag; /* -f: ignore \f (font changes) */
185 int sflag; /* -s: ignore \s (size changes) */
186 int ncmds; /* size of knowncmds: entries in use, at most MAXCMDS */
187 int slot; /* slot in knowncmds found by binsrch; on a miss, where the name would have gone */
199 /* Figure out how many known commands there are */
200 while (knowncmds[ncmds])
202 while (argc > 1 && argv[1][0] == '-') {
205 /* -a: add pairs of macros */
207 i = strlen(argv[1]) - 2;
210 /* look for empty macro slots */
211 for (i=0; br[i].opbr; i++)
213 for (cp=argv[1]+3; cp[-1]; cp += 6) {
214 br[i].opbr = malloc(3);
215 strncpy(br[i].opbr, cp, 2);
216 br[i].clbr = malloc(3);
217 strncpy(br[i].clbr, cp+3, 2);
218 addmac(br[i].opbr); /* knows pairs are also known cmds */
224 /* -c: add known commands */
226 i = strlen(argv[1]) - 2;
229 for (cp=argv[1]+3; cp[-1]; cp += 3) {
230 if (cp[2] && cp[2] != '.')
238 /* -f: ignore font changes */
243 /* -s: ignore size changes */
256 for (i=1; i<argc; i++) {
258 f = fopen(cfilename, "r");
275 "usage: checknr [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [-s] [-f] file\n");
284 char mac[5]; /* The current macro or nroff command */
288 for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
289 if (line[0] == '.') {
291 * find and isolate the macro/command name.
293 strncpy(mac, line+1, 4);
294 if (isspace(mac[0])) {
296 printf("Empty command\n");
297 } else if (isspace(mac[1])) {
299 } else if (isspace(mac[2])) {
301 } else if (mac[0] != '\\' || mac[1] != '\"') {
303 printf("Command too long\n");
307 * Is it a known command?
321 * At this point we process the line looking
324 for (i=0; line[i]; i++)
325 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
326 if (!sflag && line[++i]=='s') {
333 while (isdigit(line[++i]))
334 n = 10 * n + line[i] - '0';
337 if (stk[stktop].opno == SZ) {
341 printf("unmatched \\s0\n");
344 stk[++stktop].opno = SZ;
346 stk[stktop].parm = n;
347 stk[stktop].lno = lineno;
349 } else if (!fflag && line[i]=='f') {
352 if (stk[stktop].opno == FT) {
356 printf("unmatched \\fP\n");
359 stk[++stktop].opno = FT;
361 stk[stktop].parm = n;
362 stk[stktop].lno = lineno;
368 * We've hit the end and look at all this stuff that hasn't been
369 * matched yet! Complain, complain.
371 for (i=stktop; i>=0; i--) {
381 printf("Unmatched ");
391 printf(".%s", br[stk[i].opno].opbr);
392 else switch(stk[i].opno) {
394 printf("\\s%c%d", stk[i].pl, stk[i].parm);
397 printf("\\f%c", stk[i].parm);
400 printf("Bug: stk[%d].opno = %d = .%s, .%s",
401 i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
413 * Check to see if it matches top of stack.
415 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
416 stktop--; /* OK. Pop & forget */
418 /* No. Maybe it's an opener */
419 for (i=0; br[i].opbr; i++) {
420 if (eq(mac, br[i].opbr)) {
421 /* Found. Push it. */
423 stk[stktop].opno = i;
425 stk[stktop].parm = 0;
426 stk[stktop].lno = lineno;
430 * Maybe it's an unmatched closer.
431 * NOTE: this depends on the fact
432 * that none of the closers can be
435 if (eq(mac, br[i].clbr)) {
450 * Look for a match further down on stack
451 * If we find one, it suggests that the stuff in
452 * between is supposed to match itself.
454 for (j=stktop; j>=0; j--)
455 if (eq(mac,br[stk[j].opno].clbr)) {
456 /* Found. Make a good diagnostic. */
459 * Check for special case \fx..\fR and don't
462 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
463 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
468 * We have two unmatched frobs. Chances are
469 * they were intended to match, so we mention
474 printf(" does not match %d: ", stk[j+2].lno);
477 } else for (i=j+1; i <= stktop; i++) {
483 /* Didn't find one. Throw this away. */
485 printf("Unmatched .%s\n", mac);
488 /* eq: are two strings equal? */
493 return (strcmp(s1, s2) == 0);
496 /* print the first part of an error message, given the line number */
502 printf("%s: ", cfilename);
503 printf("%d: ", lineno);
513 if (binsrch(mac) >= 0)
515 if (mac[0] == '\\' && mac[1] == '"') /* comments */
519 printf("Unknown command: .%s\n", mac);
523 * We have a .de xx line in "line". Add xx to the list of known commands.
531 /* grab the macro being defined */
533 while (isspace(*mac))
537 printf("illegal define: %s\n", line);
541 if (isspace(mac[1]) || mac[1] == '\\')
543 if (ncmds >= MAXCMDS) {
544 printf("Only %d known commands allowed\n", MAXCMDS);
551 * Add mac to the list. We should really have some kind of tree
552 * structure here but this is a quick-and-dirty job and I just don't
553 * have time to mess with it. (I wonder if this will come back to haunt
554 * me someday?) Anyway, I claim that .de is fairly rare in user
555 * nroff programs, and the register loop below is pretty fast.
561 register char **src, **dest, **loc;
563 if (binsrch(mac) >= 0){ /* it's OK to redefine something */
565 printf("binsrch(%s) -> already in table\n", mac);
569 /* binsrch sets slot as a side effect */
571 printf("binsrch(%s) -> %d\n", mac, slot);
573 loc = &knowncmds[slot];
574 src = &knowncmds[ncmds-1];
582 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
587 * Do a binary search in knowncmds for mac.
588 * If found, return the index. If not, return -1.
594 register char *p; /* pointer to current cmd in list */
595 register int d; /* difference if any */
596 register int mid; /* mid point in binary search */
597 register int top, bot; /* boundaries of bin search, inclusive */
614 slot = bot; /* place it would have gone */