Import mdocml-1.12.3
[dragonfly.git] / contrib / mdocml / roff.c
CommitLineData
7888c61d 1/* $Id: roff.c,v 1.189 2013/12/30 18:44:06 schwarze Exp $ */
80387638 2/*
f88b6c16
FF
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010, 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
80387638
SW
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18#ifdef HAVE_CONFIG_H
19#include "config.h"
20#endif
21
22#include <assert.h>
80387638 23#include <ctype.h>
f88b6c16 24#include <stdio.h>
80387638
SW
25#include <stdlib.h>
26#include <string.h>
80387638
SW
27
28#include "mandoc.h"
80387638
SW
29#include "libroff.h"
30#include "libmandoc.h"
31
a4c7eb57 32/* Maximum number of nested if-else conditionals. */
80387638
SW
33#define RSTACK_MAX 128
34
36342e81
SW
35/* Maximum number of string expansions per line, to break infinite loops. */
36#define EXPAND_LIMIT 1000
37
80387638
SW
/*
 * Identifiers for the requests and macros handled in this file.
 * The values index the roffs[] dispatch table, so the order of the
 * enumerators must stay in step with the order of that table.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_cc,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_fam,
	ROFF_hw,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,	/* the block-closing `..' */
	ROFF_ccond,	/* the conditional-closing `\}' */
	ROFF_USERDEF,	/* a user-defined macro */
	ROFF_MAX	/* number of entries; also "not found" */
};
77
/*
 * Evaluation result attached to a conditional scope.
 */
enum	roffrule {
	ROFFRULE_DENY,	/* the guarded content is discarded */
	ROFFRULE_ALLOW	/* the guarded content is processed */
};
82
/*
 * A minimal string buffer: a pointer together with its cached length.
 */
struct	roffstr {
	char		*p;	/* nil-terminated buffer */
	size_t		 sz;	/* saved strlen(p) */
};
90
91/*
92 * A key-value roffstr pair as part of a singly-linked list.
93 */
94struct roffkv {
95 struct roffstr key;
96 struct roffstr val;
97 struct roffkv *next; /* next in list */
80387638
SW
98};
99
7888c61d
FF
100/*
101 * A single number register as part of a singly-linked list.
102 */
103struct roffreg {
104 struct roffstr key;
105 int val;
106 struct roffreg *next;
107};
108
80387638 109struct roff {
f88b6c16 110 enum mparset parsetype; /* requested parse type */
60e1e752 111 struct mparse *parse; /* parse point */
80387638 112 struct roffnode *last; /* leaf of stack */
80387638 113 enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
f88b6c16 114 char control; /* control character */
80387638 115 int rstackpos; /* position in rstack */
7888c61d 116 struct roffreg *regtab; /* number registers */
36342e81
SW
117 struct roffkv *strtab; /* user-defined strings & macros */
118 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */
119 struct roffstr *xtab; /* single-byte trans table (`tr') */
80387638
SW
120 const char *current_string; /* value of last called user macro */
121 struct tbl_node *first_tbl; /* first table parsed */
122 struct tbl_node *last_tbl; /* last table parsed */
123 struct tbl_node *tbl; /* current table being parsed */
60e1e752
SW
124 struct eqn_node *last_eqn; /* last equation parsed */
125 struct eqn_node *first_eqn; /* first equation parsed */
126 struct eqn_node *eqn; /* current equation being parsed */
80387638
SW
127};
128
129struct roffnode {
130 enum rofft tok; /* type of node */
131 struct roffnode *parent; /* up one in stack */
132 int line; /* parse line */
133 int col; /* parse col */
134 char *name; /* node name, e.g. macro name */
135 char *end; /* end-rules: custom token */
136 int endspan; /* end-rules: next-line or infty */
137 enum roffrule rule; /* current evaluation rule */
138};
139
/*
 * Parameter list shared by every request handler in this file.
 */
#define	ROFF_ARGS \
	struct roff *r,		/* parse ctx */ \
	enum rofft tok,		/* tok of macro */ \
	char **bufp,		/* input buffer */ \
	size_t *szp,		/* size of input buffer */ \
	int ln,			/* parse line */ \
	int ppos,		/* original pos in buffer */ \
	int pos,		/* current pos in buffer */ \
	int *offs		/* reset offset of buffer data */

/* Signature of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
150
151struct roffmac {
152 const char *name; /* macro name */
153 roffproc proc; /* process new macro */
154 roffproc text; /* process as child text of macro */
155 roffproc sub; /* process as child of macro */
156 int flags;
157#define ROFFMAC_STRUCT (1 << 0) /* always interpret */
158 struct roffmac *next;
159};
160
a4c7eb57
SW
/*
 * A predefined string: its name and the text it stands for.
 */
struct	predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};

/* Expands to one struct predef initializer, trailing comma included. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
168
36342e81
SW
/* Request lookup table. */
static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);

/* Stack of open scopes. */
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);

/* Request handlers; all share the ROFF_ARGS parameter list. */
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);

/* Conditions, names, numbers, registers and strings. */
static	enum roffrule	 roff_evalcond(const char *, int *);
static	char		*roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(const struct roff *,
				const char *, size_t);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);

/* Line-level parsing and expansion. */
static	void		 roff_openeqn(struct roff *, const char *,
				int, int, const char *);
static	enum rofft	 roff_parse(struct roff *, const char *, int *);
static	enum rofferr	 roff_parsetext(char **, size_t *, int, int *);
static	enum rofferr	 roff_res(struct roff *,
				char **, size_t *, int, int);

/* Teardown. */
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
220
36342e81 221/* See roffhash_find() */
80387638
SW
222
/*
 * The request table is keyed on the first character of the name,
 * which has to lie in the range ASCII_LO to ASCII_HI inclusive.
 */
#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* One chain of requests per possible first character. */
static	struct roffmac	*hash[HASHWIDTH];
228
229static struct roffmac roffs[ROFF_MAX] = {
230 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
231 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
232 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
233 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
f88b6c16 234 { "cc", roff_cc, NULL, NULL, 0, NULL },
80387638
SW
235 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
236 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
237 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
238 { "ds", roff_ds, NULL, NULL, 0, NULL },
239 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
7888c61d
FF
240 { "fam", roff_line_ignore, NULL, NULL, 0, NULL },
241 { "hw", roff_line_ignore, NULL, NULL, 0, NULL },
80387638
SW
242 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
243 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
244 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
245 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
f88b6c16 246 { "it", roff_it, NULL, NULL, 0, NULL },
80387638
SW
247 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
248 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
249 { "nr", roff_nr, NULL, NULL, 0, NULL },
60e1e752
SW
250 { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
251 { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
252 { "rm", roff_rm, NULL, NULL, 0, NULL },
80387638 253 { "so", roff_so, NULL, NULL, 0, NULL },
60e1e752 254 { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
36342e81 255 { "tr", roff_tr, NULL, NULL, 0, NULL },
f88b6c16
FF
256 { "Dd", roff_Dd, NULL, NULL, 0, NULL },
257 { "TH", roff_TH, NULL, NULL, 0, NULL },
80387638
SW
258 { "TS", roff_TS, NULL, NULL, 0, NULL },
259 { "TE", roff_TE, NULL, NULL, 0, NULL },
260 { "T&", roff_T_, NULL, NULL, 0, NULL },
60e1e752
SW
261 { "EQ", roff_EQ, NULL, NULL, 0, NULL },
262 { "EN", roff_EN, NULL, NULL, 0, NULL },
80387638
SW
263 { ".", roff_cblock, NULL, NULL, 0, NULL },
264 { "\\}", roff_ccond, NULL, NULL, 0, NULL },
265 { NULL, roff_userdef, NULL, NULL, 0, NULL },
266};
267
f88b6c16
FF
/*
 * NULL-terminated list of mdoc macro names.
 */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Ds", "Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em", "em",
	"En", "Eo", "Eq", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", "LP",
	"Me", "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Or", "Rd", "Re", "Rs", "Rv",
	"Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
289
/*
 * NULL-terminated list of man macro names.
 */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "BT", "DE", "DS", "DT",
	"EE", "EN", "EQ", "EX", "HF", "HP", "I", "IB", "IP", "IR",
	"LP", "ME", "MT", "OP", "P", "PD", "PP", "PT",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", "SY",
	"TE", "TH", "TP", "TQ", "TS", "T&", "UC", "UE", "UR", "YS",
	NULL
};
298
a4c7eb57
SW
299/* Array of injected predefined strings. */
300#define PREDEFS_MAX 38
301static const struct predef predefs[PREDEFS_MAX] = {
302#include "predefs.in"
303};
304
36342e81 305/* See roffhash_find() */
80387638
SW
/*
 * Bucket index for the name p: its first character relative to
 * ASCII_LO.  The argument is parenthesized so that an expression
 * such as `s + 1' indexes the intended string.  See roffhash_find().
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
307
f88b6c16
FF
/* State of the input line trap; shared by all parsers in the process. */
static	int	 roffit_lines;	/* number of lines to delay */
static	char	*roffit_macro;	/* nil-terminated macro line */
310
80387638 311static void
36342e81 312roffhash_init(void)
80387638
SW
313{
314 struct roffmac *n;
315 int buc, i;
316
317 for (i = 0; i < (int)ROFF_USERDEF; i++) {
318 assert(roffs[i].name[0] >= ASCII_LO);
319 assert(roffs[i].name[0] <= ASCII_HI);
320
321 buc = ROFF_HASH(roffs[i].name);
322
323 if (NULL != (n = hash[buc])) {
324 for ( ; n->next; n = n->next)
325 /* Do nothing. */ ;
326 n->next = &roffs[i];
327 } else
328 hash[buc] = &roffs[i];
329 }
330}
331
80387638
SW
332/*
333 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
334 * the nil-terminated string name could be found.
335 */
336static enum rofft
36342e81 337roffhash_find(const char *p, size_t s)
80387638
SW
338{
339 int buc;
340 struct roffmac *n;
341
342 /*
343 * libroff has an extremely simple hashtable, for the time
344 * being, which simply keys on the first character, which must
345 * be printable, then walks a chain. It works well enough until
346 * optimised.
347 */
348
349 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
350 return(ROFF_MAX);
351
352 buc = ROFF_HASH(p);
353
354 if (NULL == (n = hash[buc]))
355 return(ROFF_MAX);
356 for ( ; n; n = n->next)
357 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
358 return((enum rofft)(n - roffs));
359
360 return(ROFF_MAX);
361}
362
363
364/*
365 * Pop the current node off of the stack of roff instructions currently
366 * pending.
367 */
368static void
369roffnode_pop(struct roff *r)
370{
371 struct roffnode *p;
372
373 assert(r->last);
374 p = r->last;
375
80387638
SW
376 r->last = r->last->parent;
377 free(p->name);
378 free(p->end);
379 free(p);
380}
381
382
383/*
384 * Push a roff node onto the instruction stack. This must later be
385 * removed with roffnode_pop().
386 */
387static void
388roffnode_push(struct roff *r, enum rofft tok, const char *name,
389 int line, int col)
390{
391 struct roffnode *p;
392
393 p = mandoc_calloc(1, sizeof(struct roffnode));
394 p->tok = tok;
395 if (name)
396 p->name = mandoc_strdup(name);
397 p->parent = r->last;
398 p->line = line;
399 p->col = col;
400 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
401
402 r->last = p;
403}
404
405
406static void
407roff_free1(struct roff *r)
408{
f88b6c16 409 struct tbl_node *tbl;
60e1e752 410 struct eqn_node *e;
36342e81 411 int i;
80387638 412
f88b6c16
FF
413 while (NULL != (tbl = r->first_tbl)) {
414 r->first_tbl = tbl->next;
415 tbl_free(tbl);
80387638
SW
416 }
417
418 r->first_tbl = r->last_tbl = r->tbl = NULL;
419
60e1e752
SW
420 while (NULL != (e = r->first_eqn)) {
421 r->first_eqn = e->next;
422 eqn_free(e);
423 }
424
425 r->first_eqn = r->last_eqn = r->eqn = NULL;
426
80387638
SW
427 while (r->last)
428 roffnode_pop(r);
429
36342e81
SW
430 roff_freestr(r->strtab);
431 roff_freestr(r->xmbtab);
432
433 r->strtab = r->xmbtab = NULL;
80387638 434
7888c61d
FF
435 roff_freereg(r->regtab);
436
437 r->regtab = NULL;
438
36342e81
SW
439 if (r->xtab)
440 for (i = 0; i < 128; i++)
441 free(r->xtab[i].p);
442
443 free(r->xtab);
444 r->xtab = NULL;
445}
80387638
SW
446
447void
448roff_reset(struct roff *r)
449{
36342e81 450 int i;
80387638
SW
451
452 roff_free1(r);
36342e81 453
f88b6c16 454 r->control = 0;
36342e81
SW
455
456 for (i = 0; i < PREDEFS_MAX; i++)
457 roff_setstr(r, predefs[i].name, predefs[i].str, 0);
80387638
SW
458}
459
460
/*
 * Destroy a parser: release everything it owns, then the parser
 * structure itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
468
469
470struct roff *
f88b6c16 471roff_alloc(enum mparset type, struct mparse *parse)
80387638
SW
472{
473 struct roff *r;
a4c7eb57 474 int i;
80387638
SW
475
476 r = mandoc_calloc(1, sizeof(struct roff));
f88b6c16 477 r->parsetype = type;
60e1e752 478 r->parse = parse;
80387638
SW
479 r->rstackpos = -1;
480
36342e81 481 roffhash_init();
a4c7eb57
SW
482
483 for (i = 0; i < PREDEFS_MAX; i++)
484 roff_setstr(r, predefs[i].name, predefs[i].str, 0);
485
80387638
SW
486 return(r);
487}
488
80387638 489/*
7888c61d
FF
490 * In the current line, expand user-defined strings ("\*")
491 * and references to number registers ("\n").
492 * Also check the syntax of other escape sequences.
80387638 493 */
36342e81 494static enum rofferr
a4c7eb57 495roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
80387638 496{
7888c61d 497 char ubuf[12]; /* buffer to print the number */
80387638
SW
498 const char *stesc; /* start of an escape sequence ('\\') */
499 const char *stnam; /* start of the name, after "[(*" */
500 const char *cp; /* end of the name, e.g. before ']' */
501 const char *res; /* the string to be substituted */
7888c61d
FF
502 char *nbuf; /* new buffer to copy bufp to */
503 size_t nsz; /* size of the new buffer */
504 size_t maxl; /* expected length of the escape name */
505 size_t naml; /* actual length of the escape name */
506 int expand_count; /* to avoid infinite loops */
80387638 507
36342e81 508 expand_count = 0;
80387638 509
36342e81 510again:
80387638
SW
511 cp = *bufp + pos;
512 while (NULL != (cp = strchr(cp, '\\'))) {
513 stesc = cp++;
514
515 /*
7888c61d 516 * The second character must be an asterisk or an n.
80387638
SW
517 * If it isn't, skip it anyway: It is escaped,
518 * so it can't start another escape sequence.
519 */
520
521 if ('\0' == *cp)
36342e81
SW
522 return(ROFF_CONT);
523
7888c61d
FF
524 switch (*cp) {
525 case ('*'):
526 res = NULL;
527 break;
528 case ('n'):
529 res = ubuf;
530 break;
531 default:
532 if (ESCAPE_ERROR != mandoc_escape(&cp, NULL, NULL))
36342e81 533 continue;
36342e81
SW
534 mandoc_msg
535 (MANDOCERR_BADESCAPE, r->parse,
536 ln, (int)(stesc - *bufp), NULL);
537 return(ROFF_CONT);
538 }
539
540 cp++;
80387638
SW
541
542 /*
543 * The third character decides the length
7888c61d 544 * of the name of the string or register.
80387638
SW
545 * Save a pointer to the name.
546 */
547
548 switch (*cp) {
549 case ('\0'):
36342e81 550 return(ROFF_CONT);
80387638
SW
551 case ('('):
552 cp++;
553 maxl = 2;
554 break;
555 case ('['):
556 cp++;
557 maxl = 0;
558 break;
559 default:
560 maxl = 1;
561 break;
562 }
563 stnam = cp;
564
565 /* Advance to the end of the name. */
566
7888c61d 567 for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
36342e81
SW
568 if ('\0' == *cp) {
569 mandoc_msg
570 (MANDOCERR_BADESCAPE,
571 r->parse, ln,
572 (int)(stesc - *bufp), NULL);
573 return(ROFF_CONT);
574 }
80387638
SW
575 if (0 == maxl && ']' == *cp)
576 break;
577 }
578
579 /*
580 * Retrieve the replacement string; if it is
581 * undefined, resume searching for escapes.
582 */
583
7888c61d
FF
584 if (NULL == res)
585 res = roff_getstrn(r, stnam, naml);
586 else
587 snprintf(ubuf, sizeof(ubuf), "%d",
588 roff_getregn(r, stnam, naml));
80387638
SW
589
590 if (NULL == res) {
36342e81
SW
591 mandoc_msg
592 (MANDOCERR_BADESCAPE, r->parse,
593 ln, (int)(stesc - *bufp), NULL);
a4c7eb57 594 res = "";
80387638
SW
595 }
596
597 /* Replace the escape sequence by the string. */
598
36342e81
SW
599 pos = stesc - *bufp;
600
80387638 601 nsz = *szp + strlen(res) + 1;
7888c61d 602 nbuf = mandoc_malloc(nsz);
80387638 603
7888c61d
FF
604 strlcpy(nbuf, *bufp, (size_t)(stesc - *bufp + 1));
605 strlcat(nbuf, res, nsz);
606 strlcat(nbuf, cp + (maxl ? 0 : 1), nsz);
80387638
SW
607
608 free(*bufp);
609
7888c61d 610 *bufp = nbuf;
80387638 611 *szp = nsz;
80387638 612
36342e81
SW
613 if (EXPAND_LIMIT >= ++expand_count)
614 goto again;
615
616 /* Just leave the string unexpanded. */
617 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
618 return(ROFF_IGN);
619 }
620 return(ROFF_CONT);
80387638
SW
621}
622
36342e81 623/*
f88b6c16
FF
624 * Process text streams:
625 * Convert all breakable hyphens into ASCII_HYPH.
626 * Decrement and spring input line trap.
36342e81
SW
627 */
628static enum rofferr
f88b6c16 629roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
36342e81
SW
630{
631 size_t sz;
632 const char *start;
f88b6c16
FF
633 char *p;
634 int isz;
36342e81
SW
635 enum mandoc_esc esc;
636
f88b6c16 637 start = p = *bufp + pos;
36342e81
SW
638
639 while ('\0' != *p) {
640 sz = strcspn(p, "-\\");
641 p += sz;
642
643 if ('\0' == *p)
644 break;
645
646 if ('\\' == *p) {
647 /* Skip over escapes. */
648 p++;
7888c61d 649 esc = mandoc_escape((const char **)&p, NULL, NULL);
36342e81
SW
650 if (ESCAPE_ERROR == esc)
651 break;
652 continue;
653 } else if (p == start) {
654 p++;
655 continue;
656 }
657
658 if (isalpha((unsigned char)p[-1]) &&
659 isalpha((unsigned char)p[1]))
660 *p = ASCII_HYPH;
661 p++;
662 }
663
f88b6c16
FF
664 /* Spring the input line trap. */
665 if (1 == roffit_lines) {
666 isz = asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
667 if (-1 == isz) {
668 perror(NULL);
669 exit((int)MANDOCLEVEL_SYSERR);
670 }
671 free(*bufp);
672 *bufp = p;
673 *szp = isz + 1;
674 *offs = 0;
675 free(roffit_macro);
676 roffit_lines = 0;
677 return(ROFF_REPARSE);
678 } else if (1 < roffit_lines)
679 --roffit_lines;
36342e81
SW
680 return(ROFF_CONT);
681}
80387638
SW
682
683enum rofferr
684roff_parseln(struct roff *r, int ln, char **bufp,
685 size_t *szp, int pos, int *offs)
686{
687 enum rofft t;
688 enum rofferr e;
60e1e752 689 int ppos, ctl;
80387638
SW
690
691 /*
692 * Run the reserved-word filter only if we have some reserved
693 * words to fill in.
694 */
695
36342e81
SW
696 e = roff_res(r, bufp, szp, ln, pos);
697 if (ROFF_IGN == e)
698 return(e);
699 assert(ROFF_CONT == e);
80387638 700
60e1e752 701 ppos = pos;
f88b6c16 702 ctl = roff_getcontrol(r, *bufp, &pos);
60e1e752 703
80387638
SW
704 /*
705 * First, if a scope is open and we're not a macro, pass the
706 * text through the macro's filter. If a scope isn't open and
707 * we're not a macro, just let it through.
60e1e752
SW
708 * Finally, if there's an equation scope open, divert it into it
709 * no matter our state.
80387638
SW
710 */
711
60e1e752 712 if (r->last && ! ctl) {
80387638
SW
713 t = r->last->tok;
714 assert(roffs[t].text);
715 e = (*roffs[t].text)
716 (r, t, bufp, szp, ln, pos, pos, offs);
717 assert(ROFF_IGN == e || ROFF_CONT == e);
60e1e752
SW
718 if (ROFF_CONT != e)
719 return(e);
7888c61d
FF
720 }
721 if (r->eqn)
722 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
723 if ( ! ctl) {
60e1e752
SW
724 if (r->tbl)
725 return(tbl_read(r->tbl, ln, *bufp, pos));
f88b6c16 726 return(roff_parsetext(bufp, szp, pos, offs));
7888c61d 727 }
80387638
SW
728
729 /*
730 * If a scope is open, go to the child handler for that macro,
731 * as it may want to preprocess before doing anything with it.
60e1e752 732 * Don't do so if an equation is open.
80387638
SW
733 */
734
735 if (r->last) {
736 t = r->last->tok;
737 assert(roffs[t].sub);
738 return((*roffs[t].sub)
739 (r, t, bufp, szp,
60e1e752 740 ln, ppos, pos, offs));
80387638
SW
741 }
742
743 /*
744 * Lastly, as we've no scope open, try to look up and execute
745 * the new macro. If no macro is found, simply return and let
746 * the compilers handle it.
747 */
748
80387638
SW
749 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
750 return(ROFF_CONT);
751
752 assert(roffs[t].proc);
753 return((*roffs[t].proc)
754 (r, t, bufp, szp,
755 ln, ppos, pos, offs));
756}
757
758
759void
760roff_endparse(struct roff *r)
761{
762
763 if (r->last)
60e1e752 764 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
80387638
SW
765 r->last->line, r->last->col, NULL);
766
60e1e752
SW
767 if (r->eqn) {
768 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
36342e81
SW
769 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
770 eqn_end(&r->eqn);
60e1e752
SW
771 }
772
80387638 773 if (r->tbl) {
60e1e752 774 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
80387638 775 r->tbl->line, r->tbl->pos, NULL);
36342e81 776 tbl_end(&r->tbl);
80387638
SW
777 }
778}
779
80387638
SW
780/*
781 * Parse a roff node's type from the input buffer. This must be in the
782 * form of ".foo xxx" in the usual way.
783 */
784static enum rofft
785roff_parse(struct roff *r, const char *buf, int *pos)
786{
787 const char *mac;
788 size_t maclen;
789 enum rofft t;
790
36342e81
SW
791 if ('\0' == buf[*pos] || '"' == buf[*pos] ||
792 '\t' == buf[*pos] || ' ' == buf[*pos])
80387638
SW
793 return(ROFF_MAX);
794
36342e81
SW
795 /*
796 * We stop the macro parse at an escape, tab, space, or nil.
797 * However, `\}' is also a valid macro, so make sure we don't
798 * clobber it by seeing the `\' as the end of token.
799 */
800
80387638 801 mac = buf + *pos;
36342e81 802 maclen = strcspn(mac + 1, " \\\t\0") + 1;
80387638
SW
803
804 t = (r->current_string = roff_getstrn(r, mac, maclen))
36342e81 805 ? ROFF_USERDEF : roffhash_find(mac, maclen);
80387638 806
60e1e752
SW
807 *pos += (int)maclen;
808
80387638
SW
809 while (buf[*pos] && ' ' == buf[*pos])
810 (*pos)++;
811
812 return(t);
813}
814
80387638
SW
815/* ARGSUSED */
816static enum rofferr
817roff_cblock(ROFF_ARGS)
818{
819
820 /*
821 * A block-close `..' should only be invoked as a child of an
822 * ignore macro, otherwise raise a warning and just ignore it.
823 */
824
825 if (NULL == r->last) {
60e1e752 826 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
80387638
SW
827 return(ROFF_IGN);
828 }
829
830 switch (r->last->tok) {
831 case (ROFF_am):
832 /* FALLTHROUGH */
833 case (ROFF_ami):
834 /* FALLTHROUGH */
835 case (ROFF_am1):
836 /* FALLTHROUGH */
837 case (ROFF_de):
838 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
839 /* FALLTHROUGH */
840 case (ROFF_dei):
841 /* FALLTHROUGH */
842 case (ROFF_ig):
843 break;
844 default:
60e1e752 845 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
80387638
SW
846 return(ROFF_IGN);
847 }
848
849 if ((*bufp)[pos])
60e1e752 850 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
80387638
SW
851
852 roffnode_pop(r);
853 roffnode_cleanscope(r);
854 return(ROFF_IGN);
855
856}
857
858
859static void
860roffnode_cleanscope(struct roff *r)
861{
862
863 while (r->last) {
f88b6c16 864 if (--r->last->endspan != 0)
80387638
SW
865 break;
866 roffnode_pop(r);
867 }
868}
869
870
871/* ARGSUSED */
872static enum rofferr
873roff_ccond(ROFF_ARGS)
874{
875
876 if (NULL == r->last) {
60e1e752 877 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
80387638
SW
878 return(ROFF_IGN);
879 }
880
881 switch (r->last->tok) {
882 case (ROFF_el):
883 /* FALLTHROUGH */
884 case (ROFF_ie):
885 /* FALLTHROUGH */
886 case (ROFF_if):
887 break;
888 default:
60e1e752 889 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
80387638
SW
890 return(ROFF_IGN);
891 }
892
893 if (r->last->endspan > -1) {
60e1e752 894 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
80387638
SW
895 return(ROFF_IGN);
896 }
897
898 if ((*bufp)[pos])
60e1e752 899 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
80387638
SW
900
901 roffnode_pop(r);
902 roffnode_cleanscope(r);
903 return(ROFF_IGN);
904}
905
906
907/* ARGSUSED */
908static enum rofferr
909roff_block(ROFF_ARGS)
910{
911 int sv;
912 size_t sz;
913 char *name;
914
915 name = NULL;
916
917 if (ROFF_ig != tok) {
918 if ('\0' == (*bufp)[pos]) {
60e1e752 919 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
80387638
SW
920 return(ROFF_IGN);
921 }
922
923 /*
924 * Re-write `de1', since we don't really care about
925 * groff's strange compatibility mode, into `de'.
926 */
927
928 if (ROFF_de1 == tok)
929 tok = ROFF_de;
930 if (ROFF_de == tok)
931 name = *bufp + pos;
932 else
60e1e752 933 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
80387638
SW
934 roffs[tok].name);
935
a4c7eb57 936 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
80387638
SW
937 pos++;
938
a4c7eb57 939 while (isspace((unsigned char)(*bufp)[pos]))
80387638
SW
940 (*bufp)[pos++] = '\0';
941 }
942
943 roffnode_push(r, tok, name, ln, ppos);
944
945 /*
946 * At the beginning of a `de' macro, clear the existing string
947 * with the same name, if there is one. New content will be
948 * added from roff_block_text() in multiline mode.
949 */
950
951 if (ROFF_de == tok)
952 roff_setstr(r, name, "", 0);
953
954 if ('\0' == (*bufp)[pos])
955 return(ROFF_IGN);
956
957 /* If present, process the custom end-of-line marker. */
958
959 sv = pos;
a4c7eb57 960 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
80387638
SW
961 pos++;
962
963 /*
964 * Note: groff does NOT like escape characters in the input.
965 * Instead of detecting this, we're just going to let it fly and
966 * to hell with it.
967 */
968
969 assert(pos > sv);
970 sz = (size_t)(pos - sv);
971
972 if (1 == sz && '.' == (*bufp)[sv])
973 return(ROFF_IGN);
974
975 r->last->end = mandoc_malloc(sz + 1);
976
977 memcpy(r->last->end, *bufp + sv, sz);
978 r->last->end[(int)sz] = '\0';
979
980 if ((*bufp)[pos])
60e1e752 981 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
80387638
SW
982
983 return(ROFF_IGN);
984}
985
986
987/* ARGSUSED */
988static enum rofferr
989roff_block_sub(ROFF_ARGS)
990{
991 enum rofft t;
992 int i, j;
993
994 /*
995 * First check whether a custom macro exists at this level. If
996 * it does, then check against it. This is some of groff's
997 * stranger behaviours. If we encountered a custom end-scope
998 * tag and that tag also happens to be a "real" macro, then we
999 * need to try interpreting it again as a real macro. If it's
1000 * not, then return ignore. Else continue.
1001 */
1002
1003 if (r->last->end) {
60e1e752 1004 for (i = pos, j = 0; r->last->end[j]; j++, i++)
80387638
SW
1005 if ((*bufp)[i] != r->last->end[j])
1006 break;
1007
1008 if ('\0' == r->last->end[j] &&
1009 ('\0' == (*bufp)[i] ||
1010 ' ' == (*bufp)[i] ||
1011 '\t' == (*bufp)[i])) {
1012 roffnode_pop(r);
1013 roffnode_cleanscope(r);
1014
60e1e752
SW
1015 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
1016 i++;
1017
1018 pos = i;
80387638
SW
1019 if (ROFF_MAX != roff_parse(r, *bufp, &pos))
1020 return(ROFF_RERUN);
1021 return(ROFF_IGN);
1022 }
1023 }
1024
1025 /*
1026 * If we have no custom end-query or lookup failed, then try
1027 * pulling it out of the hashtable.
1028 */
1029
a4c7eb57 1030 t = roff_parse(r, *bufp, &pos);
80387638
SW
1031
1032 /*
1033 * Macros other than block-end are only significant
1034 * in `de' blocks; elsewhere, simply throw them away.
1035 */
1036 if (ROFF_cblock != t) {
1037 if (ROFF_de == tok)
1038 roff_setstr(r, r->last->name, *bufp + ppos, 1);
1039 return(ROFF_IGN);
1040 }
1041
1042 assert(roffs[t].proc);
1043 return((*roffs[t].proc)(r, t, bufp, szp,
1044 ln, ppos, pos, offs));
1045}
1046
1047
1048/* ARGSUSED */
1049static enum rofferr
1050roff_block_text(ROFF_ARGS)
1051{
1052
1053 if (ROFF_de == tok)
1054 roff_setstr(r, r->last->name, *bufp + pos, 1);
1055
1056 return(ROFF_IGN);
1057}
1058
1059
1060/* ARGSUSED */
1061static enum rofferr
1062roff_cond_sub(ROFF_ARGS)
1063{
1064 enum rofft t;
1065 enum roffrule rr;
a4c7eb57 1066 char *ep;
80387638 1067
80387638 1068 rr = r->last->rule;
a4c7eb57 1069 roffnode_cleanscope(r);
f88b6c16 1070 t = roff_parse(r, *bufp, &pos);
80387638 1071
a4c7eb57 1072 /*
f88b6c16
FF
1073 * Fully handle known macros when they are structurally
1074 * required or when the conditional evaluated to true.
80387638
SW
1075 */
1076
f88b6c16
FF
1077 if ((ROFF_MAX != t) &&
1078 (ROFF_ccond == t || ROFFRULE_ALLOW == rr ||
1079 ROFFMAC_STRUCT & roffs[t].flags)) {
1080 assert(roffs[t].proc);
1081 return((*roffs[t].proc)(r, t, bufp, szp,
1082 ln, ppos, pos, offs));
1083 }
36342e81 1084
f88b6c16 1085 /* Always check for the closing delimiter `\}'. */
36342e81 1086
f88b6c16
FF
1087 ep = &(*bufp)[pos];
1088 while (NULL != (ep = strchr(ep, '\\'))) {
1089 if ('}' != *(++ep))
1090 continue;
80387638 1091
f88b6c16
FF
1092 /*
1093 * If we're at the end of line, then just chop
1094 * off the \} and resize the buffer.
1095 * If we aren't, then convert it to spaces.
1096 */
a4c7eb57 1097
f88b6c16
FF
1098 if ('\0' == *(ep + 1)) {
1099 *--ep = '\0';
1100 *szp -= 2;
1101 } else
1102 *(ep - 1) = *ep = ' ';
80387638 1103
f88b6c16
FF
1104 roff_ccond(r, ROFF_ccond, bufp, szp,
1105 ln, pos, pos + 2, offs);
1106 break;
1107 }
1108 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
80387638
SW
1109}
1110
80387638
SW
1111/* ARGSUSED */
1112static enum rofferr
1113roff_cond_text(ROFF_ARGS)
1114{
a4c7eb57 1115 char *ep;
80387638
SW
1116 enum roffrule rr;
1117
1118 rr = r->last->rule;
a4c7eb57 1119 roffnode_cleanscope(r);
80387638 1120
a4c7eb57
SW
1121 ep = &(*bufp)[pos];
1122 for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
1123 ep++;
1124 if ('}' != *ep)
1125 continue;
1126 *ep = '&';
1127 roff_ccond(r, ROFF_ccond, bufp, szp,
1128 ln, pos, pos + 2, offs);
80387638 1129 }
80387638
SW
1130 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1131}
1132
7888c61d
FF
/*
 * Parse an optionally negative decimal integer from v at *pos.
 * On success, store the value in *res, advance *pos past the last
 * digit and return 1.  If no digit is found, return 0 and leave
 * *pos unchanged (*res is reset to 0 in that case).
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	int	 p, n;

	p = *pos;
	n = v[p] == '-';
	if (n)
		p++;

	/* Accumulate base 10: shift the old value, then add the digit. */
	for (*res = 0; isdigit((unsigned char)v[p]); p++)
		*res = 10 * *res + v[p] - '0';

	/* Nothing but an optional sign was consumed. */
	if (p == *pos + n)
		return 0;

	if (n)
		*res = -*res;

	*pos = p;
	return 1;
}
1154
/*
 * Parse a comparison operator from v at *pos.
 * Recognized: `=', `==', `<', `>', `<=' (stored as 'l') and
 * `>=' (stored as 'g').
 * *res always receives v[*pos] first; on success it holds the
 * operator code, *pos is advanced past the operator and the code is
 * returned.  If v[*pos] does not start an operator, return 0 and
 * leave *pos unchanged.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	int	 e;

	*res = v[*pos];

	switch (*res) {
	case '=':
		/* FALLTHROUGH */
	case '>':
		/* FALLTHROUGH */
	case '<':
		break;
	default:
		return(0);
	}

	/*
	 * Only look at the following byte once the current one is
	 * known to be an operator character; otherwise v[*pos] may be
	 * the terminating NUL and v[*pos + 1] would lie outside the
	 * string.
	 */
	e = v[*pos + 1] == '=';

	if (e) {
		if ('>' == *res)
			*res = 'g';
		else if ('<' == *res)
			*res = 'l';
	}

	*pos += 1 + e;

	return(*res);
}
1182
80387638
SW
1183static enum roffrule
1184roff_evalcond(const char *v, int *pos)
1185{
7888c61d
FF
1186 int not, lh, rh;
1187 char op;
80387638
SW
1188
1189 switch (v[*pos]) {
1190 case ('n'):
1191 (*pos)++;
1192 return(ROFFRULE_ALLOW);
1193 case ('e'):
1194 /* FALLTHROUGH */
1195 case ('o'):
1196 /* FALLTHROUGH */
1197 case ('t'):
1198 (*pos)++;
1199 return(ROFFRULE_DENY);
7888c61d
FF
1200 case ('!'):
1201 (*pos)++;
1202 not = 1;
1203 break;
80387638 1204 default:
7888c61d 1205 not = 0;
80387638
SW
1206 break;
1207 }
1208
7888c61d
FF
1209 if (!roff_getnum(v, pos, &lh))
1210 return ROFFRULE_DENY;
1211 if (!roff_getop(v, pos, &op)) {
1212 if (lh < 0)
1213 lh = 0;
1214 goto out;
1215 }
1216 if (!roff_getnum(v, pos, &rh))
1217 return ROFFRULE_DENY;
1218 switch (op) {
1219 case 'g':
1220 lh = lh >= rh;
1221 break;
1222 case 'l':
1223 lh = lh <= rh;
1224 break;
1225 case '=':
1226 lh = lh == rh;
1227 break;
1228 case '>':
1229 lh = lh > rh;
1230 break;
1231 case '<':
1232 lh = lh < rh;
1233 break;
1234 default:
1235 return ROFFRULE_DENY;
1236 }
1237out:
1238 if (not)
1239 lh = !lh;
1240 return lh ? ROFFRULE_ALLOW : ROFFRULE_DENY;
80387638
SW
1241}
1242
1243/* ARGSUSED */
1244static enum rofferr
1245roff_line_ignore(ROFF_ARGS)
1246{
1247
80387638
SW
1248 return(ROFF_IGN);
1249}
1250
1251/* ARGSUSED */
1252static enum rofferr
1253roff_cond(ROFF_ARGS)
1254{
f88b6c16
FF
1255
1256 roffnode_push(r, tok, NULL, ln, ppos);
80387638 1257
a4c7eb57
SW
1258 /*
1259 * An `.el' has no conditional body: it will consume the value
1260 * of the current rstack entry set in prior `ie' calls or
1261 * defaults to DENY.
1262 *
1263 * If we're not an `el', however, then evaluate the conditional.
1264 */
80387638 1265
f88b6c16 1266 r->last->rule = ROFF_el == tok ?
a4c7eb57
SW
1267 (r->rstackpos < 0 ?
1268 ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
1269 roff_evalcond(*bufp, &pos);
80387638 1270
a4c7eb57
SW
1271 /*
1272 * An if-else will put the NEGATION of the current evaluated
1273 * conditional into the stack of rules.
1274 */
1275
80387638 1276 if (ROFF_ie == tok) {
a4c7eb57
SW
1277 if (r->rstackpos == RSTACK_MAX - 1) {
1278 mandoc_msg(MANDOCERR_MEM,
1279 r->parse, ln, ppos, NULL);
1280 return(ROFF_ERR);
1281 }
1282 r->rstack[++r->rstackpos] =
1283 ROFFRULE_DENY == r->last->rule ?
1284 ROFFRULE_ALLOW : ROFFRULE_DENY;
80387638
SW
1285 }
1286
1287 /* If the parent has false as its rule, then so do we. */
1288
1289 if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1290 r->last->rule = ROFFRULE_DENY;
1291
1292 /*
f88b6c16
FF
1293 * Determine scope.
1294 * If there is nothing on the line after the conditional,
1295 * not even whitespace, use next-line scope.
80387638
SW
1296 */
1297
f88b6c16
FF
1298 if ('\0' == (*bufp)[pos]) {
1299 r->last->endspan = 2;
1300 goto out;
1301 }
1302
1303 while (' ' == (*bufp)[pos])
1304 pos++;
1305
1306 /* An opening brace requests multiline scope. */
80387638
SW
1307
1308 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1309 r->last->endspan = -1;
1310 pos += 2;
f88b6c16 1311 goto out;
80387638
SW
1312 }
1313
1314 /*
f88b6c16
FF
1315 * Anything else following the conditional causes
1316 * single-line scope. Warn if the scope contains
1317 * nothing but trailing whitespace.
80387638
SW
1318 */
1319
1320 if ('\0' == (*bufp)[pos])
f88b6c16 1321 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
80387638 1322
f88b6c16 1323 r->last->endspan = 1;
80387638 1324
f88b6c16 1325out:
80387638
SW
1326 *offs = pos;
1327 return(ROFF_RERUN);
1328}
1329
1330
1331/* ARGSUSED */
1332static enum rofferr
1333roff_ds(ROFF_ARGS)
1334{
1335 char *name, *string;
1336
1337 /*
1338 * A symbol is named by the first word following the macro
1339 * invocation up to a space. Its value is anything after the
1340 * name's trailing whitespace and optional double-quote. Thus,
1341 *
1342 * [.ds foo "bar " ]
1343 *
1344 * will have `bar " ' as its value.
1345 */
1346
60e1e752
SW
1347 string = *bufp + pos;
1348 name = roff_getname(r, &string, ln, pos);
80387638
SW
1349 if ('\0' == *name)
1350 return(ROFF_IGN);
1351
60e1e752
SW
1352 /* Read past initial double-quote. */
1353 if ('"' == *string)
80387638
SW
1354 string++;
1355
1356 /* The rest is the value. */
1357 roff_setstr(r, name, string, 0);
1358 return(ROFF_IGN);
1359}
1360
7888c61d
FF
1361void
1362roff_setreg(struct roff *r, const char *name, int val, char sign)
1363{
1364 struct roffreg *reg;
1365
1366 /* Search for an existing register with the same name. */
1367 reg = r->regtab;
1368
1369 while (reg && strcmp(name, reg->key.p))
1370 reg = reg->next;
1371
1372 if (NULL == reg) {
1373 /* Create a new register. */
1374 reg = mandoc_malloc(sizeof(struct roffreg));
1375 reg->key.p = mandoc_strdup(name);
1376 reg->key.sz = strlen(name);
1377 reg->val = 0;
1378 reg->next = r->regtab;
1379 r->regtab = reg;
1380 }
1381
1382 if ('+' == sign)
1383 reg->val += val;
1384 else if ('-' == sign)
1385 reg->val -= val;
1386 else
1387 reg->val = val;
1388}
1389
36342e81 1390int
7888c61d 1391roff_getreg(const struct roff *r, const char *name)
36342e81 1392{
7888c61d 1393 struct roffreg *reg;
36342e81 1394
7888c61d
FF
1395 for (reg = r->regtab; reg; reg = reg->next)
1396 if (0 == strcmp(name, reg->key.p))
1397 return(reg->val);
1398
1399 return(0);
36342e81
SW
1400}
1401
7888c61d
FF
1402static int
1403roff_getregn(const struct roff *r, const char *name, size_t len)
36342e81 1404{
7888c61d 1405 struct roffreg *reg;
36342e81 1406
7888c61d
FF
1407 for (reg = r->regtab; reg; reg = reg->next)
1408 if (len == reg->key.sz &&
1409 0 == strncmp(name, reg->key.p, len))
1410 return(reg->val);
1411
1412 return(0);
36342e81
SW
1413}
1414
7888c61d
FF
1415static void
1416roff_freereg(struct roffreg *reg)
36342e81 1417{
7888c61d 1418 struct roffreg *old_reg;
36342e81 1419
7888c61d
FF
1420 while (NULL != reg) {
1421 free(reg->key.p);
1422 old_reg = reg;
1423 reg = reg->next;
1424 free(old_reg);
1425 }
36342e81 1426}
80387638
SW
1427
1428/* ARGSUSED */
1429static enum rofferr
1430roff_nr(ROFF_ARGS)
1431{
60e1e752
SW
1432 const char *key;
1433 char *val;
7888c61d 1434 size_t sz;
a4c7eb57 1435 int iv;
7888c61d 1436 char sign;
80387638 1437
60e1e752
SW
1438 val = *bufp + pos;
1439 key = roff_getname(r, &val, ln, pos);
80387638 1440
7888c61d
FF
1441 sign = *val;
1442 if ('+' == sign || '-' == sign)
1443 val++;
1444
1445 sz = strspn(val, "0123456789");
1446 iv = sz ? mandoc_strntoi(val, sz, 10) : 0;
1447
1448 roff_setreg(r, key, iv, sign);
80387638
SW
1449
1450 return(ROFF_IGN);
1451}
1452
1453/* ARGSUSED */
1454static enum rofferr
60e1e752
SW
1455roff_rm(ROFF_ARGS)
1456{
1457 const char *name;
1458 char *cp;
1459
1460 cp = *bufp + pos;
1461 while ('\0' != *cp) {
1462 name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1463 if ('\0' != *name)
1464 roff_setstr(r, name, NULL, 0);
1465 }
1466 return(ROFF_IGN);
1467}
1468
1469/* ARGSUSED */
1470static enum rofferr
f88b6c16
FF
1471roff_it(ROFF_ARGS)
1472{
1473 char *cp;
1474 size_t len;
1475 int iv;
1476
1477 /* Parse the number of lines. */
1478 cp = *bufp + pos;
1479 len = strcspn(cp, " \t");
1480 cp[len] = '\0';
1481 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1482 mandoc_msg(MANDOCERR_NUMERIC, r->parse,
1483 ln, ppos, *bufp + 1);
1484 return(ROFF_IGN);
1485 }
1486 cp += len + 1;
1487
1488 /* Arm the input line trap. */
1489 roffit_lines = iv;
1490 roffit_macro = mandoc_strdup(cp);
1491 return(ROFF_IGN);
1492}
1493
1494/* ARGSUSED */
1495static enum rofferr
1496roff_Dd(ROFF_ARGS)
1497{
1498 const char *const *cp;
1499
1500 if (MPARSE_MDOC != r->parsetype)
1501 for (cp = __mdoc_reserved; *cp; cp++)
1502 roff_setstr(r, *cp, NULL, 0);
1503
1504 return(ROFF_CONT);
1505}
1506
1507/* ARGSUSED */
1508static enum rofferr
1509roff_TH(ROFF_ARGS)
1510{
1511 const char *const *cp;
1512
1513 if (MPARSE_MDOC != r->parsetype)
1514 for (cp = __man_reserved; *cp; cp++)
1515 roff_setstr(r, *cp, NULL, 0);
1516
1517 return(ROFF_CONT);
1518}
1519
1520/* ARGSUSED */
1521static enum rofferr
80387638
SW
1522roff_TE(ROFF_ARGS)
1523{
1524
1525 if (NULL == r->tbl)
60e1e752 1526 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
80387638 1527 else
36342e81 1528 tbl_end(&r->tbl);
80387638 1529
80387638
SW
1530 return(ROFF_IGN);
1531}
1532
1533/* ARGSUSED */
1534static enum rofferr
1535roff_T_(ROFF_ARGS)
1536{
1537
1538 if (NULL == r->tbl)
60e1e752 1539 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
80387638
SW
1540 else
1541 tbl_restart(ppos, ln, r->tbl);
1542
1543 return(ROFF_IGN);
1544}
1545
36342e81
SW
#if 0
/*
 * Compiled out.
 * Returns 1 if an equation is open and eqn_end() on it yields
 * ROFF_EQN, else 0.
 */
static int
roff_closeeqn(struct roff *r)
{

	if (NULL == r->eqn)
		return 0;
	if (ROFF_EQN == eqn_end(&r->eqn))
		return 1;
	return 0;
}
#endif
1554
1555static void
1556roff_openeqn(struct roff *r, const char *name, int line,
1557 int offs, const char *buf)
1558{
1559 struct eqn_node *e;
1560 int poff;
60e1e752
SW
1561
1562 assert(NULL == r->eqn);
36342e81 1563 e = eqn_alloc(name, offs, line, r->parse);
60e1e752
SW
1564
1565 if (r->last_eqn)
1566 r->last_eqn->next = e;
1567 else
1568 r->first_eqn = r->last_eqn = e;
1569
1570 r->eqn = r->last_eqn = e;
36342e81
SW
1571
1572 if (buf) {
1573 poff = 0;
1574 eqn_read(&r->eqn, line, buf, offs, &poff);
1575 }
1576}
1577
1578/* ARGSUSED */
1579static enum rofferr
1580roff_EQ(ROFF_ARGS)
1581{
1582
1583 roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
60e1e752
SW
1584 return(ROFF_IGN);
1585}
1586
1587/* ARGSUSED */
1588static enum rofferr
1589roff_EN(ROFF_ARGS)
1590{
1591
1592 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1593 return(ROFF_IGN);
1594}
1595
1596/* ARGSUSED */
1597static enum rofferr
80387638
SW
1598roff_TS(ROFF_ARGS)
1599{
f88b6c16 1600 struct tbl_node *tbl;
80387638
SW
1601
1602 if (r->tbl) {
60e1e752 1603 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
36342e81 1604 tbl_end(&r->tbl);
80387638
SW
1605 }
1606
f88b6c16 1607 tbl = tbl_alloc(ppos, ln, r->parse);
80387638
SW
1608
1609 if (r->last_tbl)
f88b6c16 1610 r->last_tbl->next = tbl;
80387638 1611 else
f88b6c16
FF
1612 r->first_tbl = r->last_tbl = tbl;
1613
1614 r->tbl = r->last_tbl = tbl;
1615 return(ROFF_IGN);
1616}
1617
1618/* ARGSUSED */
1619static enum rofferr
1620roff_cc(ROFF_ARGS)
1621{
1622 const char *p;
1623
1624 p = *bufp + pos;
1625
1626 if ('\0' == *p || '.' == (r->control = *p++))
1627 r->control = 0;
1628
1629 if ('\0' != *p)
1630 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
80387638 1631
80387638
SW
1632 return(ROFF_IGN);
1633}
1634
1635/* ARGSUSED */
1636static enum rofferr
36342e81
SW
1637roff_tr(ROFF_ARGS)
1638{
1639 const char *p, *first, *second;
1640 size_t fsz, ssz;
1641 enum mandoc_esc esc;
1642
1643 p = *bufp + pos;
1644
1645 if ('\0' == *p) {
1646 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1647 return(ROFF_IGN);
1648 }
1649
1650 while ('\0' != *p) {
1651 fsz = ssz = 1;
1652
1653 first = p++;
1654 if ('\\' == *first) {
1655 esc = mandoc_escape(&p, NULL, NULL);
1656 if (ESCAPE_ERROR == esc) {
1657 mandoc_msg
1658 (MANDOCERR_BADESCAPE, r->parse,
1659 ln, (int)(p - *bufp), NULL);
1660 return(ROFF_IGN);
1661 }
1662 fsz = (size_t)(p - first);
1663 }
1664
1665 second = p++;
1666 if ('\\' == *second) {
1667 esc = mandoc_escape(&p, NULL, NULL);
1668 if (ESCAPE_ERROR == esc) {
1669 mandoc_msg
1670 (MANDOCERR_BADESCAPE, r->parse,
1671 ln, (int)(p - *bufp), NULL);
1672 return(ROFF_IGN);
1673 }
1674 ssz = (size_t)(p - second);
1675 } else if ('\0' == *second) {
1676 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1677 ln, (int)(p - *bufp), NULL);
1678 second = " ";
1679 p--;
1680 }
1681
1682 if (fsz > 1) {
1683 roff_setstrn(&r->xmbtab, first,
1684 fsz, second, ssz, 0);
1685 continue;
1686 }
1687
1688 if (NULL == r->xtab)
1689 r->xtab = mandoc_calloc
1690 (128, sizeof(struct roffstr));
1691
1692 free(r->xtab[(int)*first].p);
1693 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1694 r->xtab[(int)*first].sz = ssz;
1695 }
1696
1697 return(ROFF_IGN);
1698}
1699
1700/* ARGSUSED */
1701static enum rofferr
80387638
SW
1702roff_so(ROFF_ARGS)
1703{
1704 char *name;
1705
60e1e752 1706 mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
80387638
SW
1707
1708 /*
1709 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1710 * opening anything that's not in our cwd or anything beneath
1711 * it. Thus, explicitly disallow traversing up the file-system
1712 * or using absolute paths.
1713 */
1714
1715 name = *bufp + pos;
1716 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
60e1e752 1717 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
80387638
SW
1718 return(ROFF_ERR);
1719 }
1720
1721 *offs = pos;
1722 return(ROFF_SO);
1723}
1724
1725/* ARGSUSED */
1726static enum rofferr
1727roff_userdef(ROFF_ARGS)
1728{
1729 const char *arg[9];
1730 char *cp, *n1, *n2;
1731 int i;
1732
1733 /*
1734 * Collect pointers to macro argument strings
7888c61d 1735 * and NUL-terminate them.
80387638
SW
1736 */
1737 cp = *bufp + pos;
1738 for (i = 0; i < 9; i++)
1739 arg[i] = '\0' == *cp ? "" :
60e1e752 1740 mandoc_getarg(r->parse, &cp, ln, &pos);
80387638
SW
1741
1742 /*
1743 * Expand macro arguments.
1744 */
1745 *szp = 0;
1746 n1 = cp = mandoc_strdup(r->current_string);
1747 while (NULL != (cp = strstr(cp, "\\$"))) {
1748 i = cp[2] - '1';
1749 if (0 > i || 8 < i) {
1750 /* Not an argument invocation. */
1751 cp += 2;
1752 continue;
1753 }
1754
1755 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1756 n2 = mandoc_malloc(*szp);
1757
1758 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1759 strlcat(n2, arg[i], *szp);
1760 strlcat(n2, cp + 3, *szp);
1761
1762 cp = n2 + (cp - n1);
1763 free(n1);
1764 n1 = n2;
1765 }
1766
1767 /*
1768 * Replace the macro invocation
1769 * by the expanded macro.
1770 */
1771 free(*bufp);
1772 *bufp = n1;
1773 if (0 == *szp)
1774 *szp = strlen(*bufp) + 1;
1775
1776 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1777 ROFF_REPARSE : ROFF_APPEND);
1778}
1779
60e1e752
SW
1780static char *
1781roff_getname(struct roff *r, char **cpp, int ln, int pos)
1782{
1783 char *name, *cp;
1784
1785 name = *cpp;
1786 if ('\0' == *name)
1787 return(name);
1788
1789 /* Read until end of name. */
1790 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1791 if ('\\' != *cp)
1792 continue;
1793 cp++;
1794 if ('\\' == *cp)
1795 continue;
1796 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1797 *cp = '\0';
1798 name = cp;
1799 }
1800
1801 /* Nil-terminate name. */
1802 if ('\0' != *cp)
1803 *(cp++) = '\0';
1804
1805 /* Read past spaces. */
1806 while (' ' == *cp)
1807 cp++;
1808
1809 *cpp = cp;
1810 return(name);
1811}
1812
80387638
SW
1813/*
1814 * Store *string into the user-defined string called *name.
1815 * In multiline mode, append to an existing entry and append '\n';
1816 * else replace the existing entry, if there is one.
1817 * To clear an existing entry, call with (*r, *name, NULL, 0).
1818 */
1819static void
1820roff_setstr(struct roff *r, const char *name, const char *string,
1821 int multiline)
1822{
36342e81
SW
1823
1824 roff_setstrn(&r->strtab, name, strlen(name), string,
1825 string ? strlen(string) : 0, multiline);
1826}
1827
1828static void
1829roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
1830 const char *string, size_t stringsz, int multiline)
1831{
1832 struct roffkv *n;
1833 char *c;
1834 int i;
1835 size_t oldch, newch;
80387638
SW
1836
1837 /* Search for an existing string with the same name. */
36342e81
SW
1838 n = *r;
1839
1840 while (n && strcmp(name, n->key.p))
80387638
SW
1841 n = n->next;
1842
1843 if (NULL == n) {
1844 /* Create a new string table entry. */
36342e81
SW
1845 n = mandoc_malloc(sizeof(struct roffkv));
1846 n->key.p = mandoc_strndup(name, namesz);
1847 n->key.sz = namesz;
1848 n->val.p = NULL;
1849 n->val.sz = 0;
1850 n->next = *r;
1851 *r = n;
80387638
SW
1852 } else if (0 == multiline) {
1853 /* In multiline mode, append; else replace. */
36342e81
SW
1854 free(n->val.p);
1855 n->val.p = NULL;
1856 n->val.sz = 0;
80387638
SW
1857 }
1858
1859 if (NULL == string)
1860 return;
1861
1862 /*
1863 * One additional byte for the '\n' in multiline mode,
1864 * and one for the terminating '\0'.
1865 */
36342e81
SW
1866 newch = stringsz + (multiline ? 2u : 1u);
1867
1868 if (NULL == n->val.p) {
1869 n->val.p = mandoc_malloc(newch);
1870 *n->val.p = '\0';
80387638
SW
1871 oldch = 0;
1872 } else {
36342e81
SW
1873 oldch = n->val.sz;
1874 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
80387638
SW
1875 }
1876
1877 /* Skip existing content in the destination buffer. */
36342e81 1878 c = n->val.p + (int)oldch;
80387638
SW
1879
1880 /* Append new content to the destination buffer. */
36342e81
SW
1881 i = 0;
1882 while (i < (int)stringsz) {
80387638
SW
1883 /*
1884 * Rudimentary roff copy mode:
1885 * Handle escaped backslashes.
1886 */
36342e81
SW
1887 if ('\\' == string[i] && '\\' == string[i + 1])
1888 i++;
1889 *c++ = string[i++];
80387638
SW
1890 }
1891
1892 /* Append terminating bytes. */
1893 if (multiline)
1894 *c++ = '\n';
36342e81 1895
80387638 1896 *c = '\0';
36342e81 1897 n->val.sz = (int)(c - n->val.p);
80387638
SW
1898}
1899
80387638
SW
1900static const char *
1901roff_getstrn(const struct roff *r, const char *name, size_t len)
1902{
36342e81 1903 const struct roffkv *n;
80387638 1904
36342e81
SW
1905 for (n = r->strtab; n; n = n->next)
1906 if (0 == strncmp(name, n->key.p, len) &&
1907 '\0' == n->key.p[(int)len])
1908 return(n->val.p);
80387638 1909
36342e81 1910 return(NULL);
80387638
SW
1911}
1912
80387638 1913static void
36342e81 1914roff_freestr(struct roffkv *r)
80387638 1915{
36342e81 1916 struct roffkv *n, *nn;
80387638 1917
36342e81
SW
1918 for (n = r; n; n = nn) {
1919 free(n->key.p);
1920 free(n->val.p);
80387638
SW
1921 nn = n->next;
1922 free(n);
1923 }
80387638
SW
1924}
1925
1926const struct tbl_span *
1927roff_span(const struct roff *r)
1928{
1929
1930 return(r->tbl ? tbl_span(r->tbl) : NULL);
1931}
60e1e752
SW
1932
1933const struct eqn *
1934roff_eqn(const struct roff *r)
1935{
1936
1937 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1938}
36342e81
SW
1939
1940/*
1941 * Duplicate an input string, making the appropriate character
1942 * conversations (as stipulated by `tr') along the way.
1943 * Returns a heap-allocated string with all the replacements made.
1944 */
1945char *
1946roff_strdup(const struct roff *r, const char *p)
1947{
1948 const struct roffkv *cp;
1949 char *res;
1950 const char *pp;
1951 size_t ssz, sz;
1952 enum mandoc_esc esc;
1953
1954 if (NULL == r->xmbtab && NULL == r->xtab)
1955 return(mandoc_strdup(p));
1956 else if ('\0' == *p)
1957 return(mandoc_strdup(""));
1958
1959 /*
1960 * Step through each character looking for term matches
1961 * (remember that a `tr' can be invoked with an escape, which is
1962 * a glyph but the escape is multi-character).
1963 * We only do this if the character hash has been initialised
1964 * and the string is >0 length.
1965 */
1966
1967 res = NULL;
1968 ssz = 0;
1969
1970 while ('\0' != *p) {
1971 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
1972 sz = r->xtab[(int)*p].sz;
1973 res = mandoc_realloc(res, ssz + sz + 1);
1974 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
1975 ssz += sz;
1976 p++;
1977 continue;
1978 } else if ('\\' != *p) {
1979 res = mandoc_realloc(res, ssz + 2);
1980 res[ssz++] = *p++;
1981 continue;
1982 }
1983
1984 /* Search for term matches. */
1985 for (cp = r->xmbtab; cp; cp = cp->next)
1986 if (0 == strncmp(p, cp->key.p, cp->key.sz))
1987 break;
1988
1989 if (NULL != cp) {
1990 /*
1991 * A match has been found.
1992 * Append the match to the array and move
1993 * forward by its keysize.
1994 */
1995 res = mandoc_realloc
1996 (res, ssz + cp->val.sz + 1);
1997 memcpy(res + ssz, cp->val.p, cp->val.sz);
1998 ssz += cp->val.sz;
1999 p += (int)cp->key.sz;
2000 continue;
2001 }
2002
2003 /*
2004 * Handle escapes carefully: we need to copy
2005 * over just the escape itself, or else we might
2006 * do replacements within the escape itself.
2007 * Make sure to pass along the bogus string.
2008 */
2009 pp = p++;
2010 esc = mandoc_escape(&p, NULL, NULL);
2011 if (ESCAPE_ERROR == esc) {
2012 sz = strlen(pp);
2013 res = mandoc_realloc(res, ssz + sz + 1);
2014 memcpy(res + ssz, pp, sz);
2015 break;
2016 }
2017 /*
2018 * We bail out on bad escapes.
2019 * No need to warn: we already did so when
2020 * roff_res() was called.
2021 */
2022 sz = (int)(p - pp);
2023 res = mandoc_realloc(res, ssz + sz + 1);
2024 memcpy(res + ssz, pp, sz);
2025 ssz += sz;
2026 }
2027
2028 res[(int)ssz] = '\0';
2029 return(res);
2030}
f88b6c16
FF
2031
2032/*
2033 * Find out whether a line is a macro line or not.
2034 * If it is, adjust the current position and return one; if it isn't,
2035 * return zero and don't change the current position.
2036 * If the control character has been set with `.cc', then let that grain
2037 * precedence.
2038 * This is slighly contrary to groff, where using the non-breaking
2039 * control character when `cc' has been invoked will cause the
2040 * non-breaking macro contents to be printed verbatim.
2041 */
2042int
2043roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2044{
2045 int pos;
2046
2047 pos = *ppos;
2048
2049 if (0 != r->control && cp[pos] == r->control)
2050 pos++;
2051 else if (0 != r->control)
2052 return(0);
2053 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2054 pos += 2;
2055 else if ('.' == cp[pos] || '\'' == cp[pos])
2056 pos++;
2057 else
2058 return(0);
2059
2060 while (' ' == cp[pos] || '\t' == cp[pos])
2061 pos++;
2062
2063 *ppos = pos;
2064 return(1);
2065}