Import mdocml-1.12.2
[dragonfly.git] / contrib / mdocml / roff.c
CommitLineData
f88b6c16 1/* $Id: roff.c,v 1.178 2013/07/13 12:52:07 schwarze Exp $ */
80387638 2/*
f88b6c16
FF
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010, 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
80387638
SW
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18#ifdef HAVE_CONFIG_H
19#include "config.h"
20#endif
21
22#include <assert.h>
80387638 23#include <ctype.h>
f88b6c16 24#include <stdio.h>
80387638
SW
25#include <stdlib.h>
26#include <string.h>
80387638
SW
27
28#include "mandoc.h"
80387638
SW
29#include "libroff.h"
30#include "libmandoc.h"
31
/*
 * Capacity of the rule stack kept in struct roff: the maximum number
 * of nested if-else conditionals.
 */
#define	RSTACK_MAX	128

/*
 * Upper bound on string expansions performed on one input line;
 * exists to break infinite expansion loops.
 */
#define	EXPAND_LIMIT	1000
37
80387638
SW
/*
 * Tokens for the requests and macros handled in this file.
 * The order must match the roffs[] table, which is indexed by token.
 */
enum	rofft {
	/* roff requests */
	ROFF_ad, ROFF_am, ROFF_ami, ROFF_am1,
	ROFF_cc,
	ROFF_de, ROFF_dei, ROFF_de1, ROFF_ds,
	ROFF_el,
	ROFF_hy,
	ROFF_ie, ROFF_if, ROFF_ig, ROFF_it,
	ROFF_ne, ROFF_nh, ROFF_nr, ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_so,
	ROFF_ta, ROFF_tr,
	/* macros */
	ROFF_Dd, ROFF_TH,
	ROFF_TS, ROFF_TE, ROFF_T_,
	ROFF_EQ, ROFF_EN,
	/* block and conditional closers */
	ROFF_cblock,
	ROFF_ccond,
	/* user-defined macro; also the number of hashed entries */
	ROFF_USERDEF,
	/* table size; doubles as the "not found" value */
	ROFF_MAX
};
75
/* Outcome of evaluating a conditional request. */
enum	roffrule {
	ROFFRULE_ALLOW,	/* process the conditional body */
	ROFFRULE_DENY	/* discard the conditional body */
};
80
36342e81
SW
/*
 * One register.  While "set" is zero the register has not been
 * assigned and its per-register default value applies.
 * Register values are unsigned ints for now.
 */
struct	reg {
	int		 set;	/* non-zero once a value was assigned */
	unsigned int	 u;	/* the assigned value */
};
90
/*
 * Minimal string buffer: a nil-terminated string together with its
 * cached length.
 */
struct	roffstr {
	char		*p;	/* nil-terminated buffer */
	size_t		 sz;	/* saved strlen(p) */
};
98
99/*
100 * A key-value roffstr pair as part of a singly-linked list.
101 */
102struct roffkv {
103 struct roffstr key;
104 struct roffstr val;
105 struct roffkv *next; /* next in list */
80387638
SW
106};
107
108struct roff {
f88b6c16 109 enum mparset parsetype; /* requested parse type */
60e1e752 110 struct mparse *parse; /* parse point */
80387638 111 struct roffnode *last; /* leaf of stack */
80387638 112 enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
f88b6c16 113 char control; /* control character */
80387638 114 int rstackpos; /* position in rstack */
36342e81
SW
115 struct reg regs[REG__MAX];
116 struct roffkv *strtab; /* user-defined strings & macros */
117 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */
118 struct roffstr *xtab; /* single-byte trans table (`tr') */
80387638
SW
119 const char *current_string; /* value of last called user macro */
120 struct tbl_node *first_tbl; /* first table parsed */
121 struct tbl_node *last_tbl; /* last table parsed */
122 struct tbl_node *tbl; /* current table being parsed */
60e1e752
SW
123 struct eqn_node *last_eqn; /* last equation parsed */
124 struct eqn_node *first_eqn; /* first equation parsed */
125 struct eqn_node *eqn; /* current equation being parsed */
80387638
SW
126};
127
128struct roffnode {
129 enum rofft tok; /* type of node */
130 struct roffnode *parent; /* up one in stack */
131 int line; /* parse line */
132 int col; /* parse col */
133 char *name; /* node name, e.g. macro name */
134 char *end; /* end-rules: custom token */
135 int endspan; /* end-rules: next-line or infty */
136 enum roffrule rule; /* current evaluation rule */
137};
138
/*
 * Parameter list shared by every request handler, and the
 * corresponding function pointer type.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

typedef	enum rofferr (*roffproc)(ROFF_ARGS);
149
150struct roffmac {
151 const char *name; /* macro name */
152 roffproc proc; /* process new macro */
153 roffproc text; /* process as child text of macro */
154 roffproc sub; /* process as child of macro */
155 int flags;
156#define ROFFMAC_STRUCT (1 << 0) /* always interpret */
157 struct roffmac *next;
158};
159
a4c7eb57
SW
/* A predefined string: its name and the text it expands to. */
struct	predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};

/* Expands to one struct predef initializer, trailing comma included. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
167
36342e81
SW
/* Hash table and scope stack helpers. */
static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);

/* Request handlers and parser internals. */
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum roffrule	 roff_evalcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freestr(struct roffkv *);
static	char		*roff_getname(struct roff *, char **, int, int);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	void		 roff_openeqn(struct roff *, const char *,
				int, int, const char *);
static	enum rofft	 roff_parse(struct roff *, const char *, int *);
static	enum rofferr	 roff_parsetext(char **, size_t *, int, int *);
static	enum rofferr	 roff_res(struct roff *,
				char **, size_t *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);

/* Macro handlers. */
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
214
/*
 * Bounds of the bytes accepted as first character of a request name,
 * and the resulting number of hash buckets.  See roffhash_find().
 */
#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* One chain of roffs[] entries per possible first character. */
static	struct roffmac	*hash[HASHWIDTH];
222
223static struct roffmac roffs[ROFF_MAX] = {
224 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
225 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
226 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
227 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
f88b6c16 228 { "cc", roff_cc, NULL, NULL, 0, NULL },
80387638
SW
229 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
230 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
231 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
232 { "ds", roff_ds, NULL, NULL, 0, NULL },
233 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
234 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
235 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
236 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
237 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
f88b6c16 238 { "it", roff_it, NULL, NULL, 0, NULL },
80387638
SW
239 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
240 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
241 { "nr", roff_nr, NULL, NULL, 0, NULL },
60e1e752
SW
242 { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
243 { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
244 { "rm", roff_rm, NULL, NULL, 0, NULL },
80387638 245 { "so", roff_so, NULL, NULL, 0, NULL },
60e1e752 246 { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
36342e81 247 { "tr", roff_tr, NULL, NULL, 0, NULL },
f88b6c16
FF
248 { "Dd", roff_Dd, NULL, NULL, 0, NULL },
249 { "TH", roff_TH, NULL, NULL, 0, NULL },
80387638
SW
250 { "TS", roff_TS, NULL, NULL, 0, NULL },
251 { "TE", roff_TE, NULL, NULL, 0, NULL },
252 { "T&", roff_T_, NULL, NULL, 0, NULL },
60e1e752
SW
253 { "EQ", roff_EQ, NULL, NULL, 0, NULL },
254 { "EN", roff_EN, NULL, NULL, 0, NULL },
80387638
SW
255 { ".", roff_cblock, NULL, NULL, 0, NULL },
256 { "\\}", roff_ccond, NULL, NULL, 0, NULL },
257 { NULL, roff_userdef, NULL, NULL, 0, NULL },
258};
259
f88b6c16
FF
/* NULL-terminated list of reserved mdoc macro names. */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Ds", "Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em", "em",
	"En", "Eo", "Eq", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", "LP",
	"Me", "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Or", "Rd", "Re", "Rs", "Rv",
	"Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
281
/* NULL-terminated list of reserved man macro names. */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "BT", "DE", "DS", "DT",
	"EE", "EN", "EQ", "EX", "HF", "HP", "I", "IB", "IP", "IR",
	"LP", "ME", "MT", "OP", "P", "PD", "PP", "PT",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", "SY",
	"TE", "TH", "TP", "TQ", "TS", "T&", "UC", "UE", "UR", "YS",
	NULL
};
290
a4c7eb57
SW
291/* Array of injected predefined strings. */
292#define PREDEFS_MAX 38
293static const struct predef predefs[PREDEFS_MAX] = {
294#include "predefs.in"
295};
296
/* Hash bucket of a name: its first byte, offset by ASCII_LO. */
#define	ROFF_HASH(p)	(p[0] - ASCII_LO)

/* State of the input line trap, consumed by roff_parsetext(). */
static	int	 roffit_lines;	/* number of lines to delay */
static	char	*roffit_macro;	/* nil-terminated macro line */
302
80387638 303static void
36342e81 304roffhash_init(void)
80387638
SW
305{
306 struct roffmac *n;
307 int buc, i;
308
309 for (i = 0; i < (int)ROFF_USERDEF; i++) {
310 assert(roffs[i].name[0] >= ASCII_LO);
311 assert(roffs[i].name[0] <= ASCII_HI);
312
313 buc = ROFF_HASH(roffs[i].name);
314
315 if (NULL != (n = hash[buc])) {
316 for ( ; n->next; n = n->next)
317 /* Do nothing. */ ;
318 n->next = &roffs[i];
319 } else
320 hash[buc] = &roffs[i];
321 }
322}
323
80387638
SW
324/*
325 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
326 * the nil-terminated string name could be found.
327 */
328static enum rofft
36342e81 329roffhash_find(const char *p, size_t s)
80387638
SW
330{
331 int buc;
332 struct roffmac *n;
333
334 /*
335 * libroff has an extremely simple hashtable, for the time
336 * being, which simply keys on the first character, which must
337 * be printable, then walks a chain. It works well enough until
338 * optimised.
339 */
340
341 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
342 return(ROFF_MAX);
343
344 buc = ROFF_HASH(p);
345
346 if (NULL == (n = hash[buc]))
347 return(ROFF_MAX);
348 for ( ; n; n = n->next)
349 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
350 return((enum rofft)(n - roffs));
351
352 return(ROFF_MAX);
353}
354
355
356/*
357 * Pop the current node off of the stack of roff instructions currently
358 * pending.
359 */
360static void
361roffnode_pop(struct roff *r)
362{
363 struct roffnode *p;
364
365 assert(r->last);
366 p = r->last;
367
80387638
SW
368 r->last = r->last->parent;
369 free(p->name);
370 free(p->end);
371 free(p);
372}
373
374
375/*
376 * Push a roff node onto the instruction stack. This must later be
377 * removed with roffnode_pop().
378 */
379static void
380roffnode_push(struct roff *r, enum rofft tok, const char *name,
381 int line, int col)
382{
383 struct roffnode *p;
384
385 p = mandoc_calloc(1, sizeof(struct roffnode));
386 p->tok = tok;
387 if (name)
388 p->name = mandoc_strdup(name);
389 p->parent = r->last;
390 p->line = line;
391 p->col = col;
392 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
393
394 r->last = p;
395}
396
397
398static void
399roff_free1(struct roff *r)
400{
f88b6c16 401 struct tbl_node *tbl;
60e1e752 402 struct eqn_node *e;
36342e81 403 int i;
80387638 404
f88b6c16
FF
405 while (NULL != (tbl = r->first_tbl)) {
406 r->first_tbl = tbl->next;
407 tbl_free(tbl);
80387638
SW
408 }
409
410 r->first_tbl = r->last_tbl = r->tbl = NULL;
411
60e1e752
SW
412 while (NULL != (e = r->first_eqn)) {
413 r->first_eqn = e->next;
414 eqn_free(e);
415 }
416
417 r->first_eqn = r->last_eqn = r->eqn = NULL;
418
80387638
SW
419 while (r->last)
420 roffnode_pop(r);
421
36342e81
SW
422 roff_freestr(r->strtab);
423 roff_freestr(r->xmbtab);
424
425 r->strtab = r->xmbtab = NULL;
80387638 426
36342e81
SW
427 if (r->xtab)
428 for (i = 0; i < 128; i++)
429 free(r->xtab[i].p);
430
431 free(r->xtab);
432 r->xtab = NULL;
433}
80387638
SW
434
435void
436roff_reset(struct roff *r)
437{
36342e81 438 int i;
80387638
SW
439
440 roff_free1(r);
36342e81 441
f88b6c16 442 r->control = 0;
36342e81
SW
443 memset(&r->regs, 0, sizeof(struct reg) * REG__MAX);
444
445 for (i = 0; i < PREDEFS_MAX; i++)
446 roff_setstr(r, predefs[i].name, predefs[i].str, 0);
80387638
SW
447}
448
449
/*
 * Destroy a parser: release everything it owns, then the parser
 * object itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
457
458
459struct roff *
f88b6c16 460roff_alloc(enum mparset type, struct mparse *parse)
80387638
SW
461{
462 struct roff *r;
a4c7eb57 463 int i;
80387638
SW
464
465 r = mandoc_calloc(1, sizeof(struct roff));
f88b6c16 466 r->parsetype = type;
60e1e752 467 r->parse = parse;
80387638
SW
468 r->rstackpos = -1;
469
36342e81 470 roffhash_init();
a4c7eb57
SW
471
472 for (i = 0; i < PREDEFS_MAX; i++)
473 roff_setstr(r, predefs[i].name, predefs[i].str, 0);
474
80387638
SW
475 return(r);
476}
477
80387638
SW
478/*
479 * Pre-filter each and every line for reserved words (one beginning with
480 * `\*', e.g., `\*(ab'). These must be handled before the actual line
481 * is processed.
36342e81 482 * This also checks the syntax of regular escapes.
80387638 483 */
36342e81 484static enum rofferr
a4c7eb57 485roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
80387638 486{
36342e81 487 enum mandoc_esc esc;
80387638
SW
488 const char *stesc; /* start of an escape sequence ('\\') */
489 const char *stnam; /* start of the name, after "[(*" */
490 const char *cp; /* end of the name, e.g. before ']' */
491 const char *res; /* the string to be substituted */
36342e81 492 int i, maxl, expand_count;
80387638
SW
493 size_t nsz;
494 char *n;
495
36342e81 496 expand_count = 0;
80387638 497
36342e81 498again:
80387638
SW
499 cp = *bufp + pos;
500 while (NULL != (cp = strchr(cp, '\\'))) {
501 stesc = cp++;
502
503 /*
504 * The second character must be an asterisk.
505 * If it isn't, skip it anyway: It is escaped,
506 * so it can't start another escape sequence.
507 */
508
509 if ('\0' == *cp)
36342e81
SW
510 return(ROFF_CONT);
511
512 if ('*' != *cp) {
513 res = cp;
514 esc = mandoc_escape(&cp, NULL, NULL);
515 if (ESCAPE_ERROR != esc)
516 continue;
517 cp = res;
518 mandoc_msg
519 (MANDOCERR_BADESCAPE, r->parse,
520 ln, (int)(stesc - *bufp), NULL);
521 return(ROFF_CONT);
522 }
523
524 cp++;
80387638
SW
525
526 /*
527 * The third character decides the length
528 * of the name of the string.
529 * Save a pointer to the name.
530 */
531
532 switch (*cp) {
533 case ('\0'):
36342e81 534 return(ROFF_CONT);
80387638
SW
535 case ('('):
536 cp++;
537 maxl = 2;
538 break;
539 case ('['):
540 cp++;
541 maxl = 0;
542 break;
543 default:
544 maxl = 1;
545 break;
546 }
547 stnam = cp;
548
549 /* Advance to the end of the name. */
550
551 for (i = 0; 0 == maxl || i < maxl; i++, cp++) {
36342e81
SW
552 if ('\0' == *cp) {
553 mandoc_msg
554 (MANDOCERR_BADESCAPE,
555 r->parse, ln,
556 (int)(stesc - *bufp), NULL);
557 return(ROFF_CONT);
558 }
80387638
SW
559 if (0 == maxl && ']' == *cp)
560 break;
561 }
562
563 /*
564 * Retrieve the replacement string; if it is
565 * undefined, resume searching for escapes.
566 */
567
568 res = roff_getstrn(r, stnam, (size_t)i);
569
570 if (NULL == res) {
36342e81
SW
571 mandoc_msg
572 (MANDOCERR_BADESCAPE, r->parse,
573 ln, (int)(stesc - *bufp), NULL);
a4c7eb57 574 res = "";
80387638
SW
575 }
576
577 /* Replace the escape sequence by the string. */
578
36342e81
SW
579 pos = stesc - *bufp;
580
80387638
SW
581 nsz = *szp + strlen(res) + 1;
582 n = mandoc_malloc(nsz);
583
584 strlcpy(n, *bufp, (size_t)(stesc - *bufp + 1));
585 strlcat(n, res, nsz);
586 strlcat(n, cp + (maxl ? 0 : 1), nsz);
587
588 free(*bufp);
589
590 *bufp = n;
591 *szp = nsz;
80387638 592
36342e81
SW
593 if (EXPAND_LIMIT >= ++expand_count)
594 goto again;
595
596 /* Just leave the string unexpanded. */
597 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
598 return(ROFF_IGN);
599 }
600 return(ROFF_CONT);
80387638
SW
601}
602
36342e81 603/*
f88b6c16
FF
604 * Process text streams:
605 * Convert all breakable hyphens into ASCII_HYPH.
606 * Decrement and spring input line trap.
36342e81
SW
607 */
608static enum rofferr
f88b6c16 609roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
36342e81
SW
610{
611 size_t sz;
612 const char *start;
f88b6c16
FF
613 char *p;
614 int isz;
36342e81
SW
615 enum mandoc_esc esc;
616
f88b6c16 617 start = p = *bufp + pos;
36342e81
SW
618
619 while ('\0' != *p) {
620 sz = strcspn(p, "-\\");
621 p += sz;
622
623 if ('\0' == *p)
624 break;
625
626 if ('\\' == *p) {
627 /* Skip over escapes. */
628 p++;
629 esc = mandoc_escape
630 ((const char **)&p, NULL, NULL);
631 if (ESCAPE_ERROR == esc)
632 break;
633 continue;
634 } else if (p == start) {
635 p++;
636 continue;
637 }
638
639 if (isalpha((unsigned char)p[-1]) &&
640 isalpha((unsigned char)p[1]))
641 *p = ASCII_HYPH;
642 p++;
643 }
644
f88b6c16
FF
645 /* Spring the input line trap. */
646 if (1 == roffit_lines) {
647 isz = asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
648 if (-1 == isz) {
649 perror(NULL);
650 exit((int)MANDOCLEVEL_SYSERR);
651 }
652 free(*bufp);
653 *bufp = p;
654 *szp = isz + 1;
655 *offs = 0;
656 free(roffit_macro);
657 roffit_lines = 0;
658 return(ROFF_REPARSE);
659 } else if (1 < roffit_lines)
660 --roffit_lines;
36342e81
SW
661 return(ROFF_CONT);
662}
80387638
SW
663
664enum rofferr
665roff_parseln(struct roff *r, int ln, char **bufp,
666 size_t *szp, int pos, int *offs)
667{
668 enum rofft t;
669 enum rofferr e;
60e1e752 670 int ppos, ctl;
80387638
SW
671
672 /*
673 * Run the reserved-word filter only if we have some reserved
674 * words to fill in.
675 */
676
36342e81
SW
677 e = roff_res(r, bufp, szp, ln, pos);
678 if (ROFF_IGN == e)
679 return(e);
680 assert(ROFF_CONT == e);
80387638 681
60e1e752 682 ppos = pos;
f88b6c16 683 ctl = roff_getcontrol(r, *bufp, &pos);
60e1e752 684
80387638
SW
685 /*
686 * First, if a scope is open and we're not a macro, pass the
687 * text through the macro's filter. If a scope isn't open and
688 * we're not a macro, just let it through.
60e1e752
SW
689 * Finally, if there's an equation scope open, divert it into it
690 * no matter our state.
80387638
SW
691 */
692
60e1e752 693 if (r->last && ! ctl) {
80387638
SW
694 t = r->last->tok;
695 assert(roffs[t].text);
696 e = (*roffs[t].text)
697 (r, t, bufp, szp, ln, pos, pos, offs);
698 assert(ROFF_IGN == e || ROFF_CONT == e);
60e1e752
SW
699 if (ROFF_CONT != e)
700 return(e);
701 if (r->eqn)
36342e81 702 return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
80387638 703 if (r->tbl)
60e1e752 704 return(tbl_read(r->tbl, ln, *bufp, pos));
f88b6c16 705 return(roff_parsetext(bufp, szp, pos, offs));
60e1e752
SW
706 } else if ( ! ctl) {
707 if (r->eqn)
36342e81 708 return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
60e1e752
SW
709 if (r->tbl)
710 return(tbl_read(r->tbl, ln, *bufp, pos));
f88b6c16 711 return(roff_parsetext(bufp, szp, pos, offs));
60e1e752 712 } else if (r->eqn)
36342e81 713 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
80387638
SW
714
715 /*
716 * If a scope is open, go to the child handler for that macro,
717 * as it may want to preprocess before doing anything with it.
60e1e752 718 * Don't do so if an equation is open.
80387638
SW
719 */
720
721 if (r->last) {
722 t = r->last->tok;
723 assert(roffs[t].sub);
724 return((*roffs[t].sub)
725 (r, t, bufp, szp,
60e1e752 726 ln, ppos, pos, offs));
80387638
SW
727 }
728
729 /*
730 * Lastly, as we've no scope open, try to look up and execute
731 * the new macro. If no macro is found, simply return and let
732 * the compilers handle it.
733 */
734
80387638
SW
735 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
736 return(ROFF_CONT);
737
738 assert(roffs[t].proc);
739 return((*roffs[t].proc)
740 (r, t, bufp, szp,
741 ln, ppos, pos, offs));
742}
743
744
745void
746roff_endparse(struct roff *r)
747{
748
749 if (r->last)
60e1e752 750 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
80387638
SW
751 r->last->line, r->last->col, NULL);
752
60e1e752
SW
753 if (r->eqn) {
754 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
36342e81
SW
755 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
756 eqn_end(&r->eqn);
60e1e752
SW
757 }
758
80387638 759 if (r->tbl) {
60e1e752 760 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
80387638 761 r->tbl->line, r->tbl->pos, NULL);
36342e81 762 tbl_end(&r->tbl);
80387638
SW
763 }
764}
765
80387638
SW
766/*
767 * Parse a roff node's type from the input buffer. This must be in the
768 * form of ".foo xxx" in the usual way.
769 */
770static enum rofft
771roff_parse(struct roff *r, const char *buf, int *pos)
772{
773 const char *mac;
774 size_t maclen;
775 enum rofft t;
776
36342e81
SW
777 if ('\0' == buf[*pos] || '"' == buf[*pos] ||
778 '\t' == buf[*pos] || ' ' == buf[*pos])
80387638
SW
779 return(ROFF_MAX);
780
36342e81
SW
781 /*
782 * We stop the macro parse at an escape, tab, space, or nil.
783 * However, `\}' is also a valid macro, so make sure we don't
784 * clobber it by seeing the `\' as the end of token.
785 */
786
80387638 787 mac = buf + *pos;
36342e81 788 maclen = strcspn(mac + 1, " \\\t\0") + 1;
80387638
SW
789
790 t = (r->current_string = roff_getstrn(r, mac, maclen))
36342e81 791 ? ROFF_USERDEF : roffhash_find(mac, maclen);
80387638 792
60e1e752
SW
793 *pos += (int)maclen;
794
80387638
SW
795 while (buf[*pos] && ' ' == buf[*pos])
796 (*pos)++;
797
798 return(t);
799}
800
80387638
SW
801/* ARGSUSED */
802static enum rofferr
803roff_cblock(ROFF_ARGS)
804{
805
806 /*
807 * A block-close `..' should only be invoked as a child of an
808 * ignore macro, otherwise raise a warning and just ignore it.
809 */
810
811 if (NULL == r->last) {
60e1e752 812 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
80387638
SW
813 return(ROFF_IGN);
814 }
815
816 switch (r->last->tok) {
817 case (ROFF_am):
818 /* FALLTHROUGH */
819 case (ROFF_ami):
820 /* FALLTHROUGH */
821 case (ROFF_am1):
822 /* FALLTHROUGH */
823 case (ROFF_de):
824 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
825 /* FALLTHROUGH */
826 case (ROFF_dei):
827 /* FALLTHROUGH */
828 case (ROFF_ig):
829 break;
830 default:
60e1e752 831 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
80387638
SW
832 return(ROFF_IGN);
833 }
834
835 if ((*bufp)[pos])
60e1e752 836 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
80387638
SW
837
838 roffnode_pop(r);
839 roffnode_cleanscope(r);
840 return(ROFF_IGN);
841
842}
843
844
845static void
846roffnode_cleanscope(struct roff *r)
847{
848
849 while (r->last) {
f88b6c16 850 if (--r->last->endspan != 0)
80387638
SW
851 break;
852 roffnode_pop(r);
853 }
854}
855
856
857/* ARGSUSED */
858static enum rofferr
859roff_ccond(ROFF_ARGS)
860{
861
862 if (NULL == r->last) {
60e1e752 863 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
80387638
SW
864 return(ROFF_IGN);
865 }
866
867 switch (r->last->tok) {
868 case (ROFF_el):
869 /* FALLTHROUGH */
870 case (ROFF_ie):
871 /* FALLTHROUGH */
872 case (ROFF_if):
873 break;
874 default:
60e1e752 875 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
80387638
SW
876 return(ROFF_IGN);
877 }
878
879 if (r->last->endspan > -1) {
60e1e752 880 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
80387638
SW
881 return(ROFF_IGN);
882 }
883
884 if ((*bufp)[pos])
60e1e752 885 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
80387638
SW
886
887 roffnode_pop(r);
888 roffnode_cleanscope(r);
889 return(ROFF_IGN);
890}
891
892
893/* ARGSUSED */
894static enum rofferr
895roff_block(ROFF_ARGS)
896{
897 int sv;
898 size_t sz;
899 char *name;
900
901 name = NULL;
902
903 if (ROFF_ig != tok) {
904 if ('\0' == (*bufp)[pos]) {
60e1e752 905 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
80387638
SW
906 return(ROFF_IGN);
907 }
908
909 /*
910 * Re-write `de1', since we don't really care about
911 * groff's strange compatibility mode, into `de'.
912 */
913
914 if (ROFF_de1 == tok)
915 tok = ROFF_de;
916 if (ROFF_de == tok)
917 name = *bufp + pos;
918 else
60e1e752 919 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
80387638
SW
920 roffs[tok].name);
921
a4c7eb57 922 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
80387638
SW
923 pos++;
924
a4c7eb57 925 while (isspace((unsigned char)(*bufp)[pos]))
80387638
SW
926 (*bufp)[pos++] = '\0';
927 }
928
929 roffnode_push(r, tok, name, ln, ppos);
930
931 /*
932 * At the beginning of a `de' macro, clear the existing string
933 * with the same name, if there is one. New content will be
934 * added from roff_block_text() in multiline mode.
935 */
936
937 if (ROFF_de == tok)
938 roff_setstr(r, name, "", 0);
939
940 if ('\0' == (*bufp)[pos])
941 return(ROFF_IGN);
942
943 /* If present, process the custom end-of-line marker. */
944
945 sv = pos;
a4c7eb57 946 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
80387638
SW
947 pos++;
948
949 /*
950 * Note: groff does NOT like escape characters in the input.
951 * Instead of detecting this, we're just going to let it fly and
952 * to hell with it.
953 */
954
955 assert(pos > sv);
956 sz = (size_t)(pos - sv);
957
958 if (1 == sz && '.' == (*bufp)[sv])
959 return(ROFF_IGN);
960
961 r->last->end = mandoc_malloc(sz + 1);
962
963 memcpy(r->last->end, *bufp + sv, sz);
964 r->last->end[(int)sz] = '\0';
965
966 if ((*bufp)[pos])
60e1e752 967 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
80387638
SW
968
969 return(ROFF_IGN);
970}
971
972
973/* ARGSUSED */
974static enum rofferr
975roff_block_sub(ROFF_ARGS)
976{
977 enum rofft t;
978 int i, j;
979
980 /*
981 * First check whether a custom macro exists at this level. If
982 * it does, then check against it. This is some of groff's
983 * stranger behaviours. If we encountered a custom end-scope
984 * tag and that tag also happens to be a "real" macro, then we
985 * need to try interpreting it again as a real macro. If it's
986 * not, then return ignore. Else continue.
987 */
988
989 if (r->last->end) {
60e1e752 990 for (i = pos, j = 0; r->last->end[j]; j++, i++)
80387638
SW
991 if ((*bufp)[i] != r->last->end[j])
992 break;
993
994 if ('\0' == r->last->end[j] &&
995 ('\0' == (*bufp)[i] ||
996 ' ' == (*bufp)[i] ||
997 '\t' == (*bufp)[i])) {
998 roffnode_pop(r);
999 roffnode_cleanscope(r);
1000
60e1e752
SW
1001 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
1002 i++;
1003
1004 pos = i;
80387638
SW
1005 if (ROFF_MAX != roff_parse(r, *bufp, &pos))
1006 return(ROFF_RERUN);
1007 return(ROFF_IGN);
1008 }
1009 }
1010
1011 /*
1012 * If we have no custom end-query or lookup failed, then try
1013 * pulling it out of the hashtable.
1014 */
1015
a4c7eb57 1016 t = roff_parse(r, *bufp, &pos);
80387638
SW
1017
1018 /*
1019 * Macros other than block-end are only significant
1020 * in `de' blocks; elsewhere, simply throw them away.
1021 */
1022 if (ROFF_cblock != t) {
1023 if (ROFF_de == tok)
1024 roff_setstr(r, r->last->name, *bufp + ppos, 1);
1025 return(ROFF_IGN);
1026 }
1027
1028 assert(roffs[t].proc);
1029 return((*roffs[t].proc)(r, t, bufp, szp,
1030 ln, ppos, pos, offs));
1031}
1032
1033
1034/* ARGSUSED */
1035static enum rofferr
1036roff_block_text(ROFF_ARGS)
1037{
1038
1039 if (ROFF_de == tok)
1040 roff_setstr(r, r->last->name, *bufp + pos, 1);
1041
1042 return(ROFF_IGN);
1043}
1044
1045
1046/* ARGSUSED */
1047static enum rofferr
1048roff_cond_sub(ROFF_ARGS)
1049{
1050 enum rofft t;
1051 enum roffrule rr;
a4c7eb57 1052 char *ep;
80387638 1053
80387638 1054 rr = r->last->rule;
a4c7eb57 1055 roffnode_cleanscope(r);
f88b6c16 1056 t = roff_parse(r, *bufp, &pos);
80387638 1057
a4c7eb57 1058 /*
f88b6c16
FF
1059 * Fully handle known macros when they are structurally
1060 * required or when the conditional evaluated to true.
80387638
SW
1061 */
1062
f88b6c16
FF
1063 if ((ROFF_MAX != t) &&
1064 (ROFF_ccond == t || ROFFRULE_ALLOW == rr ||
1065 ROFFMAC_STRUCT & roffs[t].flags)) {
1066 assert(roffs[t].proc);
1067 return((*roffs[t].proc)(r, t, bufp, szp,
1068 ln, ppos, pos, offs));
1069 }
36342e81 1070
f88b6c16 1071 /* Always check for the closing delimiter `\}'. */
36342e81 1072
f88b6c16
FF
1073 ep = &(*bufp)[pos];
1074 while (NULL != (ep = strchr(ep, '\\'))) {
1075 if ('}' != *(++ep))
1076 continue;
80387638 1077
f88b6c16
FF
1078 /*
1079 * If we're at the end of line, then just chop
1080 * off the \} and resize the buffer.
1081 * If we aren't, then convert it to spaces.
1082 */
a4c7eb57 1083
f88b6c16
FF
1084 if ('\0' == *(ep + 1)) {
1085 *--ep = '\0';
1086 *szp -= 2;
1087 } else
1088 *(ep - 1) = *ep = ' ';
80387638 1089
f88b6c16
FF
1090 roff_ccond(r, ROFF_ccond, bufp, szp,
1091 ln, pos, pos + 2, offs);
1092 break;
1093 }
1094 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
80387638
SW
1095}
1096
80387638
SW
1097/* ARGSUSED */
1098static enum rofferr
1099roff_cond_text(ROFF_ARGS)
1100{
a4c7eb57 1101 char *ep;
80387638
SW
1102 enum roffrule rr;
1103
1104 rr = r->last->rule;
a4c7eb57 1105 roffnode_cleanscope(r);
80387638 1106
a4c7eb57
SW
1107 ep = &(*bufp)[pos];
1108 for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
1109 ep++;
1110 if ('}' != *ep)
1111 continue;
1112 *ep = '&';
1113 roff_ccond(r, ROFF_ccond, bufp, szp,
1114 ln, pos, pos + 2, offs);
80387638 1115 }
80387638
SW
1116 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1117}
1118
80387638
SW
1119static enum roffrule
1120roff_evalcond(const char *v, int *pos)
1121{
1122
1123 switch (v[*pos]) {
1124 case ('n'):
1125 (*pos)++;
1126 return(ROFFRULE_ALLOW);
1127 case ('e'):
1128 /* FALLTHROUGH */
1129 case ('o'):
1130 /* FALLTHROUGH */
1131 case ('t'):
1132 (*pos)++;
1133 return(ROFFRULE_DENY);
1134 default:
1135 break;
1136 }
1137
1138 while (v[*pos] && ' ' != v[*pos])
1139 (*pos)++;
1140 return(ROFFRULE_DENY);
1141}
1142
1143/* ARGSUSED */
1144static enum rofferr
1145roff_line_ignore(ROFF_ARGS)
1146{
1147
80387638
SW
1148 return(ROFF_IGN);
1149}
1150
1151/* ARGSUSED */
1152static enum rofferr
1153roff_cond(ROFF_ARGS)
1154{
f88b6c16
FF
1155
1156 roffnode_push(r, tok, NULL, ln, ppos);
80387638 1157
a4c7eb57
SW
1158 /*
1159 * An `.el' has no conditional body: it will consume the value
1160 * of the current rstack entry set in prior `ie' calls or
1161 * defaults to DENY.
1162 *
1163 * If we're not an `el', however, then evaluate the conditional.
1164 */
80387638 1165
f88b6c16 1166 r->last->rule = ROFF_el == tok ?
a4c7eb57
SW
1167 (r->rstackpos < 0 ?
1168 ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
1169 roff_evalcond(*bufp, &pos);
80387638 1170
a4c7eb57
SW
1171 /*
1172 * An if-else will put the NEGATION of the current evaluated
1173 * conditional into the stack of rules.
1174 */
1175
80387638 1176 if (ROFF_ie == tok) {
a4c7eb57
SW
1177 if (r->rstackpos == RSTACK_MAX - 1) {
1178 mandoc_msg(MANDOCERR_MEM,
1179 r->parse, ln, ppos, NULL);
1180 return(ROFF_ERR);
1181 }
1182 r->rstack[++r->rstackpos] =
1183 ROFFRULE_DENY == r->last->rule ?
1184 ROFFRULE_ALLOW : ROFFRULE_DENY;
80387638
SW
1185 }
1186
1187 /* If the parent has false as its rule, then so do we. */
1188
1189 if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1190 r->last->rule = ROFFRULE_DENY;
1191
1192 /*
f88b6c16
FF
1193 * Determine scope.
1194 * If there is nothing on the line after the conditional,
1195 * not even whitespace, use next-line scope.
80387638
SW
1196 */
1197
f88b6c16
FF
1198 if ('\0' == (*bufp)[pos]) {
1199 r->last->endspan = 2;
1200 goto out;
1201 }
1202
1203 while (' ' == (*bufp)[pos])
1204 pos++;
1205
1206 /* An opening brace requests multiline scope. */
80387638
SW
1207
1208 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1209 r->last->endspan = -1;
1210 pos += 2;
f88b6c16 1211 goto out;
80387638
SW
1212 }
1213
1214 /*
f88b6c16
FF
1215 * Anything else following the conditional causes
1216 * single-line scope. Warn if the scope contains
1217 * nothing but trailing whitespace.
80387638
SW
1218 */
1219
1220 if ('\0' == (*bufp)[pos])
f88b6c16 1221 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
80387638 1222
f88b6c16 1223 r->last->endspan = 1;
80387638 1224
f88b6c16 1225out:
80387638
SW
1226 *offs = pos;
1227 return(ROFF_RERUN);
1228}
1229
1230
1231/* ARGSUSED */
1232static enum rofferr
1233roff_ds(ROFF_ARGS)
1234{
1235 char *name, *string;
1236
1237 /*
1238 * A symbol is named by the first word following the macro
1239 * invocation up to a space. Its value is anything after the
1240 * name's trailing whitespace and optional double-quote. Thus,
1241 *
1242 * [.ds foo "bar " ]
1243 *
1244 * will have `bar " ' as its value.
1245 */
1246
60e1e752
SW
1247 string = *bufp + pos;
1248 name = roff_getname(r, &string, ln, pos);
80387638
SW
1249 if ('\0' == *name)
1250 return(ROFF_IGN);
1251
60e1e752
SW
1252 /* Read past initial double-quote. */
1253 if ('"' == *string)
80387638
SW
1254 string++;
1255
1256 /* The rest is the value. */
1257 roff_setstr(r, name, string, 0);
1258 return(ROFF_IGN);
1259}
1260
36342e81
SW
1261int
1262roff_regisset(const struct roff *r, enum regs reg)
1263{
1264
1265 return(r->regs[(int)reg].set);
1266}
1267
1268unsigned int
1269roff_regget(const struct roff *r, enum regs reg)
1270{
1271
1272 return(r->regs[(int)reg].u);
1273}
1274
1275void
1276roff_regunset(struct roff *r, enum regs reg)
1277{
1278
1279 r->regs[(int)reg].set = 0;
1280}
80387638
SW
1281
1282/* ARGSUSED */
1283static enum rofferr
1284roff_nr(ROFF_ARGS)
1285{
60e1e752
SW
1286 const char *key;
1287 char *val;
a4c7eb57 1288 int iv;
80387638 1289
60e1e752
SW
1290 val = *bufp + pos;
1291 key = roff_getname(r, &val, ln, pos);
80387638 1292
80387638 1293 if (0 == strcmp(key, "nS")) {
36342e81
SW
1294 r->regs[(int)REG_nS].set = 1;
1295 if ((iv = mandoc_strntoi(val, strlen(val), 10)) >= 0)
1296 r->regs[(int)REG_nS].u = (unsigned)iv;
a4c7eb57 1297 else
36342e81 1298 r->regs[(int)REG_nS].u = 0u;
80387638
SW
1299 }
1300
1301 return(ROFF_IGN);
1302}
1303
60e1e752
SW
1304/* ARGSUSED */
1305static enum rofferr
1306roff_rm(ROFF_ARGS)
1307{
1308 const char *name;
1309 char *cp;
1310
1311 cp = *bufp + pos;
1312 while ('\0' != *cp) {
1313 name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1314 if ('\0' != *name)
1315 roff_setstr(r, name, NULL, 0);
1316 }
1317 return(ROFF_IGN);
1318}
1319
f88b6c16
FF
1320/* ARGSUSED */
1321static enum rofferr
1322roff_it(ROFF_ARGS)
1323{
1324 char *cp;
1325 size_t len;
1326 int iv;
1327
1328 /* Parse the number of lines. */
1329 cp = *bufp + pos;
1330 len = strcspn(cp, " \t");
1331 cp[len] = '\0';
1332 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1333 mandoc_msg(MANDOCERR_NUMERIC, r->parse,
1334 ln, ppos, *bufp + 1);
1335 return(ROFF_IGN);
1336 }
1337 cp += len + 1;
1338
1339 /* Arm the input line trap. */
1340 roffit_lines = iv;
1341 roffit_macro = mandoc_strdup(cp);
1342 return(ROFF_IGN);
1343}
1344
1345/* ARGSUSED */
1346static enum rofferr
1347roff_Dd(ROFF_ARGS)
1348{
1349 const char *const *cp;
1350
1351 if (MPARSE_MDOC != r->parsetype)
1352 for (cp = __mdoc_reserved; *cp; cp++)
1353 roff_setstr(r, *cp, NULL, 0);
1354
1355 return(ROFF_CONT);
1356}
1357
1358/* ARGSUSED */
1359static enum rofferr
1360roff_TH(ROFF_ARGS)
1361{
1362 const char *const *cp;
1363
1364 if (MPARSE_MDOC != r->parsetype)
1365 for (cp = __man_reserved; *cp; cp++)
1366 roff_setstr(r, *cp, NULL, 0);
1367
1368 return(ROFF_CONT);
1369}
1370
80387638
SW
1371/* ARGSUSED */
1372static enum rofferr
1373roff_TE(ROFF_ARGS)
1374{
1375
1376 if (NULL == r->tbl)
60e1e752 1377 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
80387638 1378 else
36342e81 1379 tbl_end(&r->tbl);
80387638 1380
80387638
SW
1381 return(ROFF_IGN);
1382}
1383
1384/* ARGSUSED */
1385static enum rofferr
1386roff_T_(ROFF_ARGS)
1387{
1388
1389 if (NULL == r->tbl)
60e1e752 1390 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
80387638
SW
1391 else
1392 tbl_restart(ppos, ln, r->tbl);
1393
1394 return(ROFF_IGN);
1395}
1396
36342e81
SW
#if 0
/*
 * Currently compiled out.
 * Returns 1 if an equation is open and eqn_end() reports ROFF_EQN
 * for it, 0 otherwise.
 */
static int
roff_closeeqn(struct roff *r)
{

	if (r->eqn == NULL)
		return(0);
	return(eqn_end(&r->eqn) == ROFF_EQN);
}
#endif
1405
1406static void
1407roff_openeqn(struct roff *r, const char *name, int line,
1408 int offs, const char *buf)
1409{
1410 struct eqn_node *e;
1411 int poff;
60e1e752
SW
1412
1413 assert(NULL == r->eqn);
36342e81 1414 e = eqn_alloc(name, offs, line, r->parse);
60e1e752
SW
1415
1416 if (r->last_eqn)
1417 r->last_eqn->next = e;
1418 else
1419 r->first_eqn = r->last_eqn = e;
1420
1421 r->eqn = r->last_eqn = e;
36342e81
SW
1422
1423 if (buf) {
1424 poff = 0;
1425 eqn_read(&r->eqn, line, buf, offs, &poff);
1426 }
1427}
1428
1429/* ARGSUSED */
1430static enum rofferr
1431roff_EQ(ROFF_ARGS)
1432{
1433
1434 roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
60e1e752
SW
1435 return(ROFF_IGN);
1436}
1437
1438/* ARGSUSED */
1439static enum rofferr
1440roff_EN(ROFF_ARGS)
1441{
1442
1443 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1444 return(ROFF_IGN);
1445}
1446
80387638
SW
1447/* ARGSUSED */
1448static enum rofferr
1449roff_TS(ROFF_ARGS)
1450{
f88b6c16 1451 struct tbl_node *tbl;
80387638
SW
1452
1453 if (r->tbl) {
60e1e752 1454 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
36342e81 1455 tbl_end(&r->tbl);
80387638
SW
1456 }
1457
f88b6c16 1458 tbl = tbl_alloc(ppos, ln, r->parse);
80387638
SW
1459
1460 if (r->last_tbl)
f88b6c16 1461 r->last_tbl->next = tbl;
80387638 1462 else
f88b6c16
FF
1463 r->first_tbl = r->last_tbl = tbl;
1464
1465 r->tbl = r->last_tbl = tbl;
1466 return(ROFF_IGN);
1467}
1468
1469/* ARGSUSED */
1470static enum rofferr
1471roff_cc(ROFF_ARGS)
1472{
1473 const char *p;
1474
1475 p = *bufp + pos;
1476
1477 if ('\0' == *p || '.' == (r->control = *p++))
1478 r->control = 0;
1479
1480 if ('\0' != *p)
1481 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
80387638 1482
80387638
SW
1483 return(ROFF_IGN);
1484}
1485
36342e81
SW
1486/* ARGSUSED */
1487static enum rofferr
1488roff_tr(ROFF_ARGS)
1489{
1490 const char *p, *first, *second;
1491 size_t fsz, ssz;
1492 enum mandoc_esc esc;
1493
1494 p = *bufp + pos;
1495
1496 if ('\0' == *p) {
1497 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1498 return(ROFF_IGN);
1499 }
1500
1501 while ('\0' != *p) {
1502 fsz = ssz = 1;
1503
1504 first = p++;
1505 if ('\\' == *first) {
1506 esc = mandoc_escape(&p, NULL, NULL);
1507 if (ESCAPE_ERROR == esc) {
1508 mandoc_msg
1509 (MANDOCERR_BADESCAPE, r->parse,
1510 ln, (int)(p - *bufp), NULL);
1511 return(ROFF_IGN);
1512 }
1513 fsz = (size_t)(p - first);
1514 }
1515
1516 second = p++;
1517 if ('\\' == *second) {
1518 esc = mandoc_escape(&p, NULL, NULL);
1519 if (ESCAPE_ERROR == esc) {
1520 mandoc_msg
1521 (MANDOCERR_BADESCAPE, r->parse,
1522 ln, (int)(p - *bufp), NULL);
1523 return(ROFF_IGN);
1524 }
1525 ssz = (size_t)(p - second);
1526 } else if ('\0' == *second) {
1527 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1528 ln, (int)(p - *bufp), NULL);
1529 second = " ";
1530 p--;
1531 }
1532
1533 if (fsz > 1) {
1534 roff_setstrn(&r->xmbtab, first,
1535 fsz, second, ssz, 0);
1536 continue;
1537 }
1538
1539 if (NULL == r->xtab)
1540 r->xtab = mandoc_calloc
1541 (128, sizeof(struct roffstr));
1542
1543 free(r->xtab[(int)*first].p);
1544 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1545 r->xtab[(int)*first].sz = ssz;
1546 }
1547
1548 return(ROFF_IGN);
1549}
1550
80387638
SW
1551/* ARGSUSED */
1552static enum rofferr
1553roff_so(ROFF_ARGS)
1554{
1555 char *name;
1556
60e1e752 1557 mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
80387638
SW
1558
1559 /*
1560 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1561 * opening anything that's not in our cwd or anything beneath
1562 * it. Thus, explicitly disallow traversing up the file-system
1563 * or using absolute paths.
1564 */
1565
1566 name = *bufp + pos;
1567 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
60e1e752 1568 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
80387638
SW
1569 return(ROFF_ERR);
1570 }
1571
1572 *offs = pos;
1573 return(ROFF_SO);
1574}
1575
1576/* ARGSUSED */
1577static enum rofferr
1578roff_userdef(ROFF_ARGS)
1579{
1580 const char *arg[9];
1581 char *cp, *n1, *n2;
1582 int i;
1583
1584 /*
1585 * Collect pointers to macro argument strings
1586 * and null-terminate them.
1587 */
1588 cp = *bufp + pos;
1589 for (i = 0; i < 9; i++)
1590 arg[i] = '\0' == *cp ? "" :
60e1e752 1591 mandoc_getarg(r->parse, &cp, ln, &pos);
80387638
SW
1592
1593 /*
1594 * Expand macro arguments.
1595 */
1596 *szp = 0;
1597 n1 = cp = mandoc_strdup(r->current_string);
1598 while (NULL != (cp = strstr(cp, "\\$"))) {
1599 i = cp[2] - '1';
1600 if (0 > i || 8 < i) {
1601 /* Not an argument invocation. */
1602 cp += 2;
1603 continue;
1604 }
1605
1606 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1607 n2 = mandoc_malloc(*szp);
1608
1609 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1610 strlcat(n2, arg[i], *szp);
1611 strlcat(n2, cp + 3, *szp);
1612
1613 cp = n2 + (cp - n1);
1614 free(n1);
1615 n1 = n2;
1616 }
1617
1618 /*
1619 * Replace the macro invocation
1620 * by the expanded macro.
1621 */
1622 free(*bufp);
1623 *bufp = n1;
1624 if (0 == *szp)
1625 *szp = strlen(*bufp) + 1;
1626
1627 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1628 ROFF_REPARSE : ROFF_APPEND);
1629}
1630
60e1e752
SW
1631static char *
1632roff_getname(struct roff *r, char **cpp, int ln, int pos)
1633{
1634 char *name, *cp;
1635
1636 name = *cpp;
1637 if ('\0' == *name)
1638 return(name);
1639
1640 /* Read until end of name. */
1641 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1642 if ('\\' != *cp)
1643 continue;
1644 cp++;
1645 if ('\\' == *cp)
1646 continue;
1647 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1648 *cp = '\0';
1649 name = cp;
1650 }
1651
1652 /* Nil-terminate name. */
1653 if ('\0' != *cp)
1654 *(cp++) = '\0';
1655
1656 /* Read past spaces. */
1657 while (' ' == *cp)
1658 cp++;
1659
1660 *cpp = cp;
1661 return(name);
1662}
1663
80387638
SW
1664/*
1665 * Store *string into the user-defined string called *name.
1666 * In multiline mode, append to an existing entry and append '\n';
1667 * else replace the existing entry, if there is one.
1668 * To clear an existing entry, call with (*r, *name, NULL, 0).
1669 */
1670static void
1671roff_setstr(struct roff *r, const char *name, const char *string,
1672 int multiline)
1673{
36342e81
SW
1674
1675 roff_setstrn(&r->strtab, name, strlen(name), string,
1676 string ? strlen(string) : 0, multiline);
1677}
1678
1679static void
1680roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
1681 const char *string, size_t stringsz, int multiline)
1682{
1683 struct roffkv *n;
1684 char *c;
1685 int i;
1686 size_t oldch, newch;
80387638
SW
1687
1688 /* Search for an existing string with the same name. */
36342e81
SW
1689 n = *r;
1690
1691 while (n && strcmp(name, n->key.p))
80387638
SW
1692 n = n->next;
1693
1694 if (NULL == n) {
1695 /* Create a new string table entry. */
36342e81
SW
1696 n = mandoc_malloc(sizeof(struct roffkv));
1697 n->key.p = mandoc_strndup(name, namesz);
1698 n->key.sz = namesz;
1699 n->val.p = NULL;
1700 n->val.sz = 0;
1701 n->next = *r;
1702 *r = n;
80387638
SW
1703 } else if (0 == multiline) {
1704 /* In multiline mode, append; else replace. */
36342e81
SW
1705 free(n->val.p);
1706 n->val.p = NULL;
1707 n->val.sz = 0;
80387638
SW
1708 }
1709
1710 if (NULL == string)
1711 return;
1712
1713 /*
1714 * One additional byte for the '\n' in multiline mode,
1715 * and one for the terminating '\0'.
1716 */
36342e81
SW
1717 newch = stringsz + (multiline ? 2u : 1u);
1718
1719 if (NULL == n->val.p) {
1720 n->val.p = mandoc_malloc(newch);
1721 *n->val.p = '\0';
80387638
SW
1722 oldch = 0;
1723 } else {
36342e81
SW
1724 oldch = n->val.sz;
1725 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
80387638
SW
1726 }
1727
1728 /* Skip existing content in the destination buffer. */
36342e81 1729 c = n->val.p + (int)oldch;
80387638
SW
1730
1731 /* Append new content to the destination buffer. */
36342e81
SW
1732 i = 0;
1733 while (i < (int)stringsz) {
80387638
SW
1734 /*
1735 * Rudimentary roff copy mode:
1736 * Handle escaped backslashes.
1737 */
36342e81
SW
1738 if ('\\' == string[i] && '\\' == string[i + 1])
1739 i++;
1740 *c++ = string[i++];
80387638
SW
1741 }
1742
1743 /* Append terminating bytes. */
1744 if (multiline)
1745 *c++ = '\n';
36342e81 1746
80387638 1747 *c = '\0';
36342e81 1748 n->val.sz = (int)(c - n->val.p);
80387638
SW
1749}
1750
80387638
SW
1751static const char *
1752roff_getstrn(const struct roff *r, const char *name, size_t len)
1753{
36342e81 1754 const struct roffkv *n;
80387638 1755
36342e81
SW
1756 for (n = r->strtab; n; n = n->next)
1757 if (0 == strncmp(name, n->key.p, len) &&
1758 '\0' == n->key.p[(int)len])
1759 return(n->val.p);
80387638 1760
36342e81 1761 return(NULL);
80387638
SW
1762}
1763
80387638 1764static void
36342e81 1765roff_freestr(struct roffkv *r)
80387638 1766{
36342e81 1767 struct roffkv *n, *nn;
80387638 1768
36342e81
SW
1769 for (n = r; n; n = nn) {
1770 free(n->key.p);
1771 free(n->val.p);
80387638
SW
1772 nn = n->next;
1773 free(n);
1774 }
80387638
SW
1775}
1776
1777const struct tbl_span *
1778roff_span(const struct roff *r)
1779{
1780
1781 return(r->tbl ? tbl_span(r->tbl) : NULL);
1782}
60e1e752
SW
1783
1784const struct eqn *
1785roff_eqn(const struct roff *r)
1786{
1787
1788 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1789}
36342e81
SW
1790
1791/*
1792 * Duplicate an input string, making the appropriate character
1793 * conversations (as stipulated by `tr') along the way.
1794 * Returns a heap-allocated string with all the replacements made.
1795 */
1796char *
1797roff_strdup(const struct roff *r, const char *p)
1798{
1799 const struct roffkv *cp;
1800 char *res;
1801 const char *pp;
1802 size_t ssz, sz;
1803 enum mandoc_esc esc;
1804
1805 if (NULL == r->xmbtab && NULL == r->xtab)
1806 return(mandoc_strdup(p));
1807 else if ('\0' == *p)
1808 return(mandoc_strdup(""));
1809
1810 /*
1811 * Step through each character looking for term matches
1812 * (remember that a `tr' can be invoked with an escape, which is
1813 * a glyph but the escape is multi-character).
1814 * We only do this if the character hash has been initialised
1815 * and the string is >0 length.
1816 */
1817
1818 res = NULL;
1819 ssz = 0;
1820
1821 while ('\0' != *p) {
1822 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
1823 sz = r->xtab[(int)*p].sz;
1824 res = mandoc_realloc(res, ssz + sz + 1);
1825 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
1826 ssz += sz;
1827 p++;
1828 continue;
1829 } else if ('\\' != *p) {
1830 res = mandoc_realloc(res, ssz + 2);
1831 res[ssz++] = *p++;
1832 continue;
1833 }
1834
1835 /* Search for term matches. */
1836 for (cp = r->xmbtab; cp; cp = cp->next)
1837 if (0 == strncmp(p, cp->key.p, cp->key.sz))
1838 break;
1839
1840 if (NULL != cp) {
1841 /*
1842 * A match has been found.
1843 * Append the match to the array and move
1844 * forward by its keysize.
1845 */
1846 res = mandoc_realloc
1847 (res, ssz + cp->val.sz + 1);
1848 memcpy(res + ssz, cp->val.p, cp->val.sz);
1849 ssz += cp->val.sz;
1850 p += (int)cp->key.sz;
1851 continue;
1852 }
1853
1854 /*
1855 * Handle escapes carefully: we need to copy
1856 * over just the escape itself, or else we might
1857 * do replacements within the escape itself.
1858 * Make sure to pass along the bogus string.
1859 */
1860 pp = p++;
1861 esc = mandoc_escape(&p, NULL, NULL);
1862 if (ESCAPE_ERROR == esc) {
1863 sz = strlen(pp);
1864 res = mandoc_realloc(res, ssz + sz + 1);
1865 memcpy(res + ssz, pp, sz);
1866 break;
1867 }
1868 /*
1869 * We bail out on bad escapes.
1870 * No need to warn: we already did so when
1871 * roff_res() was called.
1872 */
1873 sz = (int)(p - pp);
1874 res = mandoc_realloc(res, ssz + sz + 1);
1875 memcpy(res + ssz, pp, sz);
1876 ssz += sz;
1877 }
1878
1879 res[(int)ssz] = '\0';
1880 return(res);
1881}
f88b6c16
FF
1882
1883/*
1884 * Find out whether a line is a macro line or not.
1885 * If it is, adjust the current position and return one; if it isn't,
1886 * return zero and don't change the current position.
1887 * If the control character has been set with `.cc', then let that grain
1888 * precedence.
1889 * This is slighly contrary to groff, where using the non-breaking
1890 * control character when `cc' has been invoked will cause the
1891 * non-breaking macro contents to be printed verbatim.
1892 */
1893int
1894roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
1895{
1896 int pos;
1897
1898 pos = *ppos;
1899
1900 if (0 != r->control && cp[pos] == r->control)
1901 pos++;
1902 else if (0 != r->control)
1903 return(0);
1904 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
1905 pos += 2;
1906 else if ('.' == cp[pos] || '\'' == cp[pos])
1907 pos++;
1908 else
1909 return(0);
1910
1911 while (' ' == cp[pos] || '\t' == cp[pos])
1912 pos++;
1913
1914 *ppos = pos;
1915 return(1);
1916}