Import mdocml-1.13.1
[dragonfly.git] / contrib / mdocml / mdoc.c
CommitLineData
/*	$Id: mdoc.c,v 1.223 2014/08/06 15:09:05 schwarze Exp $ */
/*
 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2010, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
18#ifdef HAVE_CONFIG_H
19#include "config.h"
20#endif
21
22#include <sys/types.h>
23
24#include <assert.h>
070c62a6 25#include <ctype.h>
80387638
SW
26#include <stdarg.h>
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30#include <time.h>
31
60e1e752 32#include "mdoc.h"
80387638 33#include "mandoc.h"
070c62a6 34#include "mandoc_aux.h"
80387638
SW
35#include "libmdoc.h"
36#include "libmandoc.h"
37
070c62a6 38const char *const __mdoc_macronames[MDOC_MAX + 1] = {
80387638
SW
39 "Ap", "Dd", "Dt", "Os",
40 "Sh", "Ss", "Pp", "D1",
41 "Dl", "Bd", "Ed", "Bl",
42 "El", "It", "Ad", "An",
43 "Ar", "Cd", "Cm", "Dv",
44 "Er", "Ev", "Ex", "Fa",
45 "Fd", "Fl", "Fn", "Ft",
46 "Ic", "In", "Li", "Nd",
47 "Nm", "Op", "Ot", "Pa",
48 "Rv", "St", "Va", "Vt",
80387638 49 "Xr", "%A", "%B", "%D",
80387638 50 "%I", "%J", "%N", "%O",
80387638
SW
51 "%P", "%R", "%T", "%V",
52 "Ac", "Ao", "Aq", "At",
53 "Bc", "Bf", "Bo", "Bq",
54 "Bsx", "Bx", "Db", "Dc",
55 "Do", "Dq", "Ec", "Ef",
56 "Em", "Eo", "Fx", "Ms",
57 "No", "Ns", "Nx", "Ox",
58 "Pc", "Pf", "Po", "Pq",
59 "Qc", "Ql", "Qo", "Qq",
60 "Re", "Rs", "Sc", "So",
61 "Sq", "Sm", "Sx", "Sy",
62 "Tn", "Ux", "Xc", "Xo",
63 "Fo", "Fc", "Oo", "Oc",
64 "Bk", "Ek", "Bt", "Hf",
65 "Fr", "Ud", "Lb", "Lp",
66 "Lk", "Mt", "Brq", "Bro",
80387638 67 "Brc", "%C", "Es", "En",
80387638 68 "Dx", "%Q", "br", "sp",
070c62a6 69 "%U", "Ta", "ll", "text",
80387638
SW
70 };
71
070c62a6 72const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
80387638 73 "split", "nosplit", "ragged",
070c62a6
FF
74 "unfilled", "literal", "file",
75 "offset", "bullet", "dash",
76 "hyphen", "item", "enum",
77 "tag", "diag", "hang",
78 "ohang", "inset", "column",
79 "width", "compact", "std",
80387638
SW
80 "filled", "words", "emphasis",
81 "symbolic", "nested", "centered"
82 };
83
84const char * const *mdoc_macronames = __mdoc_macronames;
85const char * const *mdoc_argnames = __mdoc_argnames;
86
/* Parser life cycle. */
static	void		  mdoc_alloc1(struct mdoc *);
static	void		  mdoc_free1(struct mdoc *);

/* Node creation, linkage and destruction. */
static	struct mdoc_node *node_alloc(struct mdoc *, int, int,
				enum mdoct, enum mdoc_type);
static	int		  node_append(struct mdoc *, struct mdoc_node *);
static	void		  mdoc_node_unlink(struct mdoc *, struct mdoc_node *);
static	void		  mdoc_node_free(struct mdoc_node *);

/* Per-line parsers. */
#if 0
static	int		  mdoc_preptext(struct mdoc *, int, char *, int);
#endif
static	int		  mdoc_ptext(struct mdoc *, int, char *, int);
static	int		  mdoc_pmacro(struct mdoc *, int, char *, int);
80387638 101
070c62a6 102
80387638 103const struct mdoc_node *
f88b6c16 104mdoc_node(const struct mdoc *mdoc)
80387638
SW
105{
106
f88b6c16 107 return(mdoc->first);
80387638
SW
108}
109
80387638 110const struct mdoc_meta *
f88b6c16 111mdoc_meta(const struct mdoc *mdoc)
80387638
SW
112{
113
f88b6c16 114 return(&mdoc->meta);
80387638
SW
115}
116
80387638
SW
117/*
118 * Frees volatile resources (parse tree, meta-data, fields).
119 */
120static void
121mdoc_free1(struct mdoc *mdoc)
122{
123
124 if (mdoc->first)
125 mdoc_node_delete(mdoc, mdoc->first);
070c62a6
FF
126 free(mdoc->meta.msec);
127 free(mdoc->meta.vol);
128 free(mdoc->meta.arch);
129 free(mdoc->meta.date);
130 free(mdoc->meta.title);
131 free(mdoc->meta.os);
132 free(mdoc->meta.name);
80387638
SW
133}
134
80387638
SW
135/*
136 * Allocate all volatile resources (parse tree, meta-data, fields).
137 */
138static void
139mdoc_alloc1(struct mdoc *mdoc)
140{
141
142 memset(&mdoc->meta, 0, sizeof(struct mdoc_meta));
143 mdoc->flags = 0;
144 mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
145 mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node));
146 mdoc->first = mdoc->last;
147 mdoc->last->type = MDOC_ROOT;
36342e81 148 mdoc->last->tok = MDOC_MAX;
80387638
SW
149 mdoc->next = MDOC_NEXT_CHILD;
150}
151
80387638
SW
/*
 * Prepare the parser for another document: drop the per-parse data
 * via mdoc_free1() and rebuild it via mdoc_alloc1().  Fields that
 * neither of those two functions touches survive the reset.
 */
void
mdoc_reset(struct mdoc *mdoc)
{
	/* The old tree must be released before the new root is installed. */
	mdoc_free1(mdoc);
	mdoc_alloc1(mdoc);
}
165
80387638
SW
/*
 * Destroy the parser: release the per-parse data and then the
 * struct mdoc itself.  The pointer must not be used afterwards.
 */
void
mdoc_free(struct mdoc *mdoc)
{
	/* Contents first, container last. */
	mdoc_free1(mdoc);
	free(mdoc);
}
177
80387638 178/*
070c62a6 179 * Allocate volatile and non-volatile parse resources.
80387638
SW
180 */
181struct mdoc *
070c62a6
FF
182mdoc_alloc(struct roff *roff, struct mparse *parse,
183 const char *defos, int quick)
80387638
SW
184{
185 struct mdoc *p;
186
187 p = mandoc_calloc(1, sizeof(struct mdoc));
188
60e1e752 189 p->parse = parse;
f88b6c16 190 p->defos = defos;
070c62a6 191 p->quick = quick;
36342e81 192 p->roff = roff;
80387638
SW
193
194 mdoc_hash_init();
195 mdoc_alloc1(p);
196 return(p);
197}
198
/*
 * Signal the end of input; the work is delegated to mdoc_macroend(),
 * whose return value is passed through unchanged.
 */
int
mdoc_endparse(struct mdoc *mdoc)
{
	int	 rc;

	rc = mdoc_macroend(mdoc);
	return rc;
}
205
60e1e752 206int
f88b6c16 207mdoc_addeqn(struct mdoc *mdoc, const struct eqn *ep)
60e1e752
SW
208{
209 struct mdoc_node *n;
210
f88b6c16 211 n = node_alloc(mdoc, ep->ln, ep->pos, MDOC_MAX, MDOC_EQN);
60e1e752
SW
212 n->eqn = ep;
213
f88b6c16 214 if ( ! node_append(mdoc, n))
60e1e752
SW
215 return(0);
216
f88b6c16 217 mdoc->next = MDOC_NEXT_SIBLING;
60e1e752
SW
218 return(1);
219}
220
80387638 221int
f88b6c16 222mdoc_addspan(struct mdoc *mdoc, const struct tbl_span *sp)
80387638 223{
60e1e752 224 struct mdoc_node *n;
80387638 225
f88b6c16 226 n = node_alloc(mdoc, sp->line, 0, MDOC_MAX, MDOC_TBL);
60e1e752
SW
227 n->span = sp;
228
f88b6c16 229 if ( ! node_append(mdoc, n))
60e1e752
SW
230 return(0);
231
f88b6c16 232 mdoc->next = MDOC_NEXT_SIBLING;
60e1e752 233 return(1);
80387638
SW
234}
235
80387638
SW
236/*
237 * Main parse routine. Parses a single line -- really just hands off to
238 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
239 */
240int
f88b6c16 241mdoc_parseln(struct mdoc *mdoc, int ln, char *buf, int offs)
80387638
SW
242{
243
f88b6c16 244 mdoc->flags |= MDOC_NEWLINE;
80387638
SW
245
246 /*
247 * Let the roff nS register switch SYNOPSIS mode early,
248 * such that the parser knows at all times
249 * whether this mode is on or off.
250 * Note that this mode is also switched by the Sh macro.
251 */
7888c61d
FF
252 if (roff_getreg(mdoc->roff, "nS"))
253 mdoc->flags |= MDOC_SYNOPSIS;
254 else
255 mdoc->flags &= ~MDOC_SYNOPSIS;
80387638 256
f88b6c16 257 return(roff_getcontrol(mdoc->roff, buf, &offs) ?
070c62a6
FF
258 mdoc_pmacro(mdoc, ln, buf, offs) :
259 mdoc_ptext(mdoc, ln, buf, offs));
80387638
SW
260}
261
80387638
SW
262int
263mdoc_macro(MACRO_PROT_ARGS)
264{
265 assert(tok < MDOC_MAX);
266
070c62a6
FF
267 if (mdoc->flags & MDOC_PBODY) {
268 if (tok == MDOC_Dt) {
269 mandoc_vmsg(MANDOCERR_DT_LATE,
270 mdoc->parse, line, ppos,
271 "Dt %s", buf + *pos);
272 return(1);
273 }
274 } else if ( ! (mdoc_macros[tok].flags & MDOC_PROLOGUE)) {
275 if (mdoc->meta.title == NULL) {
276 mandoc_vmsg(MANDOCERR_DT_NOTITLE,
277 mdoc->parse, line, ppos, "%s %s",
278 mdoc_macronames[tok], buf + *pos);
279 mdoc->meta.title = mandoc_strdup("UNTITLED");
280 }
f88b6c16
FF
281 if (NULL == mdoc->meta.vol)
282 mdoc->meta.vol = mandoc_strdup("LOCAL");
f88b6c16 283 mdoc->flags |= MDOC_PBODY;
80387638
SW
284 }
285
f88b6c16 286 return((*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf));
80387638
SW
287}
288
289
290static int
291node_append(struct mdoc *mdoc, struct mdoc_node *p)
292{
293
294 assert(mdoc->last);
295 assert(mdoc->first);
296 assert(MDOC_ROOT != p->type);
297
298 switch (mdoc->next) {
070c62a6 299 case MDOC_NEXT_SIBLING:
80387638
SW
300 mdoc->last->next = p;
301 p->prev = mdoc->last;
302 p->parent = mdoc->last->parent;
303 break;
070c62a6 304 case MDOC_NEXT_CHILD:
80387638
SW
305 mdoc->last->child = p;
306 p->parent = mdoc->last;
307 break;
308 default:
309 abort();
310 /* NOTREACHED */
311 }
312
313 p->parent->nchild++;
314
315 /*
316 * Copy over the normalised-data pointer of our parent. Not
317 * everybody has one, but copying a null pointer is fine.
318 */
319
320 switch (p->type) {
070c62a6 321 case MDOC_BODY:
f88b6c16
FF
322 if (ENDBODY_NOT != p->end)
323 break;
80387638 324 /* FALLTHROUGH */
070c62a6 325 case MDOC_TAIL:
80387638 326 /* FALLTHROUGH */
070c62a6 327 case MDOC_HEAD:
80387638
SW
328 p->norm = p->parent->norm;
329 break;
330 default:
331 break;
332 }
333
334 if ( ! mdoc_valid_pre(mdoc, p))
335 return(0);
336
337 switch (p->type) {
070c62a6 338 case MDOC_HEAD:
80387638
SW
339 assert(MDOC_BLOCK == p->parent->type);
340 p->parent->head = p;
341 break;
070c62a6 342 case MDOC_TAIL:
80387638
SW
343 assert(MDOC_BLOCK == p->parent->type);
344 p->parent->tail = p;
345 break;
070c62a6 346 case MDOC_BODY:
80387638
SW
347 if (p->end)
348 break;
349 assert(MDOC_BLOCK == p->parent->type);
350 p->parent->body = p;
351 break;
352 default:
353 break;
354 }
355
356 mdoc->last = p;
357
358 switch (p->type) {
070c62a6 359 case MDOC_TBL:
80387638 360 /* FALLTHROUGH */
070c62a6 361 case MDOC_TEXT:
80387638
SW
362 if ( ! mdoc_valid_post(mdoc))
363 return(0);
364 break;
365 default:
366 break;
367 }
368
369 return(1);
370}
371
80387638 372static struct mdoc_node *
070c62a6 373node_alloc(struct mdoc *mdoc, int line, int pos,
80387638
SW
374 enum mdoct tok, enum mdoc_type type)
375{
376 struct mdoc_node *p;
377
378 p = mandoc_calloc(1, sizeof(struct mdoc_node));
f88b6c16 379 p->sec = mdoc->lastsec;
80387638
SW
380 p->line = line;
381 p->pos = pos;
7888c61d 382 p->lastline = line;
80387638
SW
383 p->tok = tok;
384 p->type = type;
385
386 /* Flag analysis. */
387
f88b6c16 388 if (MDOC_SYNOPSIS & mdoc->flags)
80387638
SW
389 p->flags |= MDOC_SYNPRETTY;
390 else
391 p->flags &= ~MDOC_SYNPRETTY;
f88b6c16 392 if (MDOC_NEWLINE & mdoc->flags)
80387638 393 p->flags |= MDOC_LINE;
f88b6c16 394 mdoc->flags &= ~MDOC_NEWLINE;
80387638
SW
395
396 return(p);
397}
398
80387638 399int
f88b6c16 400mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok)
80387638
SW
401{
402 struct mdoc_node *p;
403
f88b6c16
FF
404 p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL);
405 if ( ! node_append(mdoc, p))
80387638 406 return(0);
f88b6c16 407 mdoc->next = MDOC_NEXT_CHILD;
80387638
SW
408 return(1);
409}
410
80387638 411int
f88b6c16 412mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok)
80387638
SW
413{
414 struct mdoc_node *p;
415
f88b6c16
FF
416 assert(mdoc->first);
417 assert(mdoc->last);
80387638 418
f88b6c16
FF
419 p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD);
420 if ( ! node_append(mdoc, p))
80387638 421 return(0);
f88b6c16 422 mdoc->next = MDOC_NEXT_CHILD;
80387638
SW
423 return(1);
424}
425
80387638 426int
f88b6c16 427mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok)
80387638
SW
428{
429 struct mdoc_node *p;
430
f88b6c16
FF
431 p = node_alloc(mdoc, line, pos, tok, MDOC_BODY);
432 if ( ! node_append(mdoc, p))
80387638 433 return(0);
f88b6c16 434 mdoc->next = MDOC_NEXT_CHILD;
80387638
SW
435 return(1);
436}
437
80387638 438int
f88b6c16 439mdoc_endbody_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok,
80387638
SW
440 struct mdoc_node *body, enum mdoc_endbody end)
441{
442 struct mdoc_node *p;
443
f88b6c16 444 p = node_alloc(mdoc, line, pos, tok, MDOC_BODY);
80387638 445 p->pending = body;
f88b6c16 446 p->norm = body->norm;
80387638 447 p->end = end;
f88b6c16 448 if ( ! node_append(mdoc, p))
80387638 449 return(0);
f88b6c16 450 mdoc->next = MDOC_NEXT_SIBLING;
80387638
SW
451 return(1);
452}
453
80387638 454int
070c62a6 455mdoc_block_alloc(struct mdoc *mdoc, int line, int pos,
80387638
SW
456 enum mdoct tok, struct mdoc_arg *args)
457{
458 struct mdoc_node *p;
459
f88b6c16 460 p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK);
80387638
SW
461 p->args = args;
462 if (p->args)
463 (args->refcnt)++;
464
465 switch (tok) {
070c62a6
FF
466 case MDOC_Bd:
467 /* FALLTHROUGH */
468 case MDOC_Bf:
80387638 469 /* FALLTHROUGH */
070c62a6 470 case MDOC_Bl:
80387638 471 /* FALLTHROUGH */
070c62a6 472 case MDOC_En:
80387638 473 /* FALLTHROUGH */
070c62a6 474 case MDOC_Rs:
80387638
SW
475 p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
476 break;
477 default:
478 break;
479 }
480
f88b6c16 481 if ( ! node_append(mdoc, p))
80387638 482 return(0);
f88b6c16 483 mdoc->next = MDOC_NEXT_CHILD;
80387638
SW
484 return(1);
485}
486
80387638 487int
070c62a6 488mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos,
80387638
SW
489 enum mdoct tok, struct mdoc_arg *args)
490{
491 struct mdoc_node *p;
492
f88b6c16 493 p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM);
80387638
SW
494 p->args = args;
495 if (p->args)
496 (args->refcnt)++;
497
498 switch (tok) {
070c62a6 499 case MDOC_An:
80387638
SW
500 p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
501 break;
502 default:
503 break;
504 }
505
f88b6c16 506 if ( ! node_append(mdoc, p))
80387638 507 return(0);
f88b6c16 508 mdoc->next = MDOC_NEXT_CHILD;
80387638
SW
509 return(1);
510}
511
80387638 512int
f88b6c16 513mdoc_word_alloc(struct mdoc *mdoc, int line, int pos, const char *p)
80387638
SW
514{
515 struct mdoc_node *n;
80387638 516
f88b6c16
FF
517 n = node_alloc(mdoc, line, pos, MDOC_MAX, MDOC_TEXT);
518 n->string = roff_strdup(mdoc->roff, p);
80387638 519
f88b6c16 520 if ( ! node_append(mdoc, n))
80387638
SW
521 return(0);
522
f88b6c16 523 mdoc->next = MDOC_NEXT_SIBLING;
80387638
SW
524 return(1);
525}
526
7888c61d
FF
527void
528mdoc_word_append(struct mdoc *mdoc, const char *p)
529{
530 struct mdoc_node *n;
531 char *addstr, *newstr;
532
533 n = mdoc->last;
534 addstr = roff_strdup(mdoc->roff, p);
070c62a6 535 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
7888c61d
FF
536 free(addstr);
537 free(n->string);
538 n->string = newstr;
539 mdoc->next = MDOC_NEXT_SIBLING;
540}
80387638
SW
541
542static void
543mdoc_node_free(struct mdoc_node *p)
544{
545
546 if (MDOC_BLOCK == p->type || MDOC_ELEM == p->type)
547 free(p->norm);
548 if (p->string)
549 free(p->string);
550 if (p->args)
551 mdoc_argv_free(p->args);
552 free(p);
553}
554
80387638 555static void
f88b6c16 556mdoc_node_unlink(struct mdoc *mdoc, struct mdoc_node *n)
80387638
SW
557{
558
559 /* Adjust siblings. */
560
561 if (n->prev)
562 n->prev->next = n->next;
563 if (n->next)
564 n->next->prev = n->prev;
565
566 /* Adjust parent. */
567
568 if (n->parent) {
569 n->parent->nchild--;
570 if (n->parent->child == n)
571 n->parent->child = n->prev ? n->prev : n->next;
572 if (n->parent->last == n)
573 n->parent->last = n->prev ? n->prev : NULL;
574 }
575
576 /* Adjust parse point, if applicable. */
577
f88b6c16 578 if (mdoc && mdoc->last == n) {
80387638 579 if (n->prev) {
f88b6c16
FF
580 mdoc->last = n->prev;
581 mdoc->next = MDOC_NEXT_SIBLING;
80387638 582 } else {
f88b6c16
FF
583 mdoc->last = n->parent;
584 mdoc->next = MDOC_NEXT_CHILD;
80387638
SW
585 }
586 }
587
f88b6c16
FF
588 if (mdoc && mdoc->first == n)
589 mdoc->first = NULL;
80387638
SW
590}
591
80387638 592void
f88b6c16 593mdoc_node_delete(struct mdoc *mdoc, struct mdoc_node *p)
80387638
SW
594{
595
596 while (p->child) {
597 assert(p->nchild);
f88b6c16 598 mdoc_node_delete(mdoc, p->child);
80387638
SW
599 }
600 assert(0 == p->nchild);
601
f88b6c16 602 mdoc_node_unlink(mdoc, p);
80387638
SW
603 mdoc_node_free(p);
604}
605
f88b6c16
FF
/*
 * Move node p to the current parse point: unlink it from where it
 * is, then append it again.  Returns the result of node_append().
 */
int
mdoc_node_relink(struct mdoc *mdoc, struct mdoc_node *p)
{
	int	 rc;

	mdoc_node_unlink(mdoc, p);
	rc = node_append(mdoc, p);
	return rc;
}
613
36342e81
SW
#if 0
/*
 * Pre-treat a text line (currently compiled out).
 * A text line may contain equations, which must be handled apart
 * from the regular text, so step through the line looking for the
 * equation delimiter.  This is meant to cope with several equations
 * on one line and with equations that do not end on this line, i.e.
 * that re-enter in the next roff parse.
 * NOTE(review): when no closing delimiter is found, eqend stays NULL
 * and is still used in the final offset computation -- confirm
 * before re-enabling this code.
 */
static int
mdoc_preptext(struct mdoc *mdoc, int line, char *buf, int offs)
{
	char	*eqstart, *eqend;
	char	 delim;

	while (buf[offs] != '\0') {
		/* Cut the line at the opening delimiter, if eqn is set. */
		eqstart = NULL;
		delim = roff_eqndelim(mdoc->roff);
		if (delim != '\0') {
			eqstart = strchr(buf + offs, delim);
			if (eqstart != NULL)
				*eqstart++ = '\0';
		}

		/* Parse the text before the equation as normal. */
		if ( ! mdoc_ptext(mdoc, line, buf, offs))
			return 0;

		/* Continue only if an equation exists. */
		if (eqstart == NULL)
			break;

		/* Read past the end of the equation. */
		offs += eqstart - (buf + offs);
		assert(eqstart == &buf[offs]);
		eqend = strchr(buf + offs, delim);
		if (eqend != NULL) {
			*eqend++ = '\0';
			while (*eqend == ' ')
				eqend++;
		}

		/* Parse the equation itself. */
		roff_openeqn(mdoc->roff, NULL, line, offs, buf);

		/* Process a finished equation? */
		if (roff_closeeqn(mdoc->roff))
			if ( ! mdoc_addeqn(mdoc, roff_eqn(mdoc->roff)))
				return 0;
		offs += (eqend - (buf + offs));
	}

	return 1;
}
#endif
80387638
SW
667
668/*
669 * Parse free-form text, that is, a line that does not begin with the
670 * control character.
671 */
672static int
f88b6c16 673mdoc_ptext(struct mdoc *mdoc, int line, char *buf, int offs)
80387638
SW
674{
675 char *c, *ws, *end;
676 struct mdoc_node *n;
677
f88b6c16
FF
678 assert(mdoc->last);
679 n = mdoc->last;
80387638
SW
680
681 /*
682 * Divert directly to list processing if we're encountering a
683 * columnar MDOC_BLOCK with or without a prior MDOC_BLOCK entry
684 * (a MDOC_BODY means it's already open, in which case we should
685 * process within its context in the normal way).
686 */
687
688 if (MDOC_Bl == n->tok && MDOC_BODY == n->type &&
070c62a6 689 LIST_column == n->norm->Bl.type) {
80387638 690 /* `Bl' is open without any children. */
f88b6c16
FF
691 mdoc->flags |= MDOC_FREECOL;
692 return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf));
80387638
SW
693 }
694
695 if (MDOC_It == n->tok && MDOC_BLOCK == n->type &&
070c62a6
FF
696 NULL != n->parent &&
697 MDOC_Bl == n->parent->tok &&
698 LIST_column == n->parent->norm->Bl.type) {
80387638 699 /* `Bl' has block-level `It' children. */
f88b6c16
FF
700 mdoc->flags |= MDOC_FREECOL;
701 return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf));
80387638
SW
702 }
703
704 /*
705 * Search for the beginning of unescaped trailing whitespace (ws)
706 * and for the first character not to be output (end).
707 */
708
709 /* FIXME: replace with strcspn(). */
710 ws = NULL;
711 for (c = end = buf + offs; *c; c++) {
712 switch (*c) {
80387638
SW
713 case ' ':
714 if (NULL == ws)
715 ws = c;
716 continue;
717 case '\t':
718 /*
719 * Always warn about trailing tabs,
720 * even outside literal context,
721 * where they should be put on the next line.
722 */
723 if (NULL == ws)
724 ws = c;
725 /*
726 * Strip trailing tabs in literal context only;
727 * outside, they affect the next line.
728 */
f88b6c16 729 if (MDOC_LITERAL & mdoc->flags)
80387638
SW
730 continue;
731 break;
732 case '\\':
733 /* Skip the escaped character, too, if any. */
734 if (c[1])
735 c++;
736 /* FALLTHROUGH */
737 default:
738 ws = NULL;
739 break;
740 }
741 end = c + 1;
742 }
743 *end = '\0';
744
745 if (ws)
070c62a6
FF
746 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse,
747 line, (int)(ws-buf), NULL);
80387638 748
f88b6c16 749 if ('\0' == buf[offs] && ! (MDOC_LITERAL & mdoc->flags)) {
070c62a6
FF
750 mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse,
751 line, (int)(c - buf), NULL);
80387638
SW
752
753 /*
754 * Insert a `sp' in the case of a blank line. Technically,
755 * blank lines aren't allowed, but enough manuals assume this
756 * behaviour that we want to work around it.
757 */
f88b6c16 758 if ( ! mdoc_elem_alloc(mdoc, line, offs, MDOC_sp, NULL))
80387638
SW
759 return(0);
760
f88b6c16
FF
761 mdoc->next = MDOC_NEXT_SIBLING;
762
763 return(mdoc_valid_post(mdoc));
80387638
SW
764 }
765
f88b6c16 766 if ( ! mdoc_word_alloc(mdoc, line, offs, buf+offs))
80387638
SW
767 return(0);
768
f88b6c16 769 if (MDOC_LITERAL & mdoc->flags)
80387638
SW
770 return(1);
771
772 /*
773 * End-of-sentence check. If the last character is an unescaped
774 * EOS character, then flag the node as being the end of a
775 * sentence. The front-end will know how to interpret this.
776 */
777
778 assert(buf < end);
779
070c62a6 780 if (mandoc_eos(buf+offs, (size_t)(end-buf-offs)))
f88b6c16 781 mdoc->last->flags |= MDOC_EOS;
80387638
SW
782
783 return(1);
784}
785
80387638
SW
786/*
787 * Parse a macro line, that is, a line beginning with the control
788 * character.
789 */
790static int
f88b6c16 791mdoc_pmacro(struct mdoc *mdoc, int ln, char *buf, int offs)
80387638
SW
792{
793 enum mdoct tok;
60e1e752 794 int i, sv;
80387638
SW
795 char mac[5];
796 struct mdoc_node *n;
797
60e1e752 798 /* Empty post-control lines are ignored. */
80387638 799
60e1e752 800 if ('"' == buf[offs]) {
070c62a6
FF
801 mandoc_msg(MANDOCERR_COMMENT_BAD, mdoc->parse,
802 ln, offs, NULL);
60e1e752
SW
803 return(1);
804 } else if ('\0' == buf[offs])
80387638
SW
805 return(1);
806
60e1e752 807 sv = offs;
80387638 808
070c62a6 809 /*
80387638
SW
810 * Copy the first word into a nil-terminated buffer.
811 * Stop copying when a tab, space, or eoln is encountered.
812 */
813
60e1e752 814 i = 0;
070c62a6
FF
815 while (i < 4 && '\0' != buf[offs] && ' ' != buf[offs] &&
816 '\t' != buf[offs])
60e1e752
SW
817 mac[i++] = buf[offs++];
818
819 mac[i] = '\0';
820
070c62a6 821 tok = (i > 1 && i < 4) ? mdoc_hash_find(mac) : MDOC_MAX;
80387638 822
80387638 823 if (MDOC_MAX == tok) {
070c62a6
FF
824 mandoc_msg(MANDOCERR_MACRO, mdoc->parse,
825 ln, sv, buf + sv - 1);
80387638
SW
826 return(1);
827 }
828
829 /* Disregard the first trailing tab, if applicable. */
830
60e1e752
SW
831 if ('\t' == buf[offs])
832 offs++;
80387638
SW
833
834 /* Jump to the next non-whitespace word. */
835
60e1e752
SW
836 while (buf[offs] && ' ' == buf[offs])
837 offs++;
80387638 838
070c62a6 839 /*
80387638
SW
840 * Trailing whitespace. Note that tabs are allowed to be passed
841 * into the parser as "text", so we only warn about spaces here.
842 */
843
60e1e752 844 if ('\0' == buf[offs] && ' ' == buf[offs - 1])
070c62a6
FF
845 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse,
846 ln, offs - 1, NULL);
80387638
SW
847
848 /*
849 * If an initial macro or a list invocation, divert directly
850 * into macro processing.
851 */
852
070c62a6
FF
853 if (NULL == mdoc->last || MDOC_It == tok || MDOC_El == tok)
854 return(mdoc_macro(mdoc, tok, ln, sv, &offs, buf));
80387638 855
f88b6c16
FF
856 n = mdoc->last;
857 assert(mdoc->last);
80387638
SW
858
859 /*
860 * If the first macro of a `Bl -column', open an `It' block
861 * context around the parsed macro.
862 */
863
864 if (MDOC_Bl == n->tok && MDOC_BODY == n->type &&
070c62a6 865 LIST_column == n->norm->Bl.type) {
f88b6c16 866 mdoc->flags |= MDOC_FREECOL;
070c62a6 867 return(mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf));
80387638
SW
868 }
869
870 /*
871 * If we're following a block-level `It' within a `Bl -column'
872 * context (perhaps opened in the above block or in ptext()),
873 * then open an `It' block context around the parsed macro.
874 */
875
876 if (MDOC_It == n->tok && MDOC_BLOCK == n->type &&
070c62a6
FF
877 NULL != n->parent &&
878 MDOC_Bl == n->parent->tok &&
879 LIST_column == n->parent->norm->Bl.type) {
f88b6c16 880 mdoc->flags |= MDOC_FREECOL;
070c62a6 881 return(mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf));
80387638
SW
882 }
883
884 /* Normal processing of a macro. */
885
070c62a6
FF
886 if ( ! mdoc_macro(mdoc, tok, ln, sv, &offs, buf))
887 return(0);
80387638 888
070c62a6 889 /* In quick mode (for mandocdb), abort after the NAME section. */
80387638 890
070c62a6
FF
891 if (mdoc->quick && MDOC_Sh == tok &&
892 SEC_NAME != mdoc->last->sec)
893 return(2);
80387638 894
070c62a6 895 return(1);
80387638
SW
896}
897
60e1e752
SW
898enum mdelim
899mdoc_isdelim(const char *p)
900{
901
902 if ('\0' == p[0])
903 return(DELIM_NONE);
80387638 904
60e1e752
SW
905 if ('\0' == p[1])
906 switch (p[0]) {
070c62a6 907 case '(':
60e1e752 908 /* FALLTHROUGH */
070c62a6 909 case '[':
60e1e752 910 return(DELIM_OPEN);
070c62a6 911 case '|':
60e1e752 912 return(DELIM_MIDDLE);
070c62a6 913 case '.':
60e1e752 914 /* FALLTHROUGH */
070c62a6 915 case ',':
60e1e752 916 /* FALLTHROUGH */
070c62a6 917 case ';':
60e1e752 918 /* FALLTHROUGH */
070c62a6 919 case ':':
60e1e752 920 /* FALLTHROUGH */
070c62a6 921 case '?':
60e1e752 922 /* FALLTHROUGH */
070c62a6 923 case '!':
60e1e752 924 /* FALLTHROUGH */
070c62a6 925 case ')':
60e1e752 926 /* FALLTHROUGH */
070c62a6 927 case ']':
60e1e752
SW
928 return(DELIM_CLOSE);
929 default:
930 return(DELIM_NONE);
931 }
932
933 if ('\\' != p[0])
934 return(DELIM_NONE);
935
936 if (0 == strcmp(p + 1, "."))
937 return(DELIM_CLOSE);
f88b6c16 938 if (0 == strcmp(p + 1, "fR|\\fP"))
60e1e752
SW
939 return(DELIM_MIDDLE);
940
941 return(DELIM_NONE);
942}
070c62a6
FF
943
944void
945mdoc_deroff(char **dest, const struct mdoc_node *n)
946{
947 char *cp;
948 size_t sz;
949
950 if (MDOC_TEXT != n->type) {
951 for (n = n->child; n; n = n->next)
952 mdoc_deroff(dest, n);
953 return;
954 }
955
956 /* Skip leading whitespace. */
957
958 for (cp = n->string; '\0' != *cp; cp++)
959 if (0 == isspace((unsigned char)*cp))
960 break;
961
962 /* Skip trailing whitespace. */
963
964 for (sz = strlen(cp); sz; sz--)
965 if (0 == isspace((unsigned char)cp[sz-1]))
966 break;
967
968 /* Skip empty strings. */
969
970 if (0 == sz)
971 return;
972
973 if (NULL == *dest) {
974 *dest = mandoc_strndup(cp, sz);
975 return;
976 }
977
978 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
979 free(*dest);
980 *dest = cp;
981}