mandoc(1): Update to 1.9.13.
[dragonfly.git] / usr.bin / mandoc / mdoc.c
CommitLineData
32c903ac 1/* $Id: mdoc.c,v 1.113 2009/10/30 05:58:38 kristaps Exp $ */
589e7c1d
SW
2/*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17#include <sys/types.h>
18
19#include <assert.h>
20#include <ctype.h>
21#include <stdarg.h>
22#include <stdio.h>
23#include <stdlib.h>
24#include <string.h>
25
26#include "libmdoc.h"
32c903ac 27#include "libmandoc.h"
589e7c1d
SW
28
29const char *const __mdoc_merrnames[MERRMAX] = {
30 "trailing whitespace", /* ETAILWS */
31 "unexpected quoted parameter", /* EQUOTPARM */
32 "unterminated quoted parameter", /* EQUOTTERM */
589e7c1d
SW
33 "argument parameter suggested", /* EARGVAL */
34 "macro disallowed in prologue", /* EBODYPROL */
35 "macro disallowed in body", /* EPROLBODY */
36 "text disallowed in prologue", /* ETEXTPROL */
37 "blank line disallowed", /* ENOBLANK */
38 "text parameter too long", /* ETOOLONG */
39 "invalid escape sequence", /* EESCAPE */
40 "invalid character", /* EPRINT */
41 "document has no body", /* ENODAT */
42 "document has no prologue", /* ENOPROLOGUE */
43 "expected line arguments", /* ELINE */
44 "invalid AT&T argument", /* EATT */
45 "default name not yet set", /* ENAME */
46 "missing list type", /* ELISTTYPE */
47 "missing display type", /* EDISPTYPE */
48 "too many display types", /* EMULTIDISP */
49 "too many list types", /* EMULTILIST */
50 "NAME section must be first", /* ESECNAME */
51 "badly-formed NAME section", /* ENAMESECINC */
52 "argument repeated", /* EARGREP */
53 "expected boolean parameter", /* EBOOL */
54 "inconsistent column syntax", /* ECOLMIS */
55 "nested display invalid", /* ENESTDISP */
56 "width argument missing", /* EMISSWIDTH */
57 "invalid section for this manual section", /* EWRONGMSEC */
58 "section out of conventional order", /* ESECOOO */
59 "section repeated", /* ESECREP */
60 "invalid standard argument", /* EBADSTAND */
61 "multi-line arguments discouraged", /* ENOMULTILINE */
62 "multi-line arguments suggested", /* EMULTILINE */
63 "line arguments discouraged", /* ENOLINE */
64 "prologue macro out of conventional order", /* EPROLOOO */
65 "prologue macro repeated", /* EPROLREP */
66 "invalid manual section", /* EBADMSEC */
67 "invalid section", /* EBADSEC */
68 "invalid font mode", /* EFONT */
69 "invalid date syntax", /* EBADDATE */
70 "invalid number format", /* ENUMFMT */
71 "superfluous width argument", /* ENOWIDTH */
72 "system: utsname error", /* EUTSNAME */
73 "obsolete macro", /* EOBS */
74 "end-of-line scope violation", /* EIMPBRK */
75 "empty macro ignored", /* EIGNE */
76 "unclosed explicit scope", /* EOPEN */
77 "unterminated quoted phrase", /* EQUOTPHR */
78 "closure macro without prior context", /* ENOCTX */
79 "no description found for library", /* ELIB */
80 "bad child for parent context", /* EBADCHILD */
81 "list arguments preceding type", /* ENOTYPE */
82};
83
84const char *const __mdoc_macronames[MDOC_MAX] = {
85 "Ap", "Dd", "Dt", "Os",
86 "Sh", "Ss", "Pp", "D1",
87 "Dl", "Bd", "Ed", "Bl",
88 "El", "It", "Ad", "An",
89 "Ar", "Cd", "Cm", "Dv",
90 "Er", "Ev", "Ex", "Fa",
91 "Fd", "Fl", "Fn", "Ft",
92 "Ic", "In", "Li", "Nd",
93 "Nm", "Op", "Ot", "Pa",
94 "Rv", "St", "Va", "Vt",
95 /* LINTED */
96 "Xr", "\%A", "\%B", "\%D",
97 /* LINTED */
98 "\%I", "\%J", "\%N", "\%O",
99 /* LINTED */
100 "\%P", "\%R", "\%T", "\%V",
101 "Ac", "Ao", "Aq", "At",
102 "Bc", "Bf", "Bo", "Bq",
103 "Bsx", "Bx", "Db", "Dc",
104 "Do", "Dq", "Ec", "Ef",
105 "Em", "Eo", "Fx", "Ms",
106 "No", "Ns", "Nx", "Ox",
107 "Pc", "Pf", "Po", "Pq",
108 "Qc", "Ql", "Qo", "Qq",
109 "Re", "Rs", "Sc", "So",
110 "Sq", "Sm", "Sx", "Sy",
111 "Tn", "Ux", "Xc", "Xo",
112 "Fo", "Fc", "Oo", "Oc",
113 "Bk", "Ek", "Bt", "Hf",
114 "Fr", "Ud", "Lb", "Lp",
115 "Lk", "Mt", "Brq", "Bro",
116 /* LINTED */
117 "Brc", "\%C", "Es", "En",
118 /* LINTED */
cbce6d97
SW
119 "Dx", "\%Q", "br", "sp",
120 /* LINTED */
121 "\%U"
589e7c1d
SW
122 };
123
124const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
125 "split", "nosplit", "ragged",
126 "unfilled", "literal", "file",
127 "offset", "bullet", "dash",
128 "hyphen", "item", "enum",
129 "tag", "diag", "hang",
130 "ohang", "inset", "column",
131 "width", "compact", "std",
132 "filled", "words", "emphasis",
133 "symbolic", "nested", "centered"
134 };
135
136const char * const *mdoc_macronames = __mdoc_macronames;
137const char * const *mdoc_argnames = __mdoc_argnames;
138
/* File-local helpers, defined further down in this file. */
static	void		  mdoc_free1(struct mdoc *mdoc);
static	void		  mdoc_alloc1(struct mdoc *mdoc);
static	struct mdoc_node *node_alloc(struct mdoc *m, int line, int pos,
				int tok, enum mdoc_type type);
static	int		  node_append(struct mdoc *mdoc,
				struct mdoc_node *p);
static	int		  parsetext(struct mdoc *m, int line, char *buf);
static	int		  parsemacro(struct mdoc *m, int ln, char *buf);
static	int		  macrowarn(struct mdoc *m, int ln, const char *buf);
static	int		  pstring(struct mdoc *m, int line, int pos,
				const char *p, size_t len);

/* strlcpy() is declared by hand when building on Linux. */
#ifdef __linux__
extern	size_t		  strlcpy(char *, const char *, size_t);
#endif
154
589e7c1d
SW
155
156const struct mdoc_node *
157mdoc_node(const struct mdoc *m)
158{
159
160 return(MDOC_HALT & m->flags ? NULL : m->first);
161}
162
163
164const struct mdoc_meta *
165mdoc_meta(const struct mdoc *m)
166{
167
168 return(MDOC_HALT & m->flags ? NULL : &m->meta);
169}
170
171
172/*
173 * Frees volatile resources (parse tree, meta-data, fields).
174 */
175static void
176mdoc_free1(struct mdoc *mdoc)
177{
178
179 if (mdoc->first)
180 mdoc_node_freelist(mdoc->first);
181 if (mdoc->meta.title)
182 free(mdoc->meta.title);
183 if (mdoc->meta.os)
184 free(mdoc->meta.os);
185 if (mdoc->meta.name)
186 free(mdoc->meta.name);
187 if (mdoc->meta.arch)
188 free(mdoc->meta.arch);
189 if (mdoc->meta.vol)
190 free(mdoc->meta.vol);
191}
192
193
194/*
195 * Allocate all volatile resources (parse tree, meta-data, fields).
196 */
32c903ac 197static void
589e7c1d
SW
198mdoc_alloc1(struct mdoc *mdoc)
199{
200
32c903ac 201 memset(&mdoc->meta, 0, sizeof(struct mdoc_meta));
589e7c1d
SW
202 mdoc->flags = 0;
203 mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
32c903ac 204 mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node));
589e7c1d
SW
205 mdoc->first = mdoc->last;
206 mdoc->last->type = MDOC_ROOT;
207 mdoc->next = MDOC_NEXT_CHILD;
589e7c1d
SW
208}
209
210
/*
 * Prepare the parser for a new document: drop the volatile state with
 * mdoc_free1() and build it afresh with mdoc_alloc1().  Fields that
 * neither helper touches survive the reset.
 */
void
mdoc_reset(struct mdoc *mdoc)
{

	mdoc_free1(mdoc);	/* Old tree and meta-data strings. */
	mdoc_alloc1(mdoc);	/* New root node, cleared flags. */
}
224
225
/*
 * Destroy a parser: release the volatile state via mdoc_free1(), then
 * the structure itself.  The pointer must not be used afterwards.
 */
void
mdoc_free(struct mdoc *mdoc)
{

	mdoc_free1(mdoc);
	free(mdoc);
}
237
238
239/*
240 * Allocate volatile and non-volatile parse resources.
241 */
242struct mdoc *
243mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
244{
245 struct mdoc *p;
246
32c903ac 247 p = mandoc_calloc(1, sizeof(struct mdoc));
589e7c1d 248
32c903ac
SW
249 if (cb)
250 memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
589e7c1d
SW
251
252 p->data = data;
253 p->pflags = pflags;
254
32c903ac
SW
255 mdoc_hash_init();
256 mdoc_alloc1(p);
257 return(p);
589e7c1d
SW
258}
259
260
261/*
262 * Climb back up the parse tree, validating open scopes. Mostly calls
263 * through to macro_end() in macro.c.
264 */
265int
266mdoc_endparse(struct mdoc *m)
267{
268
269 if (MDOC_HALT & m->flags)
270 return(0);
271 else if (mdoc_macroend(m))
272 return(1);
273 m->flags |= MDOC_HALT;
274 return(0);
275}
276
277
278/*
279 * Main parse routine. Parses a single line -- really just hands off to
280 * the macro (parsemacro()) or text parser (parsetext()).
281 */
282int
283mdoc_parseln(struct mdoc *m, int ln, char *buf)
284{
285
286 if (MDOC_HALT & m->flags)
287 return(0);
288
289 return('.' == *buf ? parsemacro(m, ln, buf) :
290 parsetext(m, ln, buf));
291}
292
293
294int
295mdoc_verr(struct mdoc *mdoc, int ln, int pos,
296 const char *fmt, ...)
297{
298 char buf[256];
299 va_list ap;
300
301 if (NULL == mdoc->cb.mdoc_err)
302 return(0);
303
304 va_start(ap, fmt);
305 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
306 va_end(ap);
307
308 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
309}
310
311
312int
313mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
314{
315 char buf[256];
316 va_list ap;
317
318 if (NULL == mdoc->cb.mdoc_warn)
319 return(0);
320
321 va_start(ap, fmt);
322 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
323 va_end(ap);
324
325 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf));
326}
327
328
329int
330mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type)
331{
332 const char *p;
333
334 p = __mdoc_merrnames[(int)type];
335 assert(p);
336
337 if (iserr)
338 return(mdoc_verr(m, line, pos, p));
339
340 return(mdoc_vwarn(m, line, pos, p));
341}
342
343
344int
345mdoc_macro(struct mdoc *m, int tok,
346 int ln, int pp, int *pos, char *buf)
347{
348 /*
349 * If we're in the prologue, deny "body" macros. Similarly, if
350 * we're in the body, deny prologue calls.
351 */
352 if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
353 MDOC_PBODY & m->flags)
354 return(mdoc_perr(m, ln, pp, EPROLBODY));
355 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
356 ! (MDOC_PBODY & m->flags))
357 return(mdoc_perr(m, ln, pp, EBODYPROL));
358
359 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
360}
361
362
363static int
364node_append(struct mdoc *mdoc, struct mdoc_node *p)
365{
366
367 assert(mdoc->last);
368 assert(mdoc->first);
369 assert(MDOC_ROOT != p->type);
370
371 switch (mdoc->next) {
372 case (MDOC_NEXT_SIBLING):
373 mdoc->last->next = p;
374 p->prev = mdoc->last;
375 p->parent = mdoc->last->parent;
376 break;
377 case (MDOC_NEXT_CHILD):
378 mdoc->last->child = p;
379 p->parent = mdoc->last;
380 break;
381 default:
382 abort();
383 /* NOTREACHED */
384 }
385
386 p->parent->nchild++;
387
388 if ( ! mdoc_valid_pre(mdoc, p))
389 return(0);
390 if ( ! mdoc_action_pre(mdoc, p))
391 return(0);
392
393 switch (p->type) {
394 case (MDOC_HEAD):
395 assert(MDOC_BLOCK == p->parent->type);
396 p->parent->head = p;
397 break;
398 case (MDOC_TAIL):
399 assert(MDOC_BLOCK == p->parent->type);
400 p->parent->tail = p;
401 break;
402 case (MDOC_BODY):
403 assert(MDOC_BLOCK == p->parent->type);
404 p->parent->body = p;
405 break;
406 default:
407 break;
408 }
409
410 mdoc->last = p;
411
412 switch (p->type) {
413 case (MDOC_TEXT):
414 if ( ! mdoc_valid_post(mdoc))
415 return(0);
416 if ( ! mdoc_action_post(mdoc))
417 return(0);
418 break;
419 default:
420 break;
421 }
422
423 return(1);
424}
425
426
427static struct mdoc_node *
428node_alloc(struct mdoc *m, int line,
429 int pos, int tok, enum mdoc_type type)
430{
431 struct mdoc_node *p;
432
32c903ac 433 p = mandoc_calloc(1, sizeof(struct mdoc_node));
589e7c1d
SW
434 p->sec = m->lastsec;
435 p->line = line;
436 p->pos = pos;
437 p->tok = tok;
438 if (MDOC_TEXT != (p->type = type))
439 assert(p->tok >= 0);
440
441 return(p);
442}
443
444
445int
446mdoc_tail_alloc(struct mdoc *m, int line, int pos, int tok)
447{
448 struct mdoc_node *p;
449
450 p = node_alloc(m, line, pos, tok, MDOC_TAIL);
589e7c1d
SW
451 if ( ! node_append(m, p))
452 return(0);
453 m->next = MDOC_NEXT_CHILD;
454 return(1);
455}
456
457
458int
459mdoc_head_alloc(struct mdoc *m, int line, int pos, int tok)
460{
461 struct mdoc_node *p;
462
463 assert(m->first);
464 assert(m->last);
465
466 p = node_alloc(m, line, pos, tok, MDOC_HEAD);
589e7c1d
SW
467 if ( ! node_append(m, p))
468 return(0);
469 m->next = MDOC_NEXT_CHILD;
470 return(1);
471}
472
473
474int
475mdoc_body_alloc(struct mdoc *m, int line, int pos, int tok)
476{
477 struct mdoc_node *p;
478
479 p = node_alloc(m, line, pos, tok, MDOC_BODY);
589e7c1d
SW
480 if ( ! node_append(m, p))
481 return(0);
482 m->next = MDOC_NEXT_CHILD;
483 return(1);
484}
485
486
487int
488mdoc_block_alloc(struct mdoc *m, int line, int pos,
489 int tok, struct mdoc_arg *args)
490{
491 struct mdoc_node *p;
492
493 p = node_alloc(m, line, pos, tok, MDOC_BLOCK);
589e7c1d
SW
494 p->args = args;
495 if (p->args)
496 (args->refcnt)++;
497 if ( ! node_append(m, p))
498 return(0);
499 m->next = MDOC_NEXT_CHILD;
500 return(1);
501}
502
503
504int
505mdoc_elem_alloc(struct mdoc *m, int line, int pos,
506 int tok, struct mdoc_arg *args)
507{
508 struct mdoc_node *p;
509
510 p = node_alloc(m, line, pos, tok, MDOC_ELEM);
589e7c1d
SW
511 p->args = args;
512 if (p->args)
513 (args->refcnt)++;
514 if ( ! node_append(m, p))
515 return(0);
516 m->next = MDOC_NEXT_CHILD;
517 return(1);
518}
519
520
521static int
522pstring(struct mdoc *m, int line, int pos, const char *p, size_t len)
523{
524 struct mdoc_node *n;
525 size_t sv;
526
527 n = node_alloc(m, line, pos, -1, MDOC_TEXT);
32c903ac 528 n->string = mandoc_malloc(len + 1);
589e7c1d
SW
529 sv = strlcpy(n->string, p, len + 1);
530
531 /* Prohibit truncation. */
532 assert(sv < len + 1);
533
534 if ( ! node_append(m, n))
535 return(0);
536 m->next = MDOC_NEXT_SIBLING;
537 return(1);
538}
539
540
/*
 * Append the whole NUL-terminated string `p' to the tree as a single
 * text node.  Returns the result of pstring().
 */
int
mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p)
{
	size_t		 len;

	len = strlen(p);
	return(pstring(m, line, pos, p, len));
}
547
548
549void
550mdoc_node_free(struct mdoc_node *p)
551{
552
553 if (p->parent)
554 p->parent->nchild--;
555 if (p->string)
556 free(p->string);
557 if (p->args)
558 mdoc_argv_free(p->args);
559 free(p);
560}
561
562
563void
564mdoc_node_freelist(struct mdoc_node *p)
565{
566
567 if (p->child)
568 mdoc_node_freelist(p->child);
569 if (p->next)
570 mdoc_node_freelist(p->next);
571
572 assert(0 == p->nchild);
573 mdoc_node_free(p);
574}
575
576
577/*
578 * Parse free-form text, that is, a line that does not begin with the
579 * control character.
580 */
581static int
582parsetext(struct mdoc *m, int line, char *buf)
583{
584 int i, j;
585
586 if (SEC_NONE == m->lastnamed)
587 return(mdoc_perr(m, line, 0, ETEXTPROL));
588
589 /*
590 * If in literal mode, then pass the buffer directly to the
591 * back-end, as it should be preserved as a single term.
592 */
593
594 if (MDOC_LITERAL & m->flags)
595 return(mdoc_word_alloc(m, line, 0, buf));
596
597 /* Disallow blank/white-space lines in non-literal mode. */
598
599 for (i = 0; ' ' == buf[i]; i++)
600 /* Skip leading whitespace. */ ;
601 if (0 == buf[i])
602 return(mdoc_perr(m, line, 0, ENOBLANK));
603
604 /*
605 * Break apart a free-form line into tokens. Spaces are
606 * stripped out of the input.
607 */
608
609 for (j = i; buf[i]; i++) {
610 if (' ' != buf[i])
611 continue;
612
613 /* Escaped whitespace. */
614 if (i && ' ' == buf[i] && '\\' == buf[i - 1])
615 continue;
616
617 buf[i++] = 0;
618 if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
619 return(0);
620
621 for ( ; ' ' == buf[i]; i++)
622 /* Skip trailing whitespace. */ ;
623
624 j = i;
625 if (0 == buf[i])
626 break;
627 }
628
629 if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
630 return(0);
631
632 m->next = MDOC_NEXT_SIBLING;
633 return(1);
634}
635
636
637
589e7c1d
SW
638static int
639macrowarn(struct mdoc *m, int ln, const char *buf)
640{
641 if ( ! (MDOC_IGN_MACRO & m->pflags))
642 return(mdoc_verr(m, ln, 0,
643 "unknown macro: %s%s",
644 buf, strlen(buf) > 3 ? "..." : ""));
645 return(mdoc_vwarn(m, ln, 0, "unknown macro: %s%s",
646 buf, strlen(buf) > 3 ? "..." : ""));
647}
648
649
650/*
651 * Parse a macro line, that is, a line beginning with the control
652 * character.
653 */
654int
655parsemacro(struct mdoc *m, int ln, char *buf)
656{
657 int i, j, c;
658 char mac[5];
659
660 /* Empty lines are ignored. */
661
662 if (0 == buf[1])
663 return(1);
664
665 i = 1;
666
667 /* Accept whitespace after the initial control char. */
668
669 if (' ' == buf[i]) {
670 i++;
671 while (buf[i] && ' ' == buf[i])
672 i++;
673 if (0 == buf[i])
674 return(1);
675 }
676
677 /* Copy the first word into a nil-terminated buffer. */
678
679 for (j = 0; j < 4; j++, i++) {
680 if (0 == (mac[j] = buf[i]))
681 break;
682 else if (' ' == buf[i])
683 break;
684
685 /* Check for invalid characters. */
686
687 if (isgraph((u_char)buf[i]))
688 continue;
689 return(mdoc_perr(m, ln, i, EPRINT));
690 }
691
692 mac[j] = 0;
693
694 if (j == 4 || j < 2) {
695 if ( ! macrowarn(m, ln, mac))
696 goto err;
697 return(1);
698 }
699
700 if (MDOC_MAX == (c = mdoc_hash_find(mac))) {
701 if ( ! macrowarn(m, ln, mac))
702 goto err;
703 return(1);
704 }
705
706 /* The macro is sane. Jump to the next word. */
707
708 while (buf[i] && ' ' == buf[i])
709 i++;
710
711 /*
712 * Begin recursive parse sequence. Since we're at the start of
713 * the line, we don't need to do callable/parseable checks.
714 */
715 if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
716 goto err;
717
718 return(1);
719
720err: /* Error out. */
721
722 m->flags |= MDOC_HALT;
723 return(0);
724}