Import mdocml-1.13.1
[dragonfly.git] / contrib / mdocml / read.c
CommitLineData
070c62a6 1/* $Id: read.c,v 1.79 2014/08/06 15:09:05 schwarze Exp $ */
60e1e752
SW
2/*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
070c62a6
FF
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
60e1e752
SW
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
a4c7eb57
SW
19#ifdef HAVE_CONFIG_H
20#include "config.h"
21#endif
22
23#ifdef HAVE_MMAP
24# include <sys/stat.h>
25# include <sys/mman.h>
26#endif
60e1e752
SW
27
28#include <assert.h>
29#include <ctype.h>
070c62a6 30#include <errno.h>
60e1e752
SW
31#include <fcntl.h>
32#include <stdarg.h>
36342e81 33#include <stdint.h>
60e1e752
SW
34#include <stdio.h>
35#include <stdlib.h>
36#include <string.h>
37#include <unistd.h>
38
39#include "mandoc.h"
070c62a6 40#include "mandoc_aux.h"
60e1e752
SW
41#include "libmandoc.h"
42#include "mdoc.h"
43#include "man.h"
36342e81 44#include "main.h"
60e1e752 45
60e1e752
SW
46#define REPARSE_LIMIT 1000
47
/*
 * A block of input bytes together with its size.
 * Used both for whole-file input (filled by read_whole_file() or
 * supplied by the caller) and for the growable per-line buffer in
 * mparse_buf_r(), where sz is the number of bytes currently allocated.
 */
struct buf {
	char		 *buf; /* binary input buffer */
	size_t		  sz; /* size of binary buffer */
};
52
/*
 * State of one parse session.  The pman/pmdoc parsers persist across
 * files, while man/mdoc designate the parser selected for the file
 * currently being parsed (both NULL until pset() or mparse_end()
 * picks one; cleared again by mparse_reset()).
 */
struct mparse {
	enum mandoclevel  file_status; /* status of current parse */
	enum mandoclevel  wlevel; /* ignore messages below this */
	int		  line; /* line number in the file */
	int		  options; /* parser options */
	struct man	 *pman; /* persistent man parser */
	struct mdoc	 *pmdoc; /* persistent mdoc parser */
	struct man	 *man; /* man parser */
	struct mdoc	 *mdoc; /* mdoc parser */
	struct roff	 *roff; /* roff parser (!NULL) */
	char		 *sodest; /* filename pointed to by .so */
	int		  reparse_count; /* finite interp. stack */
	mandocmsg	  mmsg; /* warning/error message handler */
	const char	 *file; /* name of the input being parsed, for messages */
	struct buf	 *secondary; /* copy of parsed lines, see mparse_keep() */
	const char	 *defos; /* default operating system */
};
70
/* Forward declarations of the file-local helpers, in alphabetical order. */
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			int start);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
static	void	  pset(const char *buf, int pos, struct mparse *curp);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  resize_buf(struct buf *buf, size_t initial);
60e1e752
SW
79
/*
 * Indexed by enum mandoclevel.  mandoc_msg() starts at
 * MANDOCLEVEL_FATAL and steps down while the error code is smaller
 * than the entry, so each entry is the lowest enum mandocerr value
 * that is reported at that level.  The level names in the comments
 * below follow the order of the mandoclevels[] table.
 */
static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
	MANDOCERR_OK,		/* SUCCESS */
	MANDOCERR_WARNING,	/* RESERVED */
	MANDOCERR_WARNING,	/* WARNING */
	MANDOCERR_ERROR,	/* ERROR */
	MANDOCERR_FATAL,	/* FATAL */
	MANDOCERR_MAX,		/* BADARG */
	MANDOCERR_MAX		/* SYSERR */
};
89
/*
 * Human-readable message texts, indexed by enum mandocerr and
 * returned by mparse_strerror().  The order of the entries has to
 * mirror the order of the enum constants (declared outside this
 * file - NOTE(review): verify against mandoc.h when editing).
 */
static	const char * const	mandocerrs[MANDOCERR_MAX] = {
	"ok",

	"generic warning",

	/* related to the prologue */
	"missing manual title, using UNTITLED",
	"missing manual title, using \"\"",
	"lower case character in document title",
	"missing manual section, using \"\"",
	"unknown manual section",
	"unknown manual volume or arch",
	"missing date, using today's date",
	"cannot parse date, using it verbatim",
	"missing Os macro, using \"\"",
	"duplicate prologue macro",
	"late prologue macro",
	"skipping late title macro",
	"prologue macros out of order",

	/* related to document structure */
	".so is fragile, better use ln(1)",
	"no document body",
	"content before first section header",
	"first section is not \"NAME\"",
	"bad NAME section contents",
	"sections out of conventional order",
	"duplicate section title",
	"unexpected section",

	/* related to macros and nesting */
	"obsolete macro",
	"skipping paragraph macro",
	"moving paragraph macro out of list",
	"skipping no-space macro",
	"blocks badly nested",
	"nested displays are not portable",
	"moving content out of list",
	".Vt block has child macro",
	"fill mode already enabled, skipping",
	"fill mode already disabled, skipping",
	"line scope broken",

	/* related to missing macro arguments */
	"skipping empty request",
	"conditional request controls empty scope",
	"skipping empty macro",
	"empty argument, using 0n",
	"argument count wrong",
	"missing display type, using -ragged",
	"list type is not the first argument",
	"missing -width in -tag list, using 8n",
	"missing utility name, using \"\"",
	"empty head in list item",
	"empty list item",
	"missing font type, using \\fR",
	"unknown font type, using \\fR",
	"missing -std argument, adding it",

	/* related to bad macro arguments */
	"unterminated quoted argument",
	"duplicate argument",
	"skipping duplicate argument",
	"skipping duplicate display type",
	"skipping duplicate list type",
	"skipping -width argument",
	"unknown AT&T UNIX version",
	"invalid content in Rs block",
	"invalid Boolean argument",
	"unknown font, skipping request",

	/* related to plain text */
	"blank line in fill mode, using .sp",
	"tab in filled text",
	"whitespace at end of input line",
	"bad comment style",
	"invalid escape sequence",
	"undefined string, using \"\"",

	/* first entry of the error range */
	"generic error",

	/* related to equations */
	"unexpected equation scope closure",
	"equation scope open on exit",
	"overlapping equation scopes",
	"unexpected end of equation",
	"equation syntax error",

	/* related to tables */
	"bad table syntax",
	"bad table option",
	"bad table layout",
	"no table layout cells specified",
	"no table data cells specified",
	"ignore data in cell",
	"data block still open",
	"ignoring extra data cells",

	/* related to document structure and macros */
	"input stack limit exceeded, infinite loop?",
	"skipping bad character",
	"skipping unknown macro",
	"skipping item outside list",
	"skipping column outside column list",
	"skipping end of block that is not open",
	"inserting missing end of block",
	"appending missing end of block",

	/* related to request and macro arguments */
	"escaped character not allowed in a name",
	"argument count wrong",
	"missing list type, using -item",
	"missing manual name, using \"\"",
	"uname(3) system call failed, using UNKNOWN",
	"unknown standard specifier",
	"skipping request without numeric argument",
	"skipping all arguments",
	"skipping excess arguments",

	/* first entry of the fatal range */
	"generic fatal error",

	"input too large",
	"NOT IMPLEMENTED: Bd -file",
	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
	".so request failed",

	/* system errors */
	/*
	 * No text for this slot; callers get NULL from mparse_strerror().
	 * NOTE(review): presumably MANDOCERR_SYSOPEN, which mparse_readfd()
	 * reports with strerror(errno) as the message - confirm against
	 * the enum declaration.
	 */
	NULL,
	"cannot stat file",
	"cannot read file",
};
221
/*
 * Printable names of the message levels, indexed by enum mandoclevel
 * and returned by mparse_strlevel().
 */
static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
	"SUCCESS",
	"RESERVED",
	"WARNING",
	"ERROR",
	"FATAL",
	"BADARG",
	"SYSERR"
};
231
070c62a6 232
60e1e752
SW
233static void
234resize_buf(struct buf *buf, size_t initial)
235{
236
237 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
238 buf->buf = mandoc_realloc(buf->buf, buf->sz);
239}
240
241static void
242pset(const char *buf, int pos, struct mparse *curp)
243{
244 int i;
245
246 /*
247 * Try to intuit which kind of manual parser should be used. If
248 * passed in by command-line (-man, -mdoc), then use that
249 * explicitly. If passed as -mandoc, then try to guess from the
250 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
251 * default to -man, which is more lenient.
252 *
253 * Separate out pmdoc/pman from mdoc/man: the first persists
254 * through all parsers, while the latter is used per-parse.
255 */
256
257 if ('.' == buf[0] || '\'' == buf[0]) {
258 for (i = 1; buf[i]; i++)
259 if (' ' != buf[i] && '\t' != buf[i])
260 break;
261 if ('\0' == buf[i])
262 return;
263 }
264
070c62a6 265 if (MPARSE_MDOC & curp->options) {
60e1e752
SW
266 curp->mdoc = curp->pmdoc;
267 return;
070c62a6 268 } else if (MPARSE_MAN & curp->options) {
60e1e752
SW
269 curp->man = curp->pman;
270 return;
60e1e752
SW
271 }
272
273 if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) {
070c62a6
FF
274 if (NULL == curp->pmdoc)
275 curp->pmdoc = mdoc_alloc(
276 curp->roff, curp, curp->defos,
277 MPARSE_QUICK & curp->options ? 1 : 0);
60e1e752
SW
278 assert(curp->pmdoc);
279 curp->mdoc = curp->pmdoc;
280 return;
070c62a6 281 }
60e1e752 282
070c62a6
FF
283 if (NULL == curp->pman)
284 curp->pman = man_alloc(curp->roff, curp,
285 MPARSE_QUICK & curp->options ? 1 : 0);
60e1e752
SW
286 assert(curp->pman);
287 curp->man = curp->pman;
288}
289
290/*
291 * Main parse routine for an opened file. This is called for each
292 * opened file and simply loops around the full input file, possibly
293 * nesting (i.e., with `so').
294 */
295static void
296mparse_buf_r(struct mparse *curp, struct buf blk, int start)
297{
298 const struct tbl_span *span;
299 struct buf ln;
300 enum rofferr rr;
301 int i, of, rc;
302 int pos; /* byte number in the ln buffer */
303 int lnn; /* line number in the real file */
304 unsigned char c;
305
306 memset(&ln, 0, sizeof(struct buf));
307
070c62a6
FF
308 lnn = curp->line;
309 pos = 0;
60e1e752
SW
310
311 for (i = 0; i < (int)blk.sz; ) {
312 if (0 == pos && '\0' == blk.buf[i])
313 break;
314
315 if (start) {
316 curp->line = lnn;
317 curp->reparse_count = 0;
318 }
319
320 while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {
321
322 /*
323 * When finding an unescaped newline character,
324 * leave the character loop to process the line.
325 * Skip a preceding carriage return, if any.
326 */
327
328 if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz &&
329 '\n' == blk.buf[i + 1])
330 ++i;
331 if ('\n' == blk.buf[i]) {
332 ++i;
333 ++lnn;
334 break;
335 }
336
f88b6c16
FF
337 /*
338 * Make sure we have space for at least
339 * one backslash and one other character
340 * and the trailing NUL byte.
341 */
342
343 if (pos + 2 >= (int)ln.sz)
344 resize_buf(&ln, 256);
345
070c62a6 346 /*
60e1e752
SW
347 * Warn about bogus characters. If you're using
348 * non-ASCII encoding, you're screwing your
349 * readers. Since I'd rather this not happen,
36342e81
SW
350 * I'll be helpful and replace these characters
351 * with "?", so we don't display gibberish.
352 * Note to manual writers: use special characters.
60e1e752
SW
353 */
354
355 c = (unsigned char) blk.buf[i];
356
070c62a6
FF
357 if ( ! (isascii(c) &&
358 (isgraph(c) || isblank(c)))) {
359 mandoc_vmsg(MANDOCERR_BADCHAR, curp,
360 curp->line, pos, "0x%x", c);
60e1e752 361 i++;
36342e81 362 ln.buf[pos++] = '?';
60e1e752
SW
363 continue;
364 }
365
366 /* Trailing backslash = a plain char. */
367
368 if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
60e1e752
SW
369 ln.buf[pos++] = blk.buf[i++];
370 continue;
371 }
372
373 /*
374 * Found escape and at least one other character.
375 * When it's a newline character, skip it.
376 * When there is a carriage return in between,
377 * skip that one as well.
378 */
379
380 if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz &&
381 '\n' == blk.buf[i + 2])
382 ++i;
383 if ('\n' == blk.buf[i + 1]) {
384 i += 2;
385 ++lnn;
386 continue;
387 }
388
a4c7eb57 389 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
60e1e752
SW
390 i += 2;
391 /* Comment, skip to end of line */
392 for (; i < (int)blk.sz; ++i) {
393 if ('\n' == blk.buf[i]) {
394 ++i;
395 ++lnn;
396 break;
397 }
398 }
399
400 /* Backout trailing whitespaces */
401 for (; pos > 0; --pos) {
402 if (ln.buf[pos - 1] != ' ')
403 break;
404 if (pos > 2 && ln.buf[pos - 2] == '\\')
405 break;
406 }
407 break;
408 }
409
f88b6c16 410 /* Catch escaped bogus characters. */
60e1e752 411
f88b6c16
FF
412 c = (unsigned char) blk.buf[i+1];
413
070c62a6
FF
414 if ( ! (isascii(c) &&
415 (isgraph(c) || isblank(c)))) {
416 mandoc_vmsg(MANDOCERR_BADCHAR, curp,
417 curp->line, pos, "0x%x", c);
f88b6c16
FF
418 i += 2;
419 ln.buf[pos++] = '?';
420 continue;
421 }
422
423 /* Some other escape sequence, copy & cont. */
60e1e752
SW
424
425 ln.buf[pos++] = blk.buf[i++];
426 ln.buf[pos++] = blk.buf[i++];
427 }
428
070c62a6 429 if (pos >= (int)ln.sz)
60e1e752
SW
430 resize_buf(&ln, 256);
431
432 ln.buf[pos] = '\0';
433
434 /*
435 * A significant amount of complexity is contained by
436 * the roff preprocessor. It's line-oriented but can be
437 * expressed on one line, so we need at times to
438 * readjust our starting point and re-run it. The roff
439 * preprocessor can also readjust the buffers with new
440 * data, so we pass them in wholesale.
441 */
442
443 of = 0;
444
36342e81
SW
445 /*
446 * Maintain a lookaside buffer of all parsed lines. We
447 * only do this if mparse_keep() has been invoked (the
448 * buffer may be accessed with mparse_getkeep()).
449 */
450
451 if (curp->secondary) {
070c62a6
FF
452 curp->secondary->buf = mandoc_realloc(
453 curp->secondary->buf,
454 curp->secondary->sz + pos + 2);
455 memcpy(curp->secondary->buf +
456 curp->secondary->sz,
457 ln.buf, pos);
36342e81
SW
458 curp->secondary->sz += pos;
459 curp->secondary->buf
460 [curp->secondary->sz] = '\n';
461 curp->secondary->sz++;
462 curp->secondary->buf
463 [curp->secondary->sz] = '\0';
464 }
60e1e752 465rerun:
070c62a6
FF
466 rr = roff_parseln(curp->roff, curp->line,
467 &ln.buf, &ln.sz, of, &of);
60e1e752
SW
468
469 switch (rr) {
070c62a6 470 case ROFF_REPARSE:
60e1e752
SW
471 if (REPARSE_LIMIT >= ++curp->reparse_count)
472 mparse_buf_r(curp, ln, 0);
473 else
474 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
070c62a6 475 curp->line, pos, NULL);
60e1e752
SW
476 pos = 0;
477 continue;
070c62a6 478 case ROFF_APPEND:
60e1e752
SW
479 pos = (int)strlen(ln.buf);
480 continue;
070c62a6 481 case ROFF_RERUN:
60e1e752 482 goto rerun;
070c62a6 483 case ROFF_IGN:
60e1e752
SW
484 pos = 0;
485 continue;
070c62a6 486 case ROFF_ERR:
60e1e752
SW
487 assert(MANDOCLEVEL_FATAL <= curp->file_status);
488 break;
070c62a6
FF
489 case ROFF_SO:
490 if (0 == (MPARSE_SO & curp->options) &&
491 (i >= (int)blk.sz || '\0' == blk.buf[i])) {
492 curp->sodest = mandoc_strdup(ln.buf + of);
493 free(ln.buf);
494 return;
495 }
36342e81
SW
496 /*
497 * We remove `so' clauses from our lookaside
498 * buffer because we're going to descend into
499 * the file recursively.
500 */
070c62a6 501 if (curp->secondary)
36342e81 502 curp->secondary->sz -= pos + 1;
f88b6c16 503 mparse_readfd(curp, -1, ln.buf + of);
070c62a6
FF
504 if (MANDOCLEVEL_FATAL <= curp->file_status) {
505 mandoc_vmsg(MANDOCERR_SO_FAIL,
506 curp, curp->line, pos,
507 ".so %s", ln.buf + of);
60e1e752 508 break;
070c62a6 509 }
60e1e752
SW
510 pos = 0;
511 continue;
512 default:
513 break;
514 }
515
516 /*
517 * If we encounter errors in the recursive parse, make
518 * sure we don't continue parsing.
519 */
520
521 if (MANDOCLEVEL_FATAL <= curp->file_status)
522 break;
523
524 /*
525 * If input parsers have not been allocated, do so now.
a4c7eb57 526 * We keep these instanced between parsers, but set them
60e1e752
SW
527 * locally per parse routine since we can use different
528 * parsers with each one.
529 */
530
531 if ( ! (curp->man || curp->mdoc))
532 pset(ln.buf + of, pos - of, curp);
533
070c62a6 534 /*
60e1e752
SW
535 * Lastly, push down into the parsers themselves. One
536 * of these will have already been set in the pset()
537 * routine.
538 * If libroff returns ROFF_TBL, then add it to the
539 * currently open parse. Since we only get here if
540 * there does exist data (see tbl_data.c), we're
541 * guaranteed that something's been allocated.
542 * Do the same for ROFF_EQN.
543 */
544
545 rc = -1;
546
547 if (ROFF_TBL == rr)
548 while (NULL != (span = roff_span(curp->roff))) {
549 rc = curp->man ?
070c62a6
FF
550 man_addspan(curp->man, span) :
551 mdoc_addspan(curp->mdoc, span);
60e1e752
SW
552 if (0 == rc)
553 break;
554 }
555 else if (ROFF_EQN == rr)
070c62a6
FF
556 rc = curp->mdoc ?
557 mdoc_addeqn(curp->mdoc,
558 roff_eqn(curp->roff)) :
559 man_addeqn(curp->man,
560 roff_eqn(curp->roff));
60e1e752
SW
561 else if (curp->man || curp->mdoc)
562 rc = curp->man ?
070c62a6
FF
563 man_parseln(curp->man,
564 curp->line, ln.buf, of) :
565 mdoc_parseln(curp->mdoc,
566 curp->line, ln.buf, of);
60e1e752
SW
567
568 if (0 == rc) {
569 assert(MANDOCLEVEL_FATAL <= curp->file_status);
570 break;
070c62a6
FF
571 } else if (2 == rc)
572 break;
60e1e752
SW
573
574 /* Temporary buffers typically are not full. */
575
576 if (0 == start && '\0' == blk.buf[i])
577 break;
578
579 /* Start the next input line. */
580
581 pos = 0;
582 }
583
584 free(ln.buf);
585}
586
60e1e752 587static int
070c62a6
FF
588read_whole_file(struct mparse *curp, const char *file, int fd,
589 struct buf *fb, int *with_mmap)
60e1e752 590{
60e1e752
SW
591 size_t off;
592 ssize_t ssz;
593
a4c7eb57
SW
594#ifdef HAVE_MMAP
595 struct stat st;
60e1e752 596 if (-1 == fstat(fd, &st)) {
070c62a6
FF
597 curp->file_status = MANDOCLEVEL_SYSERR;
598 if (curp->mmsg)
599 (*curp->mmsg)(MANDOCERR_SYSSTAT, curp->file_status,
600 file, 0, 0, strerror(errno));
60e1e752
SW
601 return(0);
602 }
603
604 /*
605 * If we're a regular file, try just reading in the whole entry
606 * via mmap(). This is faster than reading it into blocks, and
607 * since each file is only a few bytes to begin with, I'm not
608 * concerned that this is going to tank any machines.
609 */
610
611 if (S_ISREG(st.st_mode)) {
612 if (st.st_size >= (1U << 31)) {
070c62a6
FF
613 curp->file_status = MANDOCLEVEL_FATAL;
614 if (curp->mmsg)
615 (*curp->mmsg)(MANDOCERR_TOOLARGE,
616 curp->file_status, file, 0, 0, NULL);
60e1e752
SW
617 return(0);
618 }
619 *with_mmap = 1;
620 fb->sz = (size_t)st.st_size;
f88b6c16 621 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
60e1e752
SW
622 if (fb->buf != MAP_FAILED)
623 return(1);
624 }
a4c7eb57 625#endif
60e1e752
SW
626
627 /*
628 * If this isn't a regular file (like, say, stdin), then we must
629 * go the old way and just read things in bit by bit.
630 */
631
632 *with_mmap = 0;
633 off = 0;
634 fb->sz = 0;
635 fb->buf = NULL;
636 for (;;) {
637 if (off == fb->sz) {
638 if (fb->sz == (1U << 31)) {
070c62a6
FF
639 curp->file_status = MANDOCLEVEL_FATAL;
640 if (curp->mmsg)
641 (*curp->mmsg)(MANDOCERR_TOOLARGE,
642 curp->file_status,
643 file, 0, 0, NULL);
60e1e752
SW
644 break;
645 }
646 resize_buf(fb, 65536);
647 }
648 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
649 if (ssz == 0) {
650 fb->sz = off;
651 return(1);
652 }
653 if (ssz == -1) {
070c62a6
FF
654 curp->file_status = MANDOCLEVEL_SYSERR;
655 if (curp->mmsg)
656 (*curp->mmsg)(MANDOCERR_SYSREAD,
657 curp->file_status, file, 0, 0,
658 strerror(errno));
60e1e752
SW
659 break;
660 }
661 off += (size_t)ssz;
662 }
663
664 free(fb->buf);
665 fb->buf = NULL;
666 return(0);
667}
668
669static void
670mparse_end(struct mparse *curp)
671{
672
673 if (MANDOCLEVEL_FATAL <= curp->file_status)
674 return;
675
070c62a6
FF
676 if (curp->mdoc == NULL &&
677 curp->man == NULL &&
678 curp->sodest == NULL) {
679 if (curp->options & MPARSE_MDOC)
680 curp->mdoc = curp->pmdoc;
681 else {
682 if (curp->pman == NULL)
683 curp->pman = man_alloc(curp->roff, curp,
684 curp->options & MPARSE_QUICK ? 1 : 0);
685 curp->man = curp->pman;
686 }
687 }
688
60e1e752
SW
689 if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
690 assert(MANDOCLEVEL_FATAL <= curp->file_status);
691 return;
692 }
693
694 if (curp->man && ! man_endparse(curp->man)) {
695 assert(MANDOCLEVEL_FATAL <= curp->file_status);
696 return;
697 }
698
60e1e752
SW
699 roff_endparse(curp->roff);
700}
701
702static void
f88b6c16 703mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
60e1e752
SW
704{
705 const char *svfile;
f88b6c16
FF
706 static int recursion_depth;
707
708 if (64 < recursion_depth) {
709 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
710 return;
711 }
60e1e752 712
36342e81
SW
713 /* Line number is per-file. */
714 svfile = curp->file;
715 curp->file = file;
716 curp->line = 1;
f88b6c16 717 recursion_depth++;
36342e81
SW
718
719 mparse_buf_r(curp, blk, 1);
720
f88b6c16 721 if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status)
36342e81
SW
722 mparse_end(curp);
723
724 curp->file = svfile;
725}
726
727enum mandoclevel
728mparse_readmem(struct mparse *curp, const void *buf, size_t len,
729 const char *file)
730{
731 struct buf blk;
732
733 blk.buf = UNCONST(buf);
734 blk.sz = len;
735
f88b6c16 736 mparse_parse_buffer(curp, blk, file);
36342e81
SW
737 return(curp->file_status);
738}
739
f88b6c16
FF
740enum mandoclevel
741mparse_readfd(struct mparse *curp, int fd, const char *file)
36342e81
SW
742{
743 struct buf blk;
744 int with_mmap;
745
070c62a6
FF
746 if (-1 == fd && -1 == (fd = open(file, O_RDONLY, 0))) {
747 curp->file_status = MANDOCLEVEL_SYSERR;
748 if (curp->mmsg)
749 (*curp->mmsg)(MANDOCERR_SYSOPEN,
750 curp->file_status,
751 file, 0, 0, strerror(errno));
752 goto out;
753 }
754
36342e81
SW
755 /*
756 * Run for each opened file; may be called more than once for
757 * each full parse sequence if the opened file is nested (i.e.,
758 * from `so'). Simply sucks in the whole file and moves into
759 * the parse phase for the file.
760 */
60e1e752 761
070c62a6 762 if ( ! read_whole_file(curp, file, fd, &blk, &with_mmap))
f88b6c16 763 goto out;
60e1e752 764
f88b6c16 765 mparse_parse_buffer(curp, blk, file);
60e1e752 766
36342e81
SW
767#ifdef HAVE_MMAP
768 if (with_mmap)
769 munmap(blk.buf, blk.sz);
770 else
771#endif
772 free(blk.buf);
60e1e752
SW
773
774 if (STDIN_FILENO != fd && -1 == close(fd))
775 perror(file);
f88b6c16 776out:
60e1e752
SW
777 return(curp->file_status);
778}
779
780struct mparse *
070c62a6
FF
781mparse_alloc(int options, enum mandoclevel wlevel,
782 mandocmsg mmsg, const char *defos)
60e1e752
SW
783{
784 struct mparse *curp;
785
786 assert(wlevel <= MANDOCLEVEL_FATAL);
787
788 curp = mandoc_calloc(1, sizeof(struct mparse));
789
070c62a6 790 curp->options = options;
60e1e752
SW
791 curp->wlevel = wlevel;
792 curp->mmsg = mmsg;
f88b6c16 793 curp->defos = defos;
60e1e752 794
070c62a6
FF
795 curp->roff = roff_alloc(curp, options);
796 if (curp->options & MPARSE_MDOC)
797 curp->pmdoc = mdoc_alloc(
798 curp->roff, curp, curp->defos,
799 curp->options & MPARSE_QUICK ? 1 : 0);
800 if (curp->options & MPARSE_MAN)
801 curp->pman = man_alloc(curp->roff, curp,
802 curp->options & MPARSE_QUICK ? 1 : 0);
803
60e1e752
SW
804 return(curp);
805}
806
807void
808mparse_reset(struct mparse *curp)
809{
810
60e1e752
SW
811 roff_reset(curp->roff);
812
813 if (curp->mdoc)
814 mdoc_reset(curp->mdoc);
815 if (curp->man)
816 man_reset(curp->man);
36342e81
SW
817 if (curp->secondary)
818 curp->secondary->sz = 0;
60e1e752
SW
819
820 curp->file_status = MANDOCLEVEL_OK;
821 curp->mdoc = NULL;
822 curp->man = NULL;
070c62a6
FF
823
824 free(curp->sodest);
825 curp->sodest = NULL;
60e1e752
SW
826}
827
828void
829mparse_free(struct mparse *curp)
830{
831
832 if (curp->pmdoc)
833 mdoc_free(curp->pmdoc);
834 if (curp->pman)
835 man_free(curp->pman);
836 if (curp->roff)
837 roff_free(curp->roff);
36342e81
SW
838 if (curp->secondary)
839 free(curp->secondary->buf);
60e1e752 840
36342e81 841 free(curp->secondary);
070c62a6 842 free(curp->sodest);
60e1e752
SW
843 free(curp);
844}
845
846void
070c62a6
FF
847mparse_result(struct mparse *curp,
848 struct mdoc **mdoc, struct man **man, char **sodest)
60e1e752
SW
849{
850
070c62a6
FF
851 if (sodest && NULL != (*sodest = curp->sodest)) {
852 *mdoc = NULL;
853 *man = NULL;
854 return;
855 }
60e1e752
SW
856 if (mdoc)
857 *mdoc = curp->mdoc;
858 if (man)
859 *man = curp->man;
860}
861
862void
863mandoc_vmsg(enum mandocerr t, struct mparse *m,
864 int ln, int pos, const char *fmt, ...)
865{
866 char buf[256];
867 va_list ap;
868
869 va_start(ap, fmt);
070c62a6 870 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
60e1e752
SW
871 va_end(ap);
872
873 mandoc_msg(t, m, ln, pos, buf);
874}
875
876void
070c62a6 877mandoc_msg(enum mandocerr er, struct mparse *m,
60e1e752
SW
878 int ln, int col, const char *msg)
879{
880 enum mandoclevel level;
881
882 level = MANDOCLEVEL_FATAL;
883 while (er < mandoclimits[level])
884 level--;
885
886 if (level < m->wlevel)
887 return;
888
889 if (m->mmsg)
890 (*m->mmsg)(er, level, m->file, ln, col, msg);
891
892 if (m->file_status < level)
893 m->file_status = level;
894}
895
896const char *
897mparse_strerror(enum mandocerr er)
898{
899
900 return(mandocerrs[er]);
901}
902
903const char *
904mparse_strlevel(enum mandoclevel lvl)
905{
906 return(mandoclevels[lvl]);
907}
36342e81
SW
908
909void
910mparse_keep(struct mparse *p)
911{
912
913 assert(NULL == p->secondary);
914 p->secondary = mandoc_calloc(1, sizeof(struct buf));
915}
916
917const char *
918mparse_getkeep(const struct mparse *p)
919{
920
921 assert(p->secondary);
922 return(p->secondary->sz ? p->secondary->buf : NULL);
923}