1 /* $Id: mdoc_argv.c,v 1.62 2010/12/24 14:00:40 kristaps Exp $ */
3 * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21 #include <sys/types.h>
31 #include "libmandoc.h"
34 * Routines to parse arguments of macros. Arguments follow the syntax
35 * of `-arg [val [valN...]]'. Arguments come in all types: quoted
36 * arguments, multiple values per argument, no-value arguments, etc.
38 * There's no limit to the number of arguments that may be allocated.
41 #define ARGV_NONE (1 << 0)
42 #define ARGV_SINGLE (1 << 1)
43 #define ARGV_MULTI (1 << 2)
44 #define ARGV_OPT_SINGLE (1 << 3)
48 static enum mdocargt argv_a2arg(enum mdoct, const char *);
49 static enum margserr args(struct mdoc *, int, int *,
50 char *, int, char **);
51 static int argv(struct mdoc *, int,
52 struct mdoc_argv *, int *, char *);
53 static int argv_single(struct mdoc *, int,
54 struct mdoc_argv *, int *, char *);
55 static int argv_opt_single(struct mdoc *, int,
56 struct mdoc_argv *, int *, char *);
57 static int argv_multi(struct mdoc *, int,
58 struct mdoc_argv *, int *, char *);
60 /* Per-argument flags. */
62 static int mdoc_argvflags[MDOC_ARG_MAX] = {
63 ARGV_NONE, /* MDOC_Split */
64 ARGV_NONE, /* MDOC_Nosplit */
65 ARGV_NONE, /* MDOC_Ragged */
66 ARGV_NONE, /* MDOC_Unfilled */
67 ARGV_NONE, /* MDOC_Literal */
68 ARGV_SINGLE, /* MDOC_File */
69 ARGV_OPT_SINGLE, /* MDOC_Offset */
70 ARGV_NONE, /* MDOC_Bullet */
71 ARGV_NONE, /* MDOC_Dash */
72 ARGV_NONE, /* MDOC_Hyphen */
73 ARGV_NONE, /* MDOC_Item */
74 ARGV_NONE, /* MDOC_Enum */
75 ARGV_NONE, /* MDOC_Tag */
76 ARGV_NONE, /* MDOC_Diag */
77 ARGV_NONE, /* MDOC_Hang */
78 ARGV_NONE, /* MDOC_Ohang */
79 ARGV_NONE, /* MDOC_Inset */
80 ARGV_MULTI, /* MDOC_Column */
81 ARGV_SINGLE, /* MDOC_Width */
82 ARGV_NONE, /* MDOC_Compact */
83 ARGV_NONE, /* MDOC_Std */
84 ARGV_NONE, /* MDOC_Filled */
85 ARGV_NONE, /* MDOC_Words */
86 ARGV_NONE, /* MDOC_Emphasis */
87 ARGV_NONE, /* MDOC_Symbolic */
88 ARGV_NONE /* NOTE(review): label previously repeated MDOC_Symbolic; presumably MDOC_Nested -- confirm against enum mdocargt */
91 static int mdoc_argflags[MDOC_MAX] = {
152 ARGS_DELIM, /* Bsx */
202 ARGS_DELIM, /* Brq */
204 ARGS_DELIM, /* Brc */
218 * Parse an argument from line text. This comes in the form of -key
219 * [value0...], which may either have a single mandatory value, at least
220 * one mandatory value, an optional single value, or no value.
223 mdoc_argv(struct mdoc *m, int line, enum mdoct tok,
224 struct mdoc_arg **v, int *pos, char *buf)
227 struct mdoc_argv tmp;
228 struct mdoc_arg *arg;
230 if ('\0' == buf[*pos])
233 assert(' ' != buf[*pos]);
235 /* Parse through to the first unescaped space. */
243 if (' ' == buf[*pos])
244 if ('\\' != buf[*pos - 1])
249 /* XXX - save zeroed byte, if not an argument. */
254 buf[(*pos)++] = '\0';
257 (void)memset(&tmp, 0, sizeof(struct mdoc_argv));
261 /* See if our token accepts the argument. */
263 if (MDOC_ARG_MAX == (tmp.arg = argv_a2arg(tok, p))) {
264 /* XXX - restore saved zeroed byte. */
270 while (buf[*pos] && ' ' == buf[*pos])
273 if ( ! argv(m, line, &tmp, pos, buf))
276 if (NULL == (arg = *v))
277 arg = *v = mandoc_calloc(1, sizeof(struct mdoc_arg));
280 arg->argv = mandoc_realloc
281 (arg->argv, arg->argc * sizeof(struct mdoc_argv));
283 (void)memcpy(&arg->argv[(int)arg->argc - 1],
284 &tmp, sizeof(struct mdoc_argv));
291 mdoc_argv_free(struct mdoc_arg *p)
305 for (i = (int)p->argc - 1; i >= 0; i--)
306 mdoc_argn_free(p, i);
314 mdoc_argn_free(struct mdoc_arg *p, int iarg)
316 struct mdoc_argv *arg;
319 arg = &p->argv[iarg];
321 if (arg->sz && arg->value) {
322 for (j = (int)arg->sz - 1; j >= 0; j--)
327 for (--p->argc; iarg < (int)p->argc; iarg++)
328 p->argv[iarg] = p->argv[iarg+1];
333 mdoc_zargs(struct mdoc *m, int line, int *pos,
334 char *buf, int flags, char **v)
337 return(args(m, line, pos, buf, flags, v));
342 mdoc_args(struct mdoc *m, int line, int *pos,
343 char *buf, enum mdoct tok, char **v)
348 fl = mdoc_argflags[tok];
351 return(args(m, line, pos, buf, fl, v));
354 * We know that we're in an `It', so it's reasonable to expect
355 * us to be sitting in a `Bl'. Someday this may not be the case
356 * (if we allow random `It's sitting out there), so provide a
357 * safe fall-back into the default behaviour.
360 for (n = m->last; n; n = n->parent)
361 if (MDOC_Bl == n->tok)
364 if (n && LIST_column == n->norm->Bl.type) {
369 return(args(m, line, pos, buf, fl, v));
374 args(struct mdoc *m, int line, int *pos,
375 char *buf, int fl, char **v)
383 * Parse out the terms (like `val' in `.Xx -arg val' or simply
384 * `.Xx val'), which can have all sorts of properties:
386 * ARGS_DELIM: use special handling if encountering trailing
387 * delimiters in the form of [[::delim::][ ]+]+.
389 * ARGS_NOWARN: don't post warnings. This is only used when
390 * re-parsing delimiters, as the warnings have already been
393 * ARGS_TABSEP: use special handling for tab/`Ta' separated
394 * phrases like in `Bl -column'.
397 assert(' ' != buf[*pos]);
399 if ('\0' == buf[*pos]) {
400 if (MDOC_PPHRASE & m->flags)
403 * If we're not in a partial phrase and the flag for
404 * being a phrase literal is still set, the punctuation
407 if (MDOC_PHRASELIT & m->flags)
408 if ( ! mdoc_pmsg(m, line, *pos, MANDOCERR_BADQUOTE))
411 m->flags &= ~MDOC_PHRASELIT;
416 * If the first character is a closing delimiter and we're to
417 * look for delimited strings, then pass down the buffer seeing
418 * if it follows the pattern of [[::delim::][ ]+]+. Note that
419 * we ONLY care about closing delimiters.
422 if ((fl & ARGS_DELIM) && DELIM_CLOSE == mdoc_iscdelim(buf[*pos])) {
423 for (i = *pos; buf[i]; ) {
424 d = mdoc_iscdelim(buf[i]);
425 if (DELIM_NONE == d || DELIM_OPEN == d)
428 if ('\0' == buf[i] || ' ' != buf[i])
431 while (buf[i] && ' ' == buf[i])
435 if ('\0' == buf[i]) {
437 if (i && ' ' != buf[i - 1])
439 if (ARGS_NOWARN & fl)
441 if ( ! mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE))
450 * First handle TABSEP items, restricted to `Bl -column'. This
451 * ignores conventional token parsing and instead uses tabs or
452 * `Ta' macros to separate phrases. Phrases are parsed again
453 * for arguments at a later phase.
456 if (ARGS_TABSEP & fl) {
457 /* Scan ahead to tab (can't be escaped). */
458 p = strchr(*v, '\t');
461 /* Scan ahead to unescaped `Ta'. */
462 if ( ! (MDOC_PHRASELIT & m->flags))
463 for (pp = *v; ; pp++) {
464 if (NULL == (pp = strstr(pp, "Ta")))
466 if (pp > *v && ' ' != *(pp - 1))
468 if (' ' == *(pp + 2) || '\0' == *(pp + 2))
472 /* By default, assume a phrase. */
476 * Adjust new-buffer position to be beyond delimiter
477 * mark (e.g., Ta -> end + 2).
480 *pos += pp < p ? 2 : 1;
481 rc = pp < p ? ARGS_PHRASE : ARGS_PPHRASE;
483 } else if (p && ! pp) {
486 } else if (pp && ! p) {
494 /* Whitespace check for eoln case... */
495 if ('\0' == *p && ' ' == *(p - 1) && ! (ARGS_NOWARN & fl))
496 if ( ! mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE))
499 *pos += (int)(p - *v);
501 /* Strip delimiter's preceding whitespace. */
503 while (pp > *v && ' ' == *pp) {
504 if (pp > *v && '\\' == *(pp - 1))
510 /* Strip delimiter's following whitespace. */
511 for (pp = &buf[*pos]; ' ' == *pp; pp++, (*pos)++)
518 * Process a quoted literal. A quote begins with a double-quote
519 * and ends with a double-quote NOT followed by a double-quote.
520 * Whitespace is NOT involved in literal termination.
523 if (MDOC_PHRASELIT & m->flags || '\"' == buf[*pos]) {
524 if ( ! (MDOC_PHRASELIT & m->flags))
527 if (MDOC_PPHRASE & m->flags)
528 m->flags |= MDOC_PHRASELIT;
530 for ( ; buf[*pos]; (*pos)++) {
531 if ('\"' != buf[*pos])
533 if ('\"' != buf[*pos + 1])
538 if ('\0' == buf[*pos]) {
539 if (ARGS_NOWARN & fl || MDOC_PPHRASE & m->flags)
541 if ( ! mdoc_pmsg(m, line, *pos, MANDOCERR_BADQUOTE))
546 m->flags &= ~MDOC_PHRASELIT;
547 buf[(*pos)++] = '\0';
549 if ('\0' == buf[*pos])
552 while (' ' == buf[*pos])
555 if (0 == buf[*pos] && ! (ARGS_NOWARN & fl))
556 if ( ! mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE))
563 * A non-quoted term progresses until either the end of line or
564 * a non-escaped whitespace.
567 for ( ; buf[*pos]; (*pos)++)
568 if (*pos && ' ' == buf[*pos] && '\\' != buf[*pos - 1])
571 if ('\0' == buf[*pos])
574 buf[(*pos)++] = '\0';
576 while (' ' == buf[*pos])
579 if ('\0' == buf[*pos] && ! (ARGS_NOWARN & fl))
580 if ( ! mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE))
588 argv_a2arg(enum mdoct tok, const char *p)
592 * Parse an argument identifier from its text. XXX - this
593 * should really be table-driven to clarify the code.
595 * If you add an argument to the list, make sure that you
596 * register it here with its one or more macros!
601 if (0 == strcmp(p, "split"))
603 else if (0 == strcmp(p, "nosplit"))
604 return(MDOC_Nosplit);
608 if (0 == strcmp(p, "ragged"))
610 else if (0 == strcmp(p, "unfilled"))
611 return(MDOC_Unfilled);
612 else if (0 == strcmp(p, "filled"))
614 else if (0 == strcmp(p, "literal"))
615 return(MDOC_Literal);
616 else if (0 == strcmp(p, "file"))
618 else if (0 == strcmp(p, "offset"))
620 else if (0 == strcmp(p, "compact"))
621 return(MDOC_Compact);
622 else if (0 == strcmp(p, "centered"))
623 return(MDOC_Centred);
627 if (0 == strcmp(p, "emphasis"))
628 return(MDOC_Emphasis);
629 else if (0 == strcmp(p, "literal"))
630 return(MDOC_Literal);
631 else if (0 == strcmp(p, "symbolic"))
632 return(MDOC_Symbolic);
636 if (0 == strcmp(p, "words"))
641 if (0 == strcmp(p, "bullet"))
643 else if (0 == strcmp(p, "dash"))
645 else if (0 == strcmp(p, "hyphen"))
647 else if (0 == strcmp(p, "item"))
649 else if (0 == strcmp(p, "enum"))
651 else if (0 == strcmp(p, "tag"))
653 else if (0 == strcmp(p, "diag"))
655 else if (0 == strcmp(p, "hang"))
657 else if (0 == strcmp(p, "ohang"))
659 else if (0 == strcmp(p, "inset"))
661 else if (0 == strcmp(p, "column"))
663 else if (0 == strcmp(p, "width"))
665 else if (0 == strcmp(p, "offset"))
667 else if (0 == strcmp(p, "compact"))
668 return(MDOC_Compact);
669 else if (0 == strcmp(p, "nested"))
676 if (0 == strcmp(p, "std"))
683 return(MDOC_ARG_MAX);
688 argv_multi(struct mdoc *m, int line,
689 struct mdoc_argv *v, int *pos, char *buf)
694 for (v->sz = 0; ; v->sz++) {
695 if ('-' == buf[*pos])
697 ac = args(m, line, pos, buf, 0, &p);
698 if (ARGS_ERROR == ac)
700 else if (ARGS_EOLN == ac)
703 if (0 == v->sz % MULTI_STEP)
704 v->value = mandoc_realloc(v->value,
705 (v->sz + MULTI_STEP) * sizeof(char *));
707 v->value[(int)v->sz] = mandoc_strdup(p);
715 argv_opt_single(struct mdoc *m, int line,
716 struct mdoc_argv *v, int *pos, char *buf)
721 if ('-' == buf[*pos])
724 ac = args(m, line, pos, buf, 0, &p);
725 if (ARGS_ERROR == ac)
731 v->value = mandoc_malloc(sizeof(char *));
732 v->value[0] = mandoc_strdup(p);
739 * Parse a single, mandatory value from the stream.
742 argv_single(struct mdoc *m, int line,
743 struct mdoc_argv *v, int *pos, char *buf)
751 ac = args(m, line, pos, buf, 0, &p);
752 if (ARGS_EOLN == ac) {
753 mdoc_pmsg(m, line, ppos, MANDOCERR_SYNTARGVCOUNT);
755 } else if (ARGS_ERROR == ac)
759 v->value = mandoc_malloc(sizeof(char *));
760 v->value[0] = mandoc_strdup(p);
767 * Determine rules for parsing arguments. Arguments can either accept
768 * no parameters, an optional single parameter, one parameter, or
769 * multiple parameters.
772 argv(struct mdoc *mdoc, int line,
773 struct mdoc_argv *v, int *pos, char *buf)
779 switch (mdoc_argvflags[v->arg]) {
781 return(argv_single(mdoc, line, v, pos, buf));
783 return(argv_multi(mdoc, line, v, pos, buf));
784 case (ARGV_OPT_SINGLE):
785 return(argv_opt_single(mdoc, line, v, pos, buf));