1 /* $Id: mdoc_argv.c,v 1.77 2011/05/12 23:44:01 kristaps Exp $ */
3 * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21 #include <sys/types.h>
32 #include "libmandoc.h"
34 #define MULTI_STEP 5 /* pre-allocate argument values */
35 #define DELIMSZ 6 /* max possible size of a delimiter */
39 ARGSFL_DELIM, /* handle delimiters of [[::delim::][ ]+]+ */
40 ARGSFL_TABSEP /* handle tab/`Ta' separated phrases */
44 ARGV_NONE, /* no args to flag (e.g., -split) */
45 ARGV_SINGLE, /* one arg to flag (e.g., -file xxx) */
46 ARGV_MULTI, /* multiple args (e.g., -column xxx yyy) */
47 ARGV_OPT_SINGLE /* optional arg (e.g., -offset [xxx]) */
50 static enum mdocargt argv_a2arg(enum mdoct, const char *);
51 static enum margserr args(struct mdoc *, int, int *,
52 char *, enum argsflag, char **);
53 static int args_checkpunct(const char *, int);
54 static int argv(struct mdoc *, int,
55 struct mdoc_argv *, int *, char *);
56 static int argv_single(struct mdoc *, int,
57 struct mdoc_argv *, int *, char *);
58 static int argv_opt_single(struct mdoc *, int,
59 struct mdoc_argv *, int *, char *);
60 static int argv_multi(struct mdoc *, int,
61 struct mdoc_argv *, int *, char *);
62 static void argn_free(struct mdoc_arg *, int);
64 static const enum argvflag argvflags[MDOC_ARG_MAX] = {
65 ARGV_NONE, /* MDOC_Split */
66 ARGV_NONE, /* MDOC_Nosplit */
67 ARGV_NONE, /* MDOC_Ragged */
68 ARGV_NONE, /* MDOC_Unfilled */
69 ARGV_NONE, /* MDOC_Literal */
70 ARGV_SINGLE, /* MDOC_File */
71 ARGV_OPT_SINGLE, /* MDOC_Offset */
72 ARGV_NONE, /* MDOC_Bullet */
73 ARGV_NONE, /* MDOC_Dash */
74 ARGV_NONE, /* MDOC_Hyphen */
75 ARGV_NONE, /* MDOC_Item */
76 ARGV_NONE, /* MDOC_Enum */
77 ARGV_NONE, /* MDOC_Tag */
78 ARGV_NONE, /* MDOC_Diag */
79 ARGV_NONE, /* MDOC_Hang */
80 ARGV_NONE, /* MDOC_Ohang */
81 ARGV_NONE, /* MDOC_Inset */
82 ARGV_MULTI, /* MDOC_Column */
83 ARGV_SINGLE, /* MDOC_Width */
84 ARGV_NONE, /* MDOC_Compact */
85 ARGV_NONE, /* MDOC_Std */
86 ARGV_NONE, /* MDOC_Filled */
87 ARGV_NONE, /* MDOC_Words */
88 ARGV_NONE, /* MDOC_Emphasis */
89 ARGV_NONE, /* MDOC_Symbolic */
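90 ARGV_NONE /* NOTE(review): was labelled MDOC_Symbolic, duplicating the previous entry; presumably the enumerator after MDOC_Symbolic (MDOC_Nested?) - confirm against enum mdocargt */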
93 static const enum argsflag argflags[MDOC_MAX] = {
100 ARGSFL_NONE, /* Pp */
101 ARGSFL_DELIM, /* D1 */
102 ARGSFL_DELIM, /* Dl */
103 ARGSFL_NONE, /* Bd */
104 ARGSFL_NONE, /* Ed */
105 ARGSFL_NONE, /* Bl */
106 ARGSFL_NONE, /* El */
107 ARGSFL_NONE, /* It */
108 ARGSFL_DELIM, /* Ad */
109 ARGSFL_DELIM, /* An */
110 ARGSFL_DELIM, /* Ar */
111 ARGSFL_NONE, /* Cd */
112 ARGSFL_DELIM, /* Cm */
113 ARGSFL_DELIM, /* Dv */
114 ARGSFL_DELIM, /* Er */
115 ARGSFL_DELIM, /* Ev */
116 ARGSFL_NONE, /* Ex */
117 ARGSFL_DELIM, /* Fa */
118 ARGSFL_NONE, /* Fd */
119 ARGSFL_DELIM, /* Fl */
120 ARGSFL_DELIM, /* Fn */
121 ARGSFL_DELIM, /* Ft */
122 ARGSFL_DELIM, /* Ic */
123 ARGSFL_NONE, /* In */
124 ARGSFL_DELIM, /* Li */
125 ARGSFL_NONE, /* Nd */
126 ARGSFL_DELIM, /* Nm */
127 ARGSFL_DELIM, /* Op */
128 ARGSFL_NONE, /* Ot */
129 ARGSFL_DELIM, /* Pa */
130 ARGSFL_NONE, /* Rv */
131 ARGSFL_DELIM, /* St */
132 ARGSFL_DELIM, /* Va */
133 ARGSFL_DELIM, /* Vt */
134 ARGSFL_DELIM, /* Xr */
135 ARGSFL_NONE, /* %A */
136 ARGSFL_NONE, /* %B */
137 ARGSFL_NONE, /* %D */
138 ARGSFL_NONE, /* %I */
139 ARGSFL_NONE, /* %J */
140 ARGSFL_NONE, /* %N */
141 ARGSFL_NONE, /* %O */
142 ARGSFL_NONE, /* %P */
143 ARGSFL_NONE, /* %R */
144 ARGSFL_NONE, /* %T */
145 ARGSFL_NONE, /* %V */
146 ARGSFL_DELIM, /* Ac */
147 ARGSFL_NONE, /* Ao */
148 ARGSFL_DELIM, /* Aq */
149 ARGSFL_DELIM, /* At */
150 ARGSFL_DELIM, /* Bc */
151 ARGSFL_NONE, /* Bf */
152 ARGSFL_NONE, /* Bo */
153 ARGSFL_DELIM, /* Bq */
154 ARGSFL_DELIM, /* Bsx */
155 ARGSFL_DELIM, /* Bx */
156 ARGSFL_NONE, /* Db */
157 ARGSFL_DELIM, /* Dc */
158 ARGSFL_NONE, /* Do */
159 ARGSFL_DELIM, /* Dq */
160 ARGSFL_DELIM, /* Ec */
161 ARGSFL_NONE, /* Ef */
162 ARGSFL_DELIM, /* Em */
163 ARGSFL_NONE, /* Eo */
164 ARGSFL_DELIM, /* Fx */
165 ARGSFL_DELIM, /* Ms */
166 ARGSFL_DELIM, /* No */
167 ARGSFL_DELIM, /* Ns */
168 ARGSFL_DELIM, /* Nx */
169 ARGSFL_DELIM, /* Ox */
170 ARGSFL_DELIM, /* Pc */
171 ARGSFL_DELIM, /* Pf */
172 ARGSFL_NONE, /* Po */
173 ARGSFL_DELIM, /* Pq */
174 ARGSFL_DELIM, /* Qc */
175 ARGSFL_DELIM, /* Ql */
176 ARGSFL_NONE, /* Qo */
177 ARGSFL_DELIM, /* Qq */
178 ARGSFL_NONE, /* Re */
179 ARGSFL_NONE, /* Rs */
180 ARGSFL_DELIM, /* Sc */
181 ARGSFL_NONE, /* So */
182 ARGSFL_DELIM, /* Sq */
183 ARGSFL_NONE, /* Sm */
184 ARGSFL_DELIM, /* Sx */
185 ARGSFL_DELIM, /* Sy */
186 ARGSFL_DELIM, /* Tn */
187 ARGSFL_DELIM, /* Ux */
188 ARGSFL_DELIM, /* Xc */
189 ARGSFL_NONE, /* Xo */
190 ARGSFL_NONE, /* Fo */
191 ARGSFL_NONE, /* Fc */
192 ARGSFL_NONE, /* Oo */
193 ARGSFL_DELIM, /* Oc */
194 ARGSFL_NONE, /* Bk */
195 ARGSFL_NONE, /* Ek */
196 ARGSFL_NONE, /* Bt */
197 ARGSFL_NONE, /* Hf */
198 ARGSFL_NONE, /* Fr */
199 ARGSFL_NONE, /* Ud */
200 ARGSFL_NONE, /* Lb */
201 ARGSFL_NONE, /* Lp */
202 ARGSFL_DELIM, /* Lk */
203 ARGSFL_DELIM, /* Mt */
204 ARGSFL_DELIM, /* Brq */
205 ARGSFL_NONE, /* Bro */
206 ARGSFL_DELIM, /* Brc */
207 ARGSFL_NONE, /* %C */
208 ARGSFL_NONE, /* Es */
209 ARGSFL_NONE, /* En */
210 ARGSFL_NONE, /* Dx */
211 ARGSFL_NONE, /* %Q */
212 ARGSFL_NONE, /* br */
213 ARGSFL_NONE, /* sp */
214 ARGSFL_NONE, /* %U */
215 ARGSFL_NONE, /* Ta */
218 static const enum mdocargt args_Ex[] = {
223 static const enum mdocargt args_An[] = {
229 static const enum mdocargt args_Bd[] = {
241 static const enum mdocargt args_Bf[] = {
248 static const enum mdocargt args_Bk[] = {
253 static const enum mdocargt args_Bl[] = {
273 * Parse an argument from line text. This comes in the form of -key
274 * [value0...], which may either have a single mandatory value, at least
275 * one mandatory value, an optional single value, or no value.
278 mdoc_argv(struct mdoc *m, int line, enum mdoct tok,
279 struct mdoc_arg **v, int *pos, char *buf)
282 struct mdoc_argv tmp;
283 struct mdoc_arg *arg;
285 if ('\0' == buf[*pos])
288 assert(' ' != buf[*pos]);
290 /* Parse through to the first unescaped space. */
298 if (' ' == buf[*pos])
299 if ('\\' != buf[*pos - 1])
304 /* XXX - save zeroed byte, if not an argument. */
309 buf[(*pos)++] = '\0';
312 memset(&tmp, 0, sizeof(struct mdoc_argv));
316 /* See if our token accepts the argument. */
318 if (MDOC_ARG_MAX == (tmp.arg = argv_a2arg(tok, p))) {
319 /* XXX - restore saved zeroed byte. */
325 while (buf[*pos] && ' ' == buf[*pos])
328 if ( ! argv(m, line, &tmp, pos, buf))
331 if (NULL == (arg = *v))
332 arg = *v = mandoc_calloc(1, sizeof(struct mdoc_arg));
335 arg->argv = mandoc_realloc
336 (arg->argv, arg->argc * sizeof(struct mdoc_argv));
338 memcpy(&arg->argv[(int)arg->argc - 1],
339 &tmp, sizeof(struct mdoc_argv));
345 mdoc_argv_free(struct mdoc_arg *p)
359 for (i = (int)p->argc - 1; i >= 0; i--)
367 argn_free(struct mdoc_arg *p, int iarg)
369 struct mdoc_argv *arg;
372 arg = &p->argv[iarg];
374 if (arg->sz && arg->value) {
375 for (j = (int)arg->sz - 1; j >= 0; j--)
380 for (--p->argc; iarg < (int)p->argc; iarg++)
381 p->argv[iarg] = p->argv[iarg+1];
385 mdoc_zargs(struct mdoc *m, int line, int *pos, char *buf, char **v)
388 return(args(m, line, pos, buf, ARGSFL_NONE, v));
392 mdoc_args(struct mdoc *m, int line, int *pos,
393 char *buf, enum mdoct tok, char **v)
401 return(args(m, line, pos, buf, fl, v));
404 * We know that we're in an `It', so it's reasonable to expect
405 * us to be sitting in a `Bl'. Someday this may not be the case
406 * (if we allow random `It's sitting out there), so provide a
407 * safe fall-back into the default behaviour.
410 for (n = m->last; n; n = n->parent)
411 if (MDOC_Bl == n->tok)
412 if (LIST_column == n->norm->Bl.type) {
417 return(args(m, line, pos, buf, fl, v));
421 args(struct mdoc *m, int line, int *pos,
422 char *buf, enum argsflag fl, char **v)
427 assert(' ' != buf[*pos]);
429 if ('\0' == buf[*pos]) {
430 if (MDOC_PPHRASE & m->flags)
433 * If we're not in a partial phrase and the flag for
434 * being a phrase literal is still set, the punctuation
437 if (MDOC_PHRASELIT & m->flags)
438 mdoc_pmsg(m, line, *pos, MANDOCERR_BADQUOTE);
440 m->flags &= ~MDOC_PHRASELIT;
446 if (ARGSFL_DELIM == fl)
447 if (args_checkpunct(buf, *pos))
451 * First handle TABSEP items, restricted to `Bl -column'. This
452 * ignores conventional token parsing and instead uses tabs or
453 * `Ta' macros to separate phrases. Phrases are parsed again
454 * for arguments at a later phase.
457 if (ARGSFL_TABSEP == fl) {
458 /* Scan ahead to tab (can't be escaped). */
459 p = strchr(*v, '\t');
462 /* Scan ahead to unescaped `Ta'. */
463 if ( ! (MDOC_PHRASELIT & m->flags))
464 for (pp = *v; ; pp++) {
465 if (NULL == (pp = strstr(pp, "Ta")))
467 if (pp > *v && ' ' != *(pp - 1))
469 if (' ' == *(pp + 2) || '\0' == *(pp + 2))
473 /* By default, assume a phrase. */
477 * Adjust new-buffer position to be beyond delimiter
478 * mark (e.g., Ta -> end + 2).
481 *pos += pp < p ? 2 : 1;
482 rc = pp < p ? ARGS_PHRASE : ARGS_PPHRASE;
484 } else if (p && ! pp) {
487 } else if (pp && ! p) {
495 /* Whitespace check for eoln case... */
496 if ('\0' == *p && ' ' == *(p - 1))
497 mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE);
499 *pos += (int)(p - *v);
501 /* Strip delimiter's preceding whitespace. */
503 while (pp > *v && ' ' == *pp) {
504 if (pp > *v && '\\' == *(pp - 1))
510 /* Strip delimiter's following whitespace. */
511 for (pp = &buf[*pos]; ' ' == *pp; pp++, (*pos)++)
518 * Process a quoted literal. A quote begins with a double-quote
519 * and ends with a double-quote NOT followed by a double-quote.
520 * Whitespace is NOT involved in literal termination.
523 if (MDOC_PHRASELIT & m->flags || '\"' == buf[*pos]) {
524 if ( ! (MDOC_PHRASELIT & m->flags))
527 if (MDOC_PPHRASE & m->flags)
528 m->flags |= MDOC_PHRASELIT;
530 for ( ; buf[*pos]; (*pos)++) {
531 if ('\"' != buf[*pos])
533 if ('\"' != buf[*pos + 1])
538 if ('\0' == buf[*pos]) {
539 if (MDOC_PPHRASE & m->flags)
541 mdoc_pmsg(m, line, *pos, MANDOCERR_BADQUOTE);
545 m->flags &= ~MDOC_PHRASELIT;
546 buf[(*pos)++] = '\0';
548 if ('\0' == buf[*pos])
551 while (' ' == buf[*pos])
554 if ('\0' == buf[*pos])
555 mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE);
561 *v = mandoc_getarg(m->parse, &p, line, pos);
567 * Check if the string consists only of space-separated closing
568 * delimiters. This is a bit of a dance: the first must be a close
569 * delimiter, but it may be followed by middle delimiters. Arbitrary
570 * whitespace may separate these tokens.
573 args_checkpunct(const char *buf, int i)
579 /* First token must be a close-delimiter. */
581 for (j = 0; buf[i] && ' ' != buf[i] && j < DELIMSZ; j++, i++)
588 if (DELIM_CLOSE != mdoc_isdelim(dbuf))
591 while (' ' == buf[i])
594 /* Remaining must NOT be open/none. */
598 while (buf[i] && ' ' != buf[i] && j < DELIMSZ)
599 dbuf[j++] = buf[i++];
605 d = mdoc_isdelim(dbuf);
606 if (DELIM_NONE == d || DELIM_OPEN == d)
609 while (' ' == buf[i])
613 return('\0' == buf[i]);
617 * Match up an argument string (e.g., `-foo bar' having "foo") with the
618 * correct identifier. It must apply to the given macro. If none was
619 * found (including bad matches), return MDOC_ARG_MAX.
622 argv_a2arg(enum mdoct tok, const char *p)
624 const enum mdocargt *argsp;
650 return(MDOC_ARG_MAX);
655 for ( ; MDOC_ARG_MAX != *argsp ; argsp++)
656 if (0 == strcmp(p, mdoc_argnames[*argsp]))
659 return(MDOC_ARG_MAX);
663 argv_multi(struct mdoc *m, int line,
664 struct mdoc_argv *v, int *pos, char *buf)
669 for (v->sz = 0; ; v->sz++) {
670 if ('-' == buf[*pos])
672 ac = args(m, line, pos, buf, ARGSFL_NONE, &p);
673 if (ARGS_ERROR == ac)
675 else if (ARGS_EOLN == ac)
678 if (0 == v->sz % MULTI_STEP)
679 v->value = mandoc_realloc(v->value,
680 (v->sz + MULTI_STEP) * sizeof(char *));
682 v->value[(int)v->sz] = mandoc_strdup(p);
689 argv_opt_single(struct mdoc *m, int line,
690 struct mdoc_argv *v, int *pos, char *buf)
695 if ('-' == buf[*pos])
698 ac = args(m, line, pos, buf, ARGSFL_NONE, &p);
699 if (ARGS_ERROR == ac)
705 v->value = mandoc_malloc(sizeof(char *));
706 v->value[0] = mandoc_strdup(p);
712 * Parse a single, mandatory value from the stream.
715 argv_single(struct mdoc *m, int line,
716 struct mdoc_argv *v, int *pos, char *buf)
724 ac = args(m, line, pos, buf, ARGSFL_NONE, &p);
725 if (ARGS_EOLN == ac) {
726 mdoc_pmsg(m, line, ppos, MANDOCERR_SYNTARGVCOUNT);
728 } else if (ARGS_ERROR == ac)
732 v->value = mandoc_malloc(sizeof(char *));
733 v->value[0] = mandoc_strdup(p);
739 * Determine rules for parsing arguments. Arguments can either accept
740 * no parameters, an optional single parameter, one parameter, or
741 * multiple parameters.
744 argv(struct mdoc *mdoc, int line,
745 struct mdoc_argv *v, int *pos, char *buf)
751 switch (argvflags[v->arg]) {
753 return(argv_single(mdoc, line, v, pos, buf));
755 return(argv_multi(mdoc, line, v, pos, buf));
756 case (ARGV_OPT_SINGLE):
757 return(argv_opt_single(mdoc, line, v, pos, buf));