Commit | Line | Data |
---|---|---|
070c62a6 | 1 | /* $Id: mdoc_argv.c,v 1.95 2014/07/06 19:09:00 schwarze Exp $ */ |
80387638 | 2 | /* |
36342e81 | 3 | * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> |
f88b6c16 | 4 | * Copyright (c) 2012 Ingo Schwarze <schwarze@openbsd.org> |
80387638 SW |
5 | * |
6 | * Permission to use, copy, modify, and distribute this software for any | |
7 | * purpose with or without fee is hereby granted, provided that the above | |
8 | * copyright notice and this permission notice appear in all copies. | |
9 | * | |
10 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
11 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
12 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
13 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
14 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
15 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
17 | */ | |
18 | #ifdef HAVE_CONFIG_H | |
19 | #include "config.h" | |
20 | #endif | |
21 | ||
22 | #include <sys/types.h> | |
23 | ||
24 | #include <assert.h> | |
80387638 SW |
25 | #include <stdlib.h> |
26 | #include <stdio.h> | |
27 | #include <string.h> | |
28 | ||
60e1e752 | 29 | #include "mdoc.h" |
80387638 | 30 | #include "mandoc.h" |
070c62a6 | 31 | #include "mandoc_aux.h" |
80387638 SW |
32 | #include "libmdoc.h" |
33 | #include "libmandoc.h" | |
34 | ||
/* Value slots added to a multi-value argument each time it grows. */
#define	MULTI_STEP	 5
/* Size of the scratch buffer for one delimiter token, NUL included. */
#define	DELIMSZ		 6
a4c7eb57 SW |
37 | |
/*
 * Scanning mode handed to args() for the words following a macro.
 */
enum	argsflag {
	/* No special treatment. */
	ARGSFL_NONE = 0,
	/* Report trailing runs of closing delimiters as ARGS_PUNCT. */
	ARGSFL_DELIM,
	/* Split the line into phrases at tab characters or `Ta' macros. */
	ARGSFL_TABSEP
};
43 | ||
/*
 * How many values a -flag argument is followed by.
 */
enum	argvflag {
	/* The flag stands alone (e.g., -split). */
	ARGV_NONE,
	/* The flag is followed by one value (e.g., -file xxx). */
	ARGV_SINGLE,
	/* The flag is followed by several values (e.g., -column xxx yyy). */
	ARGV_MULTI
};
80387638 | 49 | |
36342e81 SW |
50 | struct mdocarg { |
51 | enum argsflag flags; | |
52 | const enum mdocargt *argvs; | |
53 | }; | |
54 | ||
/* Forward declarations of the file-local helpers defined below. */
static	void		 argn_free(struct mdoc_arg *p, int iarg);
static	enum margserr	 args(struct mdoc *mdoc, int line, int *pos,
				char *buf, enum argsflag fl, char **v);
static	int		 args_checkpunct(const char *buf, int i);
static	int		 argv_multi(struct mdoc *mdoc, int line,
				struct mdoc_argv *v, int *pos, char *buf);
static	int		 argv_single(struct mdoc *mdoc, int line,
				struct mdoc_argv *v, int *pos, char *buf);
63 | ||
60e1e752 | 64 | static const enum argvflag argvflags[MDOC_ARG_MAX] = { |
80387638 SW |
65 | ARGV_NONE, /* MDOC_Split */ |
66 | ARGV_NONE, /* MDOC_Nosplit */ | |
67 | ARGV_NONE, /* MDOC_Ragged */ | |
68 | ARGV_NONE, /* MDOC_Unfilled */ | |
69 | ARGV_NONE, /* MDOC_Literal */ | |
70 | ARGV_SINGLE, /* MDOC_File */ | |
f88b6c16 | 71 | ARGV_SINGLE, /* MDOC_Offset */ |
80387638 SW |
72 | ARGV_NONE, /* MDOC_Bullet */ |
73 | ARGV_NONE, /* MDOC_Dash */ | |
74 | ARGV_NONE, /* MDOC_Hyphen */ | |
75 | ARGV_NONE, /* MDOC_Item */ | |
76 | ARGV_NONE, /* MDOC_Enum */ | |
77 | ARGV_NONE, /* MDOC_Tag */ | |
78 | ARGV_NONE, /* MDOC_Diag */ | |
79 | ARGV_NONE, /* MDOC_Hang */ | |
80 | ARGV_NONE, /* MDOC_Ohang */ | |
81 | ARGV_NONE, /* MDOC_Inset */ | |
82 | ARGV_MULTI, /* MDOC_Column */ | |
f88b6c16 | 83 | ARGV_SINGLE, /* MDOC_Width */ |
80387638 SW |
84 | ARGV_NONE, /* MDOC_Compact */ |
85 | ARGV_NONE, /* MDOC_Std */ | |
86 | ARGV_NONE, /* MDOC_Filled */ | |
87 | ARGV_NONE, /* MDOC_Words */ | |
88 | ARGV_NONE, /* MDOC_Emphasis */ | |
89 | ARGV_NONE, /* MDOC_Symbolic */ | |
90 | ARGV_NONE /* MDOC_Symbolic */ | |
91 | }; | |
92 | ||
60e1e752 SW |
93 | static const enum mdocargt args_Ex[] = { |
94 | MDOC_Std, | |
95 | MDOC_ARG_MAX | |
96 | }; | |
97 | ||
98 | static const enum mdocargt args_An[] = { | |
99 | MDOC_Split, | |
100 | MDOC_Nosplit, | |
101 | MDOC_ARG_MAX | |
102 | }; | |
103 | ||
104 | static const enum mdocargt args_Bd[] = { | |
105 | MDOC_Ragged, | |
106 | MDOC_Unfilled, | |
107 | MDOC_Filled, | |
108 | MDOC_Literal, | |
109 | MDOC_File, | |
110 | MDOC_Offset, | |
111 | MDOC_Compact, | |
112 | MDOC_Centred, | |
113 | MDOC_ARG_MAX | |
114 | }; | |
115 | ||
116 | static const enum mdocargt args_Bf[] = { | |
117 | MDOC_Emphasis, | |
118 | MDOC_Literal, | |
119 | MDOC_Symbolic, | |
120 | MDOC_ARG_MAX | |
121 | }; | |
122 | ||
123 | static const enum mdocargt args_Bk[] = { | |
124 | MDOC_Words, | |
125 | MDOC_ARG_MAX | |
126 | }; | |
127 | ||
128 | static const enum mdocargt args_Bl[] = { | |
129 | MDOC_Bullet, | |
130 | MDOC_Dash, | |
131 | MDOC_Hyphen, | |
132 | MDOC_Item, | |
133 | MDOC_Enum, | |
134 | MDOC_Tag, | |
135 | MDOC_Diag, | |
136 | MDOC_Hang, | |
137 | MDOC_Ohang, | |
138 | MDOC_Inset, | |
139 | MDOC_Column, | |
140 | MDOC_Width, | |
141 | MDOC_Offset, | |
142 | MDOC_Compact, | |
143 | MDOC_Nested, | |
144 | MDOC_ARG_MAX | |
145 | }; | |
80387638 | 146 | |
36342e81 | 147 | static const struct mdocarg mdocargs[MDOC_MAX] = { |
f88b6c16 | 148 | { ARGSFL_DELIM, NULL }, /* Ap */ |
36342e81 SW |
149 | { ARGSFL_NONE, NULL }, /* Dd */ |
150 | { ARGSFL_NONE, NULL }, /* Dt */ | |
151 | { ARGSFL_NONE, NULL }, /* Os */ | |
152 | { ARGSFL_NONE, NULL }, /* Sh */ | |
070c62a6 FF |
153 | { ARGSFL_NONE, NULL }, /* Ss */ |
154 | { ARGSFL_NONE, NULL }, /* Pp */ | |
36342e81 SW |
155 | { ARGSFL_DELIM, NULL }, /* D1 */ |
156 | { ARGSFL_DELIM, NULL }, /* Dl */ | |
157 | { ARGSFL_NONE, args_Bd }, /* Bd */ | |
158 | { ARGSFL_NONE, NULL }, /* Ed */ | |
159 | { ARGSFL_NONE, args_Bl }, /* Bl */ | |
160 | { ARGSFL_NONE, NULL }, /* El */ | |
161 | { ARGSFL_NONE, NULL }, /* It */ | |
070c62a6 | 162 | { ARGSFL_DELIM, NULL }, /* Ad */ |
36342e81 SW |
163 | { ARGSFL_DELIM, args_An }, /* An */ |
164 | { ARGSFL_DELIM, NULL }, /* Ar */ | |
f88b6c16 | 165 | { ARGSFL_DELIM, NULL }, /* Cd */ |
36342e81 | 166 | { ARGSFL_DELIM, NULL }, /* Cm */ |
070c62a6 FF |
167 | { ARGSFL_DELIM, NULL }, /* Dv */ |
168 | { ARGSFL_DELIM, NULL }, /* Er */ | |
169 | { ARGSFL_DELIM, NULL }, /* Ev */ | |
36342e81 | 170 | { ARGSFL_NONE, args_Ex }, /* Ex */ |
070c62a6 FF |
171 | { ARGSFL_DELIM, NULL }, /* Fa */ |
172 | { ARGSFL_NONE, NULL }, /* Fd */ | |
36342e81 | 173 | { ARGSFL_DELIM, NULL }, /* Fl */ |
070c62a6 FF |
174 | { ARGSFL_DELIM, NULL }, /* Fn */ |
175 | { ARGSFL_DELIM, NULL }, /* Ft */ | |
176 | { ARGSFL_DELIM, NULL }, /* Ic */ | |
177 | { ARGSFL_DELIM, NULL }, /* In */ | |
36342e81 | 178 | { ARGSFL_DELIM, NULL }, /* Li */ |
070c62a6 FF |
179 | { ARGSFL_NONE, NULL }, /* Nd */ |
180 | { ARGSFL_DELIM, NULL }, /* Nm */ | |
36342e81 | 181 | { ARGSFL_DELIM, NULL }, /* Op */ |
070c62a6 | 182 | { ARGSFL_DELIM, NULL }, /* Ot */ |
36342e81 SW |
183 | { ARGSFL_DELIM, NULL }, /* Pa */ |
184 | { ARGSFL_NONE, args_Ex }, /* Rv */ | |
070c62a6 | 185 | { ARGSFL_DELIM, NULL }, /* St */ |
36342e81 | 186 | { ARGSFL_DELIM, NULL }, /* Va */ |
070c62a6 | 187 | { ARGSFL_DELIM, NULL }, /* Vt */ |
36342e81 SW |
188 | { ARGSFL_DELIM, NULL }, /* Xr */ |
189 | { ARGSFL_NONE, NULL }, /* %A */ | |
190 | { ARGSFL_NONE, NULL }, /* %B */ | |
191 | { ARGSFL_NONE, NULL }, /* %D */ | |
192 | { ARGSFL_NONE, NULL }, /* %I */ | |
193 | { ARGSFL_NONE, NULL }, /* %J */ | |
194 | { ARGSFL_NONE, NULL }, /* %N */ | |
195 | { ARGSFL_NONE, NULL }, /* %O */ | |
196 | { ARGSFL_NONE, NULL }, /* %P */ | |
197 | { ARGSFL_NONE, NULL }, /* %R */ | |
198 | { ARGSFL_NONE, NULL }, /* %T */ | |
199 | { ARGSFL_NONE, NULL }, /* %V */ | |
200 | { ARGSFL_DELIM, NULL }, /* Ac */ | |
201 | { ARGSFL_NONE, NULL }, /* Ao */ | |
202 | { ARGSFL_DELIM, NULL }, /* Aq */ | |
203 | { ARGSFL_DELIM, NULL }, /* At */ | |
204 | { ARGSFL_DELIM, NULL }, /* Bc */ | |
070c62a6 | 205 | { ARGSFL_NONE, args_Bf }, /* Bf */ |
36342e81 SW |
206 | { ARGSFL_NONE, NULL }, /* Bo */ |
207 | { ARGSFL_DELIM, NULL }, /* Bq */ | |
208 | { ARGSFL_DELIM, NULL }, /* Bsx */ | |
209 | { ARGSFL_DELIM, NULL }, /* Bx */ | |
210 | { ARGSFL_NONE, NULL }, /* Db */ | |
211 | { ARGSFL_DELIM, NULL }, /* Dc */ | |
212 | { ARGSFL_NONE, NULL }, /* Do */ | |
213 | { ARGSFL_DELIM, NULL }, /* Dq */ | |
214 | { ARGSFL_DELIM, NULL }, /* Ec */ | |
215 | { ARGSFL_NONE, NULL }, /* Ef */ | |
070c62a6 | 216 | { ARGSFL_DELIM, NULL }, /* Em */ |
36342e81 SW |
217 | { ARGSFL_NONE, NULL }, /* Eo */ |
218 | { ARGSFL_DELIM, NULL }, /* Fx */ | |
219 | { ARGSFL_DELIM, NULL }, /* Ms */ | |
220 | { ARGSFL_DELIM, NULL }, /* No */ | |
221 | { ARGSFL_DELIM, NULL }, /* Ns */ | |
222 | { ARGSFL_DELIM, NULL }, /* Nx */ | |
223 | { ARGSFL_DELIM, NULL }, /* Ox */ | |
224 | { ARGSFL_DELIM, NULL }, /* Pc */ | |
225 | { ARGSFL_DELIM, NULL }, /* Pf */ | |
226 | { ARGSFL_NONE, NULL }, /* Po */ | |
227 | { ARGSFL_DELIM, NULL }, /* Pq */ | |
228 | { ARGSFL_DELIM, NULL }, /* Qc */ | |
229 | { ARGSFL_DELIM, NULL }, /* Ql */ | |
230 | { ARGSFL_NONE, NULL }, /* Qo */ | |
231 | { ARGSFL_DELIM, NULL }, /* Qq */ | |
232 | { ARGSFL_NONE, NULL }, /* Re */ | |
233 | { ARGSFL_NONE, NULL }, /* Rs */ | |
234 | { ARGSFL_DELIM, NULL }, /* Sc */ | |
235 | { ARGSFL_NONE, NULL }, /* So */ | |
236 | { ARGSFL_DELIM, NULL }, /* Sq */ | |
237 | { ARGSFL_NONE, NULL }, /* Sm */ | |
238 | { ARGSFL_DELIM, NULL }, /* Sx */ | |
239 | { ARGSFL_DELIM, NULL }, /* Sy */ | |
240 | { ARGSFL_DELIM, NULL }, /* Tn */ | |
241 | { ARGSFL_DELIM, NULL }, /* Ux */ | |
242 | { ARGSFL_DELIM, NULL }, /* Xc */ | |
243 | { ARGSFL_NONE, NULL }, /* Xo */ | |
070c62a6 FF |
244 | { ARGSFL_NONE, NULL }, /* Fo */ |
245 | { ARGSFL_DELIM, NULL }, /* Fc */ | |
36342e81 SW |
246 | { ARGSFL_NONE, NULL }, /* Oo */ |
247 | { ARGSFL_DELIM, NULL }, /* Oc */ | |
248 | { ARGSFL_NONE, args_Bk }, /* Bk */ | |
249 | { ARGSFL_NONE, NULL }, /* Ek */ | |
250 | { ARGSFL_NONE, NULL }, /* Bt */ | |
251 | { ARGSFL_NONE, NULL }, /* Hf */ | |
070c62a6 | 252 | { ARGSFL_DELIM, NULL }, /* Fr */ |
36342e81 | 253 | { ARGSFL_NONE, NULL }, /* Ud */ |
f88b6c16 | 254 | { ARGSFL_DELIM, NULL }, /* Lb */ |
36342e81 SW |
255 | { ARGSFL_NONE, NULL }, /* Lp */ |
256 | { ARGSFL_DELIM, NULL }, /* Lk */ | |
257 | { ARGSFL_DELIM, NULL }, /* Mt */ | |
258 | { ARGSFL_DELIM, NULL }, /* Brq */ | |
259 | { ARGSFL_NONE, NULL }, /* Bro */ | |
260 | { ARGSFL_DELIM, NULL }, /* Brc */ | |
261 | { ARGSFL_NONE, NULL }, /* %C */ | |
262 | { ARGSFL_NONE, NULL }, /* Es */ | |
070c62a6 | 263 | { ARGSFL_DELIM, NULL }, /* En */ |
7888c61d | 264 | { ARGSFL_DELIM, NULL }, /* Dx */ |
36342e81 SW |
265 | { ARGSFL_NONE, NULL }, /* %Q */ |
266 | { ARGSFL_NONE, NULL }, /* br */ | |
267 | { ARGSFL_NONE, NULL }, /* sp */ | |
268 | { ARGSFL_NONE, NULL }, /* %U */ | |
269 | { ARGSFL_NONE, NULL }, /* Ta */ | |
070c62a6 | 270 | { ARGSFL_NONE, NULL }, /* ll */ |
36342e81 SW |
271 | }; |
272 | ||
273 | ||
80387638 SW |
274 | /* |
275 | * Parse an argument from line text. This comes in the form of -key | |
276 | * [value0...], which may either have a single mandatory value, at least | |
277 | * one mandatory value, an optional single value, or no value. | |
278 | */ | |
279 | enum margverr | |
f88b6c16 | 280 | mdoc_argv(struct mdoc *mdoc, int line, enum mdoct tok, |
80387638 SW |
281 | struct mdoc_arg **v, int *pos, char *buf) |
282 | { | |
283 | char *p, sv; | |
284 | struct mdoc_argv tmp; | |
285 | struct mdoc_arg *arg; | |
36342e81 | 286 | const enum mdocargt *ap; |
80387638 SW |
287 | |
288 | if ('\0' == buf[*pos]) | |
289 | return(ARGV_EOLN); | |
36342e81 SW |
290 | else if (NULL == (ap = mdocargs[tok].argvs)) |
291 | return(ARGV_WORD); | |
292 | else if ('-' != buf[*pos]) | |
293 | return(ARGV_WORD); | |
80387638 | 294 | |
36342e81 | 295 | /* Seek to the first unescaped space. */ |
80387638 SW |
296 | |
297 | p = &buf[++(*pos)]; | |
298 | ||
299 | assert(*pos > 0); | |
300 | ||
36342e81 SW |
301 | for ( ; buf[*pos] ; (*pos)++) |
302 | if (' ' == buf[*pos] && '\\' != buf[*pos - 1]) | |
303 | break; | |
80387638 | 304 | |
070c62a6 | 305 | /* |
36342e81 SW |
306 | * We want to nil-terminate the word to look it up (it's easier |
307 | * that way). But we may not have a flag, in which case we need | |
308 | * to restore the line as-is. So keep around the stray byte, | |
309 | * which we'll reset upon exiting (if necessary). | |
310 | */ | |
80387638 | 311 | |
070c62a6 | 312 | if ('\0' != (sv = buf[*pos])) |
80387638 | 313 | buf[(*pos)++] = '\0'; |
36342e81 SW |
314 | |
315 | /* | |
316 | * Now look up the word as a flag. Use temporary storage that | |
317 | * we'll copy into the node's flags, if necessary. | |
318 | */ | |
80387638 | 319 | |
60e1e752 | 320 | memset(&tmp, 0, sizeof(struct mdoc_argv)); |
36342e81 | 321 | |
80387638 SW |
322 | tmp.line = line; |
323 | tmp.pos = *pos; | |
36342e81 | 324 | tmp.arg = MDOC_ARG_MAX; |
80387638 | 325 | |
36342e81 SW |
326 | while (MDOC_ARG_MAX != (tmp.arg = *ap++)) |
327 | if (0 == strcmp(p, mdoc_argnames[tmp.arg])) | |
328 | break; | |
80387638 | 329 | |
36342e81 | 330 | if (MDOC_ARG_MAX == tmp.arg) { |
070c62a6 | 331 | /* |
36342e81 SW |
332 | * The flag was not found. |
333 | * Restore saved zeroed byte and return as a word. | |
334 | */ | |
80387638 SW |
335 | if (sv) |
336 | buf[*pos - 1] = sv; | |
337 | return(ARGV_WORD); | |
338 | } | |
339 | ||
36342e81 SW |
340 | /* Read to the next word (the argument). */ |
341 | ||
80387638 SW |
342 | while (buf[*pos] && ' ' == buf[*pos]) |
343 | (*pos)++; | |
344 | ||
36342e81 | 345 | switch (argvflags[tmp.arg]) { |
070c62a6 | 346 | case ARGV_SINGLE: |
f88b6c16 | 347 | if ( ! argv_single(mdoc, line, &tmp, pos, buf)) |
36342e81 SW |
348 | return(ARGV_ERROR); |
349 | break; | |
070c62a6 | 350 | case ARGV_MULTI: |
f88b6c16 | 351 | if ( ! argv_multi(mdoc, line, &tmp, pos, buf)) |
36342e81 SW |
352 | return(ARGV_ERROR); |
353 | break; | |
070c62a6 | 354 | case ARGV_NONE: |
36342e81 SW |
355 | break; |
356 | } | |
80387638 SW |
357 | |
358 | if (NULL == (arg = *v)) | |
359 | arg = *v = mandoc_calloc(1, sizeof(struct mdoc_arg)); | |
360 | ||
361 | arg->argc++; | |
070c62a6 FF |
362 | arg->argv = mandoc_reallocarray(arg->argv, |
363 | arg->argc, sizeof(struct mdoc_argv)); | |
80387638 | 364 | |
070c62a6 FF |
365 | memcpy(&arg->argv[(int)arg->argc - 1], &tmp, |
366 | sizeof(struct mdoc_argv)); | |
80387638 SW |
367 | |
368 | return(ARGV_ARG); | |
369 | } | |
370 | ||
80387638 SW |
371 | void |
372 | mdoc_argv_free(struct mdoc_arg *p) | |
373 | { | |
374 | int i; | |
375 | ||
376 | if (NULL == p) | |
377 | return; | |
378 | ||
379 | if (p->refcnt) { | |
380 | --(p->refcnt); | |
381 | if (p->refcnt) | |
382 | return; | |
383 | } | |
384 | assert(p->argc); | |
385 | ||
386 | for (i = (int)p->argc - 1; i >= 0; i--) | |
60e1e752 | 387 | argn_free(p, i); |
80387638 SW |
388 | |
389 | free(p->argv); | |
390 | free(p); | |
391 | } | |
392 | ||
60e1e752 SW |
393 | static void |
394 | argn_free(struct mdoc_arg *p, int iarg) | |
80387638 SW |
395 | { |
396 | struct mdoc_argv *arg; | |
397 | int j; | |
398 | ||
399 | arg = &p->argv[iarg]; | |
400 | ||
401 | if (arg->sz && arg->value) { | |
070c62a6 | 402 | for (j = (int)arg->sz - 1; j >= 0; j--) |
80387638 SW |
403 | free(arg->value[j]); |
404 | free(arg->value); | |
405 | } | |
406 | ||
407 | for (--p->argc; iarg < (int)p->argc; iarg++) | |
408 | p->argv[iarg] = p->argv[iarg+1]; | |
409 | } | |
410 | ||
80387638 | 411 | enum margserr |
f88b6c16 | 412 | mdoc_zargs(struct mdoc *mdoc, int line, int *pos, char *buf, char **v) |
80387638 SW |
413 | { |
414 | ||
f88b6c16 | 415 | return(args(mdoc, line, pos, buf, ARGSFL_NONE, v)); |
80387638 SW |
416 | } |
417 | ||
80387638 | 418 | enum margserr |
070c62a6 | 419 | mdoc_args(struct mdoc *mdoc, int line, int *pos, |
80387638 SW |
420 | char *buf, enum mdoct tok, char **v) |
421 | { | |
a4c7eb57 | 422 | enum argsflag fl; |
80387638 SW |
423 | struct mdoc_node *n; |
424 | ||
36342e81 | 425 | fl = mdocargs[tok].flags; |
80387638 SW |
426 | |
427 | if (MDOC_It != tok) | |
f88b6c16 | 428 | return(args(mdoc, line, pos, buf, fl, v)); |
80387638 SW |
429 | |
430 | /* | |
431 | * We know that we're in an `It', so it's reasonable to expect | |
432 | * us to be sitting in a `Bl'. Someday this may not be the case | |
433 | * (if we allow random `It's sitting out there), so provide a | |
434 | * safe fall-back into the default behaviour. | |
435 | */ | |
436 | ||
f88b6c16 | 437 | for (n = mdoc->last; n; n = n->parent) |
80387638 | 438 | if (MDOC_Bl == n->tok) |
a4c7eb57 SW |
439 | if (LIST_column == n->norm->Bl.type) { |
440 | fl = ARGSFL_TABSEP; | |
441 | break; | |
442 | } | |
80387638 | 443 | |
f88b6c16 | 444 | return(args(mdoc, line, pos, buf, fl, v)); |
80387638 SW |
445 | } |
446 | ||
80387638 | 447 | static enum margserr |
070c62a6 | 448 | args(struct mdoc *mdoc, int line, int *pos, |
a4c7eb57 | 449 | char *buf, enum argsflag fl, char **v) |
80387638 | 450 | { |
80387638 | 451 | char *p, *pp; |
7888c61d | 452 | int pairs; |
80387638 | 453 | enum margserr rc; |
80387638 | 454 | |
80387638 | 455 | if ('\0' == buf[*pos]) { |
f88b6c16 | 456 | if (MDOC_PPHRASE & mdoc->flags) |
80387638 SW |
457 | return(ARGS_EOLN); |
458 | /* | |
459 | * If we're not in a partial phrase and the flag for | |
460 | * being a phrase literal is still set, the punctuation | |
461 | * is unterminated. | |
462 | */ | |
f88b6c16 | 463 | if (MDOC_PHRASELIT & mdoc->flags) |
070c62a6 FF |
464 | mandoc_msg(MANDOCERR_ARG_QUOTE, |
465 | mdoc->parse, line, *pos, NULL); | |
80387638 | 466 | |
f88b6c16 | 467 | mdoc->flags &= ~MDOC_PHRASELIT; |
80387638 SW |
468 | return(ARGS_EOLN); |
469 | } | |
470 | ||
80387638 SW |
471 | *v = &buf[*pos]; |
472 | ||
a4c7eb57 SW |
473 | if (ARGSFL_DELIM == fl) |
474 | if (args_checkpunct(buf, *pos)) | |
475 | return(ARGS_PUNCT); | |
60e1e752 | 476 | |
80387638 SW |
477 | /* |
478 | * First handle TABSEP items, restricted to `Bl -column'. This | |
479 | * ignores conventional token parsing and instead uses tabs or | |
480 | * `Ta' macros to separate phrases. Phrases are parsed again | |
481 | * for arguments at a later phase. | |
482 | */ | |
483 | ||
a4c7eb57 | 484 | if (ARGSFL_TABSEP == fl) { |
80387638 SW |
485 | /* Scan ahead to tab (can't be escaped). */ |
486 | p = strchr(*v, '\t'); | |
487 | pp = NULL; | |
488 | ||
489 | /* Scan ahead to unescaped `Ta'. */ | |
070c62a6 | 490 | if ( ! (MDOC_PHRASELIT & mdoc->flags)) |
80387638 SW |
491 | for (pp = *v; ; pp++) { |
492 | if (NULL == (pp = strstr(pp, "Ta"))) | |
493 | break; | |
494 | if (pp > *v && ' ' != *(pp - 1)) | |
495 | continue; | |
496 | if (' ' == *(pp + 2) || '\0' == *(pp + 2)) | |
497 | break; | |
498 | } | |
499 | ||
500 | /* By default, assume a phrase. */ | |
501 | rc = ARGS_PHRASE; | |
502 | ||
070c62a6 | 503 | /* |
80387638 SW |
504 | * Adjust new-buffer position to be beyond delimiter |
505 | * mark (e.g., Ta -> end + 2). | |
506 | */ | |
507 | if (p && pp) { | |
508 | *pos += pp < p ? 2 : 1; | |
509 | rc = pp < p ? ARGS_PHRASE : ARGS_PPHRASE; | |
510 | p = pp < p ? pp : p; | |
511 | } else if (p && ! pp) { | |
512 | rc = ARGS_PPHRASE; | |
513 | *pos += 1; | |
514 | } else if (pp && ! p) { | |
515 | p = pp; | |
516 | *pos += 2; | |
517 | } else { | |
518 | rc = ARGS_PEND; | |
519 | p = strchr(*v, 0); | |
520 | } | |
521 | ||
522 | /* Whitespace check for eoln case... */ | |
a4c7eb57 | 523 | if ('\0' == *p && ' ' == *(p - 1)) |
070c62a6 FF |
524 | mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, |
525 | line, *pos, NULL); | |
80387638 SW |
526 | |
527 | *pos += (int)(p - *v); | |
528 | ||
529 | /* Strip delimiter's preceding whitespace. */ | |
530 | pp = p - 1; | |
531 | while (pp > *v && ' ' == *pp) { | |
532 | if (pp > *v && '\\' == *(pp - 1)) | |
533 | break; | |
534 | pp--; | |
535 | } | |
536 | *(pp + 1) = 0; | |
537 | ||
538 | /* Strip delimiter's proceeding whitespace. */ | |
539 | for (pp = &buf[*pos]; ' ' == *pp; pp++, (*pos)++) | |
540 | /* Skip ahead. */ ; | |
541 | ||
542 | return(rc); | |
7888c61d | 543 | } |
80387638 | 544 | |
7888c61d | 545 | /* |
80387638 SW |
546 | * Process a quoted literal. A quote begins with a double-quote |
547 | * and ends with a double-quote NOT preceded by a double-quote. | |
7888c61d FF |
548 | * NUL-terminate the literal in place. |
549 | * Collapse pairs of quotes inside quoted literals. | |
80387638 SW |
550 | * Whitespace is NOT involved in literal termination. |
551 | */ | |
552 | ||
f88b6c16 FF |
553 | if (MDOC_PHRASELIT & mdoc->flags || '\"' == buf[*pos]) { |
554 | if ( ! (MDOC_PHRASELIT & mdoc->flags)) | |
80387638 SW |
555 | *v = &buf[++(*pos)]; |
556 | ||
f88b6c16 FF |
557 | if (MDOC_PPHRASE & mdoc->flags) |
558 | mdoc->flags |= MDOC_PHRASELIT; | |
80387638 | 559 | |
7888c61d | 560 | pairs = 0; |
80387638 | 561 | for ( ; buf[*pos]; (*pos)++) { |
7888c61d FF |
562 | /* Move following text left after quoted quotes. */ |
563 | if (pairs) | |
564 | buf[*pos - pairs] = buf[*pos]; | |
80387638 SW |
565 | if ('\"' != buf[*pos]) |
566 | continue; | |
7888c61d | 567 | /* Unquoted quotes end quoted args. */ |
80387638 SW |
568 | if ('\"' != buf[*pos + 1]) |
569 | break; | |
7888c61d FF |
570 | /* Quoted quotes collapse. */ |
571 | pairs++; | |
80387638 SW |
572 | (*pos)++; |
573 | } | |
7888c61d FF |
574 | if (pairs) |
575 | buf[*pos - pairs] = '\0'; | |
80387638 SW |
576 | |
577 | if ('\0' == buf[*pos]) { | |
f88b6c16 | 578 | if (MDOC_PPHRASE & mdoc->flags) |
80387638 | 579 | return(ARGS_QWORD); |
070c62a6 FF |
580 | mandoc_msg(MANDOCERR_ARG_QUOTE, |
581 | mdoc->parse, line, *pos, NULL); | |
80387638 SW |
582 | return(ARGS_QWORD); |
583 | } | |
584 | ||
f88b6c16 | 585 | mdoc->flags &= ~MDOC_PHRASELIT; |
80387638 SW |
586 | buf[(*pos)++] = '\0'; |
587 | ||
588 | if ('\0' == buf[*pos]) | |
589 | return(ARGS_QWORD); | |
590 | ||
591 | while (' ' == buf[*pos]) | |
592 | (*pos)++; | |
593 | ||
a4c7eb57 | 594 | if ('\0' == buf[*pos]) |
070c62a6 FF |
595 | mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, |
596 | line, *pos, NULL); | |
80387638 SW |
597 | |
598 | return(ARGS_QWORD); | |
599 | } | |
600 | ||
a4c7eb57 | 601 | p = &buf[*pos]; |
f88b6c16 | 602 | *v = mandoc_getarg(mdoc->parse, &p, line, pos); |
80387638 SW |
603 | |
604 | return(ARGS_WORD); | |
605 | } | |
606 | ||
070c62a6 | 607 | /* |
60e1e752 SW |
608 | * Check if the string consists only of space-separated closing |
609 | * delimiters. This is a bit of a dance: the first must be a close | |
610 | * delimiter, but it may be followed by middle delimiters. Arbitrary | |
611 | * whitespace may separate these tokens. | |
612 | */ | |
613 | static int | |
a4c7eb57 | 614 | args_checkpunct(const char *buf, int i) |
60e1e752 SW |
615 | { |
616 | int j; | |
617 | char dbuf[DELIMSZ]; | |
618 | enum mdelim d; | |
619 | ||
620 | /* First token must be a close-delimiter. */ | |
621 | ||
622 | for (j = 0; buf[i] && ' ' != buf[i] && j < DELIMSZ; j++, i++) | |
623 | dbuf[j] = buf[i]; | |
624 | ||
625 | if (DELIMSZ == j) | |
626 | return(0); | |
627 | ||
628 | dbuf[j] = '\0'; | |
629 | if (DELIM_CLOSE != mdoc_isdelim(dbuf)) | |
630 | return(0); | |
631 | ||
632 | while (' ' == buf[i]) | |
633 | i++; | |
634 | ||
635 | /* Remaining must NOT be open/none. */ | |
070c62a6 | 636 | |
60e1e752 SW |
637 | while (buf[i]) { |
638 | j = 0; | |
639 | while (buf[i] && ' ' != buf[i] && j < DELIMSZ) | |
640 | dbuf[j++] = buf[i++]; | |
641 | ||
642 | if (DELIMSZ == j) | |
643 | return(0); | |
644 | ||
645 | dbuf[j] = '\0'; | |
646 | d = mdoc_isdelim(dbuf); | |
647 | if (DELIM_NONE == d || DELIM_OPEN == d) | |
648 | return(0); | |
649 | ||
650 | while (' ' == buf[i]) | |
651 | i++; | |
652 | } | |
653 | ||
60e1e752 SW |
654 | return('\0' == buf[i]); |
655 | } | |
80387638 | 656 | |
80387638 | 657 | static int |
070c62a6 | 658 | argv_multi(struct mdoc *mdoc, int line, |
80387638 SW |
659 | struct mdoc_argv *v, int *pos, char *buf) |
660 | { | |
661 | enum margserr ac; | |
662 | char *p; | |
663 | ||
664 | for (v->sz = 0; ; v->sz++) { | |
665 | if ('-' == buf[*pos]) | |
666 | break; | |
f88b6c16 | 667 | ac = args(mdoc, line, pos, buf, ARGSFL_NONE, &p); |
80387638 SW |
668 | if (ARGS_ERROR == ac) |
669 | return(0); | |
670 | else if (ARGS_EOLN == ac) | |
671 | break; | |
672 | ||
673 | if (0 == v->sz % MULTI_STEP) | |
070c62a6 FF |
674 | v->value = mandoc_reallocarray(v->value, |
675 | v->sz + MULTI_STEP, sizeof(char *)); | |
80387638 SW |
676 | |
677 | v->value[(int)v->sz] = mandoc_strdup(p); | |
678 | } | |
679 | ||
680 | return(1); | |
681 | } | |
682 | ||
80387638 | 683 | static int |
070c62a6 | 684 | argv_single(struct mdoc *mdoc, int line, |
80387638 SW |
685 | struct mdoc_argv *v, int *pos, char *buf) |
686 | { | |
687 | enum margserr ac; | |
688 | char *p; | |
689 | ||
f88b6c16 | 690 | ac = args(mdoc, line, pos, buf, ARGSFL_NONE, &p); |
80387638 SW |
691 | if (ARGS_ERROR == ac) |
692 | return(0); | |
693 | if (ARGS_EOLN == ac) | |
694 | return(1); | |
695 | ||
696 | v->sz = 1; | |
697 | v->value = mandoc_malloc(sizeof(char *)); | |
698 | v->value[0] = mandoc_strdup(p); | |
699 | ||
700 | return(1); | |
701 | } |