| 1 | /* $Id: mandoc.h,v 1.99 2012/02/16 20:51:31 joerg Exp $ */ |
| 2 | /* |
| 3 | * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> |
| 4 | * |
| 5 | * Permission to use, copy, modify, and distribute this software for any |
| 6 | * purpose with or without fee is hereby granted, provided that the above |
| 7 | * copyright notice and this permission notice appear in all copies. |
| 8 | * |
| 9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
| 10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
| 11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
| 12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| 13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
| 14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
| 15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| 16 | */ |
| 17 | #ifndef MANDOC_H |
| 18 | #define MANDOC_H |
| 19 | |
| 20 | #define ASCII_NBRSP 31 /* non-breaking space (control code 31, ASCII "US") */ |
| 21 | #define ASCII_HYPH 30 /* breakable hyphen (control code 30, ASCII "RS") */ |
| 22 | |
| 23 | /* |
| 24 | * Status level. This refers to both internal status (i.e., whilst |
| 25 | * running, when warnings/errors are reported) and an indicator of a |
| 26 | * threshold of when to halt (when said internal state exceeds the |
| 27 | * threshold). |
| 28 | */ |
| 29 | enum mandoclevel { |
| 30 | MANDOCLEVEL_OK = 0, /* lowest level; the values below ascend by one */ |
| 31 | MANDOCLEVEL_RESERVED, /* NOTE(review): unused in this header; presumably a placeholder that keeps WARNING at 2 -- confirm */ |
| 32 | MANDOCLEVEL_WARNING, /* warnings: syntax, whitespace, etc. */ |
| 33 | MANDOCLEVEL_ERROR, /* input has been thrown away */ |
| 34 | MANDOCLEVEL_FATAL, /* input is borked */ |
| 35 | MANDOCLEVEL_BADARG, /* bad argument in invocation */ |
| 36 | MANDOCLEVEL_SYSERR, /* system error */ |
| 37 | MANDOCLEVEL_MAX /* sentinel: numerically the count of levels above */ |
| 38 | }; |
| 39 | |
| 40 | /* |
| 41 | * All possible things that can go wrong within a parse, be it libroff, |
| 42 | * libmdoc, or libman. |
| 43 | */ |
| 44 | enum mandocerr { |
| 45 | MANDOCERR_OK, /* first enumerator (value 0) */ |
| 46 | |
| 47 | MANDOCERR_WARNING, /* ===== start of warnings ===== */ |
| 48 | |
| 49 | /* related to the prologue */ |
| 50 | MANDOCERR_NOTITLE, /* no title in document */ |
| 51 | MANDOCERR_UPPERCASE, /* document title should be all caps */ |
| 52 | MANDOCERR_BADMSEC, /* unknown manual section */ |
| 53 | MANDOCERR_NODATE, /* date missing, using today's date */ |
| 54 | MANDOCERR_BADDATE, /* cannot parse date, using it verbatim */ |
| 55 | MANDOCERR_PROLOGOOO, /* prologue macros out of order */ |
| 56 | MANDOCERR_PROLOGREP, /* duplicate prologue macro */ |
| 57 | MANDOCERR_BADPROLOG, /* macro not allowed in prologue */ |
| 58 | MANDOCERR_BADBODY, /* macro not allowed in body */ |
| 59 | |
| 60 | /* related to document structure */ |
| 61 | MANDOCERR_SO, /* .so is fragile, better use ln(1) */ |
| 62 | MANDOCERR_NAMESECFIRST, /* NAME section must come first */ |
| 63 | MANDOCERR_BADNAMESEC, /* bad NAME section contents */ |
| 64 | MANDOCERR_NONAME, /* manual name not yet set */ |
| 65 | MANDOCERR_SECOOO, /* sections out of conventional order */ |
| 66 | MANDOCERR_SECREP, /* duplicate section name */ |
| 67 | MANDOCERR_SECMSEC, /* section not in conventional manual section */ |
| 68 | |
| 69 | /* related to macros and nesting */ |
| 70 | MANDOCERR_MACROOBS, /* skipping obsolete macro */ |
| 71 | MANDOCERR_IGNPAR, /* skipping paragraph macro */ |
| 72 | MANDOCERR_IGNNS, /* skipping no-space macro */ |
| 73 | MANDOCERR_SCOPENEST, /* blocks badly nested */ |
| 74 | MANDOCERR_CHILD, /* child violates parent syntax (warning; fatal twin: MANDOCERR_SYNTCHILD) */ |
| 75 | MANDOCERR_NESTEDDISP, /* nested displays are not portable */ |
| 76 | MANDOCERR_SCOPEREP, /* already in literal mode */ |
| 77 | MANDOCERR_LINESCOPE, /* line scope broken */ |
| 78 | |
| 79 | /* related to missing macro arguments */ |
| 80 | MANDOCERR_MACROEMPTY, /* skipping empty macro */ |
| 81 | MANDOCERR_ARGCWARN, /* argument count wrong (warning; error twin: MANDOCERR_ARGCOUNT) */ |
| 82 | MANDOCERR_DISPTYPE, /* missing display type */ |
| 83 | MANDOCERR_LISTFIRST, /* list type must come first */ |
| 84 | MANDOCERR_NOWIDTHARG, /* tag lists require a width argument */ |
| 85 | MANDOCERR_FONTTYPE, /* missing font type */ |
| 86 | MANDOCERR_WNOSCOPE, /* skipping end of block that is not open (warning; error twin: MANDOCERR_NOSCOPE) */ |
| 87 | |
| 88 | /* related to bad macro arguments */ |
| 89 | MANDOCERR_IGNARGV, /* skipping argument */ |
| 90 | MANDOCERR_ARGVREP, /* duplicate argument */ |
| 91 | MANDOCERR_DISPREP, /* duplicate display type */ |
| 92 | MANDOCERR_LISTREP, /* duplicate list type */ |
| 93 | MANDOCERR_BADATT, /* unknown AT&T UNIX version */ |
| 94 | MANDOCERR_BADBOOL, /* bad Boolean value */ |
| 95 | MANDOCERR_BADFONT, /* unknown font */ |
| 96 | MANDOCERR_BADSTANDARD, /* unknown standard specifier */ |
| 97 | MANDOCERR_BADWIDTH, /* bad width argument */ |
| 98 | |
| 99 | /* related to plain text */ |
| 100 | MANDOCERR_NOBLANKLN, /* blank line in non-literal context */ |
| 101 | MANDOCERR_BADTAB, /* tab in non-literal context */ |
| 102 | MANDOCERR_EOLNSPACE, /* end of line whitespace */ |
| 103 | MANDOCERR_BADCOMMENT, /* bad comment style */ |
| 104 | MANDOCERR_BADESCAPE, /* unknown escape sequence */ |
| 105 | MANDOCERR_BADQUOTE, /* unterminated quoted string */ |
| 106 | |
| 107 | /* related to equations */ |
| 108 | MANDOCERR_EQNQUOTE, /* unexpected literal in equation */ |
| 109 | |
| 110 | MANDOCERR_ERROR, /* ===== start of errors ===== */ |
| 111 | |
| 112 | /* related to equations */ |
| 113 | MANDOCERR_EQNNSCOPE, /* unexpected equation scope closure */ |
| 114 | MANDOCERR_EQNSCOPE, /* equation scope open on exit */ |
| 115 | MANDOCERR_EQNBADSCOPE, /* overlapping equation scopes */ |
| 116 | MANDOCERR_EQNEOF, /* unexpected end of equation */ |
| 117 | MANDOCERR_EQNSYNT, /* equation syntax error */ |
| 118 | |
| 119 | /* related to tables */ |
| 120 | MANDOCERR_TBL, /* bad table syntax */ |
| 121 | MANDOCERR_TBLOPT, /* bad table option */ |
| 122 | MANDOCERR_TBLLAYOUT, /* bad table layout */ |
| 123 | MANDOCERR_TBLNOLAYOUT, /* no table layout cells specified */ |
| 124 | MANDOCERR_TBLNODATA, /* no table data cells specified */ |
| 125 | MANDOCERR_TBLIGNDATA, /* ignore data in cell */ |
| 126 | MANDOCERR_TBLBLOCK, /* data block still open */ |
| 127 | MANDOCERR_TBLEXTRADAT, /* ignoring extra data cells */ |
| 128 | /* remaining errors, not grouped under a heading */ |
| 129 | MANDOCERR_ROFFLOOP, /* input stack limit exceeded, infinite loop? */ |
| 130 | MANDOCERR_BADCHAR, /* skipping bad character */ |
| 131 | MANDOCERR_NAMESC, /* escaped character not allowed in a name */ |
| 132 | MANDOCERR_NOTEXT, /* skipping text before the first section header */ |
| 133 | MANDOCERR_MACRO, /* skipping unknown macro */ |
| 134 | MANDOCERR_REQUEST, /* NOT IMPLEMENTED: skipping request */ |
| 135 | MANDOCERR_ARGCOUNT, /* argument count wrong */ |
| 136 | MANDOCERR_NOSCOPE, /* skipping end of block that is not open */ |
| 137 | MANDOCERR_SCOPEBROKEN, /* missing end of block */ |
| 138 | MANDOCERR_SCOPEEXIT, /* scope open on exit */ |
| 139 | MANDOCERR_UNAME, /* uname(3) system call failed */ |
| 140 | /* FIXME: merge following with MANDOCERR_ARGCOUNT */ |
| 141 | MANDOCERR_NOARGS, /* macro requires line argument(s) */ |
| 142 | MANDOCERR_NOBODY, /* macro requires body argument(s) */ |
| 143 | MANDOCERR_NOARGV, /* macro requires argument(s) */ |
| 144 | MANDOCERR_LISTTYPE, /* missing list type */ |
| 145 | MANDOCERR_ARGSLOST, /* line argument(s) will be lost */ |
| 146 | MANDOCERR_BODYLOST, /* body argument(s) will be lost */ |
| 147 | |
| 148 | MANDOCERR_FATAL, /* ===== start of fatal errors ===== */ |
| 149 | |
| 150 | MANDOCERR_NOTMANUAL, /* manual isn't really a manual */ |
| 151 | MANDOCERR_COLUMNS, /* column syntax is inconsistent */ |
| 152 | MANDOCERR_BADDISP, /* NOT IMPLEMENTED: .Bd -file */ |
| 153 | MANDOCERR_SYNTARGVCOUNT, /* argument count wrong, violates syntax */ |
| 154 | MANDOCERR_SYNTCHILD, /* child violates parent syntax */ |
| 155 | MANDOCERR_SYNTARGCOUNT, /* argument count wrong, violates syntax */ |
| 156 | MANDOCERR_SOPATH, /* NOT IMPLEMENTED: .so with absolute path or ".." */ |
| 157 | MANDOCERR_NODOCBODY, /* no document body */ |
| 158 | MANDOCERR_NODOCPROLOG, /* no document prologue */ |
| 159 | MANDOCERR_MEM, /* static buffer exhausted */ |
| 160 | MANDOCERR_MAX /* sentinel: numerically the count of codes above; keep last */ |
| 161 | }; |
| 162 | |
| 163 | struct tbl { /* table settings; each tbl_span points at one via its `tbl' member */ |
| 164 | char tab; /* cell-separator */ |
| 165 | char decimal; /* decimal point */ |
| 166 | int linesize; /* NOTE(review): presumably the value of the tbl "linesize" option -- confirm */ |
| 167 | int opts; /* bitwise OR of the TBL_OPT_* flags defined below */ |
| 168 | #define TBL_OPT_CENTRE (1 << 0) |
| 169 | #define TBL_OPT_EXPAND (1 << 1) |
| 170 | #define TBL_OPT_BOX (1 << 2) |
| 171 | #define TBL_OPT_DBOX (1 << 3) |
| 172 | #define TBL_OPT_ALLBOX (1 << 4) |
| 173 | #define TBL_OPT_NOKEEP (1 << 5) |
| 174 | #define TBL_OPT_NOSPACE (1 << 6) |
| 175 | int cols; /* number of columns */ |
| 176 | }; |
| 177 | |
| 178 | enum tbl_headt { /* kind of column head; stored in tbl_head.pos */ |
| 179 | TBL_HEAD_DATA, /* plug in data from tbl_dat */ |
| 180 | TBL_HEAD_VERT, /* vertical spacer */ |
| 181 | TBL_HEAD_DVERT /* double-vertical spacer */ |
| 182 | }; |
| 183 | |
| 184 | /* |
| 185 | * The head of a table specifies all of its columns. When formatting a |
| 186 | * tbl_span, iterate over these and plug in data from the tbl_span when |
| 187 | * appropriate, using tbl_cell as a guide to placement. |
| 188 | */ |
| 189 | struct tbl_head { |
| 190 | enum tbl_headt pos; /* data column or (double) vertical spacer */ |
| 191 | int ident; /* 0 <= unique id < cols */ |
| 192 | struct tbl_head *next; /* following head in the list */ |
| 193 | struct tbl_head *prev; /* preceding head in the list */ |
| 194 | }; |
| 195 | |
| 196 | enum tbl_cellt { /* layout cell type (tbl_cell.pos); each comment gives the tbl layout key character(s) */ |
| 197 | TBL_CELL_CENTRE, /* c, C */ |
| 198 | TBL_CELL_RIGHT, /* r, R */ |
| 199 | TBL_CELL_LEFT, /* l, L */ |
| 200 | TBL_CELL_NUMBER, /* n, N */ |
| 201 | TBL_CELL_SPAN, /* s, S */ |
| 202 | TBL_CELL_LONG, /* a, A */ |
| 203 | TBL_CELL_DOWN, /* ^ */ |
| 204 | TBL_CELL_HORIZ, /* _, - */ |
| 205 | TBL_CELL_DHORIZ, /* = */ |
| 206 | TBL_CELL_VERT, /* | */ |
| 207 | TBL_CELL_DVERT, /* || */ |
| 208 | TBL_CELL_MAX /* sentinel: numerically the count of cell types above */ |
| 209 | }; |
| 210 | |
| 211 | /* |
| 212 | * A cell in a layout row. |
| 213 | */ |
| 214 | struct tbl_cell { |
| 215 | struct tbl_cell *next; /* next cell in the row (tbl_row.first/last bound the list) */ |
| 216 | enum tbl_cellt pos; /* cell type, see enum tbl_cellt */ |
| 217 | size_t spacing; /* NOTE(review): presumably the column spacing given in the layout -- confirm */ |
| 218 | int flags; /* bitwise OR of the modifier flags below; comments give the layout letters */ |
| 219 | #define TBL_CELL_TALIGN (1 << 0) /* t, T */ |
| 220 | #define TBL_CELL_BALIGN (1 << 1) /* d, D */ |
| 221 | #define TBL_CELL_BOLD (1 << 2) /* fB, B, b */ |
| 222 | #define TBL_CELL_ITALIC (1 << 3) /* fI, I, i */ |
| 223 | #define TBL_CELL_EQUAL (1 << 4) /* e, E */ |
| 224 | #define TBL_CELL_UP (1 << 5) /* u, U */ |
| 225 | #define TBL_CELL_WIGN (1 << 6) /* z, Z */ |
| 226 | struct tbl_head *head; /* NOTE(review): presumably the column head this cell falls under -- confirm */ |
| 227 | }; |
| 228 | |
| 229 | /* |
| 230 | * A layout row. |
| 231 | */ |
| 232 | struct tbl_row { |
| 233 | struct tbl_row *next; /* following layout row */ |
| 234 | struct tbl_cell *first; /* first layout cell; cells chain through tbl_cell.next */ |
| 235 | struct tbl_cell *last; /* last layout cell */ |
| 236 | }; |
| 237 | |
| 238 | enum tbl_datt { /* content kind of a data cell; stored in tbl_dat.pos */ |
| 239 | TBL_DATA_NONE, /* has no data */ |
| 240 | TBL_DATA_DATA, /* consists of data/string */ |
| 241 | TBL_DATA_HORIZ, /* horizontal line */ |
| 242 | TBL_DATA_DHORIZ, /* double-horizontal line */ |
| 243 | TBL_DATA_NHORIZ, /* squeezed horizontal line */ |
| 244 | TBL_DATA_NDHORIZ /* squeezed double-horizontal line */ |
| 245 | }; |
| 246 | |
| 247 | /* |
| 248 | * A cell within a row of data. The "string" field contains the actual |
| 249 | * string value that's in the cell. The rest is layout. |
| 250 | */ |
| 251 | struct tbl_dat { |
| 252 | struct tbl_cell *layout; /* layout cell */ |
| 253 | int spans; /* how many spans follow */ |
| 254 | struct tbl_dat *next; /* next data cell (tbl_span.first/last bound the list) */ |
| 255 | char *string; /* data (NULL if not TBL_DATA_DATA) */ |
| 256 | enum tbl_datt pos; /* content kind, see enum tbl_datt */ |
| 257 | }; |
| 258 | |
| 259 | enum tbl_spant { /* kind of table row; stored in tbl_span.pos */ |
| 260 | TBL_SPAN_DATA, /* span consists of data */ |
| 261 | TBL_SPAN_HORIZ, /* span is horizontal line */ |
| 262 | TBL_SPAN_DHORIZ /* span is double horizontal line */ |
| 263 | }; |
| 264 | |
| 265 | /* |
| 266 | * A row of data in a table. |
| 267 | */ |
| 268 | struct tbl_span { |
| 269 | struct tbl *tbl; /* settings of the table (see struct tbl) */ |
| 270 | struct tbl_head *head; /* NOTE(review): presumably the first column head of the table -- confirm */ |
| 271 | struct tbl_row *layout; /* layout row */ |
| 272 | struct tbl_dat *first; /* first data cell; cells chain through tbl_dat.next */ |
| 273 | struct tbl_dat *last; /* last data cell */ |
| 274 | int line; /* parse line */ |
| 275 | int flags; /* bitwise OR of the TBL_SPAN_* flags below */ |
| 276 | #define TBL_SPAN_FIRST (1 << 0) |
| 277 | #define TBL_SPAN_LAST (1 << 1) |
| 278 | enum tbl_spant pos; /* data row or horizontal rule, see enum tbl_spant */ |
| 279 | struct tbl_span *next; /* following span */ |
| 280 | }; |
| 281 | |
| 282 | enum eqn_boxt { /* node type of an equation box; stored in eqn_box.type */ |
| 283 | EQN_ROOT, /* root of parse tree */ |
| 284 | EQN_TEXT, /* text (number, variable, whatever) */ |
| 285 | EQN_SUBEXPR, /* nested `eqn' subexpression */ |
| 286 | EQN_LIST, /* subexpressions list */ |
| 287 | EQN_MATRIX /* matrix subexpression */ |
| 288 | }; |
| 289 | |
| 290 | enum eqn_markt { /* "a mark about the box"; stored in eqn_box.mark */ |
| 291 | EQNMARK_NONE = 0, |
| 292 | EQNMARK_DOT, |
| 293 | EQNMARK_DOTDOT, |
| 294 | EQNMARK_HAT, |
| 295 | EQNMARK_TILDE, |
| 296 | EQNMARK_VEC, |
| 297 | EQNMARK_DYAD, |
| 298 | EQNMARK_BAR, |
| 299 | EQNMARK_UNDER, |
| 300 | EQNMARK__MAX /* sentinel: numerically the count of values above */ |
| 301 | }; |
| 302 | |
| 303 | enum eqn_fontt { /* font of a box; stored in eqn_box.font */ |
| 304 | EQNFONT_NONE = 0, |
| 305 | EQNFONT_ROMAN, |
| 306 | EQNFONT_BOLD, |
| 307 | EQNFONT_FAT, |
| 308 | EQNFONT_ITALIC, |
| 309 | EQNFONT__MAX /* sentinel: numerically the count of values above */ |
| 310 | }; |
| 311 | |
| 312 | enum eqn_post { /* position of the next box; stored in eqn_box.pos */ |
| 313 | EQNPOS_NONE = 0, |
| 314 | EQNPOS_OVER, |
| 315 | EQNPOS_SUP, |
| 316 | EQNPOS_SUB, |
| 317 | EQNPOS_TO, |
| 318 | EQNPOS_FROM, |
| 319 | EQNPOS__MAX /* sentinel: numerically the count of values above */ |
| 320 | }; |
| 321 | |
| 322 | enum eqn_pilet { /* equation piling; stored in eqn_box.pile */ |
| 323 | EQNPILE_NONE = 0, |
| 324 | EQNPILE_PILE, |
| 325 | EQNPILE_CPILE, |
| 326 | EQNPILE_RPILE, |
| 327 | EQNPILE_LPILE, |
| 328 | EQNPILE_COL, |
| 329 | EQNPILE_CCOL, |
| 330 | EQNPILE_RCOL, |
| 331 | EQNPILE_LCOL, |
| 332 | EQNPILE__MAX /* sentinel: numerically the count of values above */ |
| 333 | }; |
| 334 | |
| 335 | /* |
| 336 | * A "box" is a parsed mathematical expression as defined by the eqn.7 |
| 337 | * grammar. |
| 338 | */ |
| 339 | struct eqn_box { |
| 340 | int size; /* font size of expression */ |
| 341 | #define EQN_DEFSIZE INT_MIN /* INT_MIN is from <limits.h>, which this header does not include itself; NOTE(review): presumably marks `size' as "default" -- confirm */ |
| 342 | enum eqn_boxt type; /* type of node */ |
| 343 | struct eqn_box *first; /* first child node */ |
| 344 | struct eqn_box *last; /* last child node */ |
| 345 | struct eqn_box *next; /* node sibling */ |
| 346 | struct eqn_box *parent; /* node parent */ |
| 347 | char *text; /* text (or NULL) */ |
| 348 | char *left; /* NOTE(review): presumably the left delimiter string -- confirm */ |
| 349 | char *right; /* NOTE(review): presumably the right delimiter string -- confirm */ |
| 350 | enum eqn_post pos; /* position of next box */ |
| 351 | enum eqn_markt mark; /* a mark about the box */ |
| 352 | enum eqn_fontt font; /* font of box */ |
| 353 | enum eqn_pilet pile; /* equation piling */ |
| 354 | }; |
| 355 | |
| 356 | /* |
| 357 | * An equation consists of a tree of expressions starting at a given |
| 358 | * line and position. |
| 359 | */ |
| 360 | struct eqn { |
| 361 | char *name; /* identifier (or NULL) */ |
| 362 | struct eqn_box *root; /* root mathematical expression (top of the eqn_box tree) */ |
| 363 | int ln; /* invocation line */ |
| 364 | int pos; /* invocation position */ |
| 365 | }; |
| 366 | |
| 367 | /* |
| 368 | * The type of parse sequence. This value is usually passed via the |
| 369 | * mandoc(1) command line of -man and -mdoc. It's almost exclusively |
| 370 | * -mandoc but the others have been retained for compatibility. |
| 371 | */ |
| 372 | enum mparset { /* type of the first argument to mparse_alloc() */ |
| 373 | MPARSE_AUTO, /* magically determine the document type */ |
| 374 | MPARSE_MDOC, /* assume -mdoc */ |
| 375 | MPARSE_MAN /* assume -man */ |
| 376 | }; |
| 377 | |
| 378 | enum mandoc_esc { /* escape-sequence class; return type of mandoc_escape() */ |
| 379 | ESCAPE_ERROR = 0, /* bail! unparsable escape */ |
| 380 | ESCAPE_IGNORE, /* escape to be ignored */ |
| 381 | ESCAPE_SPECIAL, /* a regular special character */ |
| 382 | ESCAPE_FONT, /* a generic font mode */ |
| 383 | ESCAPE_FONTBOLD, /* bold font mode */ |
| 384 | ESCAPE_FONTITALIC, /* italic font mode */ |
| 385 | ESCAPE_FONTROMAN, /* roman font mode */ |
| 386 | ESCAPE_FONTPREV, /* previous font mode */ |
| 387 | ESCAPE_NUMBERED, /* a numbered glyph */ |
| 388 | ESCAPE_UNICODE, /* a unicode codepoint */ |
| 389 | ESCAPE_NOSPACE /* suppress space if the last on a line */ |
| 390 | }; |
| 391 | |
| 392 | typedef void (*mandocmsg)(enum mandocerr, enum mandoclevel, /* message callback type; one is passed to mparse_alloc() */ |
| 393 | const char *, int, int, const char *); /* NOTE(review): trailing args are presumably file name, line, column, message text -- confirm */ |
| 394 | |
| 395 | struct mparse; /* incomplete type: this header only ever passes pointers to it */ |
| 396 | struct mchars; /* incomplete type: this header only ever passes pointers to it */ |
| 397 | struct mdoc; /* incomplete here; handed back through mparse_result() */ |
| 398 | struct man; /* incomplete here; handed back through mparse_result() */ |
| 399 | |
| 400 | __BEGIN_DECLS /* not defined in this header; NOTE(review): normally supplied by <sys/cdefs.h> -- confirm the includer provides it */ |
| 401 | |
| 402 | void *mandoc_calloc(size_t, size_t); /* size_t is not declared here either: include <stddef.h> or <sys/types.h> first */ |
| 403 | enum mandoc_esc mandoc_escape(const char **, const char **, int *); /* classifies an escape, see enum mandoc_esc */ |
| 404 | void *mandoc_malloc(size_t); /* NOTE(review): the mandoc_* allocators presumably abort on failure instead of returning NULL -- confirm */ |
| 405 | void *mandoc_realloc(void *, size_t); |
| 406 | char *mandoc_strdup(const char *); |
| 407 | char *mandoc_strndup(const char *, size_t); |
| 408 | struct mchars *mchars_alloc(void); /* result is released with mchars_free() -- presumably; confirm */ |
| 409 | void mchars_free(struct mchars *); |
| 410 | char mchars_num2char(const char *, size_t); |
| 411 | int mchars_num2uc(const char *, size_t); |
| 412 | int mchars_spec2cp(const struct mchars *, |
| 413 | const char *, size_t); |
| 414 | const char *mchars_spec2str(const struct mchars *, |
| 415 | const char *, size_t, size_t *); |
| 416 | struct mparse *mparse_alloc(enum mparset, |
| 417 | enum mandoclevel, mandocmsg, void *); /* NOTE(review): the mandoclevel is presumably the halt threshold described at enum mandoclevel -- confirm */ |
| 418 | void mparse_free(struct mparse *); |
| 419 | void mparse_keep(struct mparse *); |
| 420 | enum mandoclevel mparse_readfd(struct mparse *, int, const char *); |
| 421 | enum mandoclevel mparse_readmem(struct mparse *, const void *, size_t, |
| 422 | const char *); |
| 423 | void mparse_reset(struct mparse *); |
| 424 | void mparse_result(struct mparse *, |
| 425 | struct mdoc **, struct man **); /* two out-parameters, one per document type */ |
| 426 | const char *mparse_getkeep(const struct mparse *); |
| 427 | const char *mparse_strerror(enum mandocerr); /* NOTE(review): presumably the message text for an error code -- confirm */ |
| 428 | const char *mparse_strlevel(enum mandoclevel); /* NOTE(review): presumably the printable name of a level -- confirm */ |
| 429 | |
| 430 | __END_DECLS |
| 431 | |
| 432 | #endif /*!MANDOC_H*/ |