| Commit | Line | Data |
|---|---|---|
| 32c903ac | 1 | .\" $Id: mdoc.3,v 1.35 2009/10/03 16:36:06 kristaps Exp $ |
| 589e7c1d SW | 2 | .\" |
| 3 | .\" Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se> | |
| 4 | .\" | |
| 5 | .\" Permission to use, copy, modify, and distribute this software for any | |
| 6 | .\" purpose with or without fee is hereby granted, provided that the above | |
| 7 | .\" copyright notice and this permission notice appear in all copies. | |
| 8 | .\" | |
| 9 | .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
| 10 | .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
| 11 | .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
| 12 | .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
| 13 | .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
| 14 | .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
| 15 | .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
| 16 | .\" | |
| 32c903ac | 17 | .Dd November 11, 2009 |
| 589e7c1d SW | 18 | .Dt MDOC 3 |
| 19 | .Os | |
| 20 | .\" SECTION | |
| 21 | .Sh NAME | |
| 22 | .Nm mdoc_alloc , | |
| 23 | .Nm mdoc_parseln , | |
| 24 | .Nm mdoc_endparse , | |
| 25 | .Nm mdoc_node , | |
| 26 | .Nm mdoc_meta , | |
| 27 | .Nm mdoc_free , | |
| 28 | .Nm mdoc_reset | |
| 29 | .Nd mdoc macro compiler library | |
| 30 | .\" SECTION | |
| 31 | .Sh SYNOPSIS | |
| 32 | .In mdoc.h | |
| 33 | .Vt extern const char * const * mdoc_macronames; | |
| 34 | .Vt extern const char * const * mdoc_argnames; | |
| 35 | .Ft "struct mdoc *" | |
| 36 | .Fn mdoc_alloc "void *data" "int pflags" "const struct mdoc_cb *cb" | |
| 37 | .Ft int | |
| 38 | .Fn mdoc_reset "struct mdoc *mdoc" | |
| 39 | .Ft void | |
| 40 | .Fn mdoc_free "struct mdoc *mdoc" | |
| 41 | .Ft int | |
| 42 | .Fn mdoc_parseln "struct mdoc *mdoc" "int line" "char *buf" | |
| 43 | .Ft "const struct mdoc_node *" | |
| 44 | .Fn mdoc_node "const struct mdoc *mdoc" | |
| 45 | .Ft "const struct mdoc_meta *" | |
| 46 | .Fn mdoc_meta "const struct mdoc *mdoc" | |
| 47 | .Ft int | |
| 48 | .Fn mdoc_endparse "struct mdoc *mdoc" | |
| 49 | .\" SECTION | |
| 50 | .Sh DESCRIPTION | |
| 51 | The | |
| 52 | .Nm mdoc | |
| 53 | library parses lines of | |
| 54 | .Xr mdoc 7 | |
| 55 | input (and | |
| 56 | .Em only | |
| 57 | mdoc) into an abstract syntax tree (AST). | |
| 58 | .\" PARAGRAPH | |
| 59 | .Pp | |
| 60 | In general, applications initiate a parsing sequence with | |
| 61 | .Fn mdoc_alloc , | |
| 62 | parse each line in a document with | |
| 63 | .Fn mdoc_parseln , | |
| 64 | close the parsing session with | |
| 65 | .Fn mdoc_endparse , | |
| 66 | operate over the syntax tree returned by | |
| 67 | .Fn mdoc_node | |
| 68 | and | |
| 69 | .Fn mdoc_meta , | |
| 70 | then free all allocated memory with | |
| 71 | .Fn mdoc_free . | |
| 72 | The | |
| 73 | .Fn mdoc_reset | |
| 74 | function may be used in order to reset the parser for another input | |
| 75 | sequence. See the | |
| 76 | .Sx EXAMPLES | |
| 77 | section for a full example. | |
| 78 | .\" PARAGRAPH | |
| 79 | .Pp | |
| 80 | This section further defines the | |
| 81 | .Sx Types , | |
| 82 | .Sx Functions | |
| 83 | and | |
| 84 | .Sx Variables | |
| 85 | available to programmers. Following that, the | |
| 86 | .Sx Abstract Syntax Tree | |
| 87 | section documents the output tree. | |
| 88 | .\" SUBSECTION | |
| 89 | .Ss Types | |
| 90 | Both functions (see | |
| 91 | .Sx Functions ) | |
| 92 | and variables (see | |
| 93 | .Sx Variables ) | |
| 94 | may use the following types: | |
| 95 | .Bl -ohang -offset "XXXX" | |
| 96 | .\" LIST-ITEM | |
| 97 | .It Vt struct mdoc | |
| 98 | An opaque type defined in | |
| 99 | .Pa mdoc.c . | |
| 100 | Its values are only used privately within the library. | |
| 101 | .\" LIST-ITEM | |
| 102 | .It Vt struct mdoc_cb | |
| 103 | A set of message callbacks defined in | |
| 104 | .Pa mdoc.h . | |
| 105 | .\" LIST-ITEM | |
| 106 | .It Vt struct mdoc_node | |
| 107 | A parsed node. Defined in | |
| 108 | .Pa mdoc.h . | |
| 109 | See | |
| 110 | .Sx Abstract Syntax Tree | |
| 111 | for details. | |
| 112 | .El | |
| 113 | .\" SUBSECTION | |
| 114 | .Ss Functions | |
| 115 | Function descriptions follow: | |
| 116 | .Bl -ohang -offset "XXXX" | |
| 117 | .\" LIST-ITEM | |
| 118 | .It Fn mdoc_alloc | |
| 119 | Allocates a parsing structure. The | |
| 120 | .Fa data | |
| 121 | pointer is passed to callbacks in | |
| 122 | .Fa cb , | |
| 123 | which are documented further in the header file. | |
| 124 | The | |
| 125 | .Fa pflags | |
| 126 | arguments are defined in | |
| 127 | .Pa mdoc.h . | |
| 128 | Returns NULL on failure. If non-NULL, the pointer must be freed with | |
| 129 | .Fn mdoc_free . | |
| 130 | .\" LIST-ITEM | |
| 131 | .It Fn mdoc_reset | |
| 132 | Reset the parser for another parse routine. After its use, | |
| 133 | .Fn mdoc_parseln | |
| 134 | behaves as if invoked for the first time. If it returns 0, memory could | |
| 135 | not be allocated. | |
| 136 | .\" LIST-ITEM | |
| 137 | .It Fn mdoc_free | |
| 138 | Free all resources of a parser. The pointer is no longer valid after | |
| 139 | invocation. | |
| 140 | .\" LIST-ITEM | |
| 141 | .It Fn mdoc_parseln | |
| 142 | Parse a NUL-terminated line of input. This line should not contain the | |
| 143 | trailing newline. Returns 0 on failure, 1 on success. The input buffer | |
| 144 | .Fa buf | |
| 145 | is modified by this function. | |
| 146 | .\" LIST-ITEM | |
| 147 | .It Fn mdoc_endparse | |
| 148 | Signals that the parse is complete. Note that if | |
| 149 | .Fn mdoc_endparse | |
| 150 | is called subsequent to | |
| 151 | .Fn mdoc_node , | |
| 152 | the resulting tree is incomplete. Returns 0 on failure, 1 on success. | |
| 153 | .\" LIST-ITEM | |
| 154 | .It Fn mdoc_node | |
| 155 | Returns the first node of the parse. Note that if | |
| 156 | .Fn mdoc_parseln | |
| 157 | or | |
| 158 | .Fn mdoc_endparse | |
| 159 | returns 0, the tree will be incomplete. | |
| 160 | .It Fn mdoc_meta | |
| 161 | Returns the document's parsed meta-data. If this information has not | |
| 162 | yet been supplied or | |
| 163 | .Fn mdoc_parseln | |
| 164 | or | |
| 165 | .Fn mdoc_endparse | |
| 166 | returns 0, the data will be incomplete. | |
| 167 | .El | |
| 168 | .\" SUBSECTION | |
| 169 | .Ss Variables | |
| 170 | The following variables are also defined: | |
| 171 | .Bl -ohang -offset "XXXX" | |
| 172 | .\" LIST-ITEM | |
| 173 | .It Va mdoc_macronames | |
| 174 | An array of string-ified token names. | |
| 175 | .\" LIST-ITEM | |
| 176 | .It Va mdoc_argnames | |
| 177 | An array of string-ified token argument names. | |
| 178 | .El | |
| 179 | .\" SUBSECTION | |
| 180 | .Ss Abstract Syntax Tree | |
| 181 | The | |
| 182 | .Nm mdoc | |
| 183 | functions produce an abstract syntax tree (AST) describing input in a | |
| 184 | regular form. It may be reviewed at any time with | |
| 185 | .Fn mdoc_node ; | |
| 186 | however, if called before | |
| 187 | .Fn mdoc_endparse , | |
| 188 | or after | |
| 189 | .Fn mdoc_endparse | |
| 190 | or | |
| 191 | .Fn mdoc_parseln | |
| 192 | fails, it may be incomplete. | |
| 193 | .\" PARAGRAPH | |
| 194 | .Pp | |
| 195 | This AST is governed by the ontological | |
| 196 | rules dictated in | |
| 197 | .Xr mdoc 7 | |
| 198 | and derives its terminology accordingly. | |
| 199 | .Qq In-line | |
| 200 | elements described in | |
| 201 | .Xr mdoc 7 | |
| 202 | are described simply as | |
| 203 | .Qq elements . | |
| 204 | .\" PARAGRAPH | |
| 205 | .Pp | |
| 206 | The AST is composed of | |
| 207 | .Vt struct mdoc_node | |
| 208 | nodes with block, head, body, tail, element, root and text types as declared | |
| 209 | by the | |
| 210 | .Va type | |
| 211 | field. Each node also provides its parse point (the | |
| 212 | .Va line , | |
| 213 | .Va sec , | |
| 214 | and | |
| 215 | .Va pos | |
| 216 | fields), its position in the tree (the | |
| 217 | .Va parent , | |
| 218 | .Va child , | |
| 219 | .Va next | |
| 220 | and | |
| 221 | .Va prev | |
| 222 | fields) and some type-specific data. | |
| 223 | .\" PARAGRAPH | |
| 224 | .Pp | |
| 225 | The tree itself is arranged according to the following normal form, | |
| 226 | where capitalised non-terminals represent nodes. | |
| 227 | .Pp | |
| 228 | .Bl -tag -width "ELEMENTXX" -compact -offset "XXXX" | |
| 229 | .\" LIST-ITEM | |
| 230 | .It ROOT | |
| 231 | \(<- mnode+ | |
| 232 | .It mnode | |
| 233 | \(<- BLOCK | ELEMENT | TEXT | |
| 234 | .It BLOCK | |
| 235 | \(<- (HEAD [TEXT])+ [BODY [TEXT]] [TAIL [TEXT]] | |
| 236 | .It BLOCK | |
| 237 | \(<- BODY [TEXT] [TAIL [TEXT]] | |
| 238 | .It ELEMENT | |
| 239 | \(<- TEXT* | |
| 240 | .It HEAD | |
| 241 | \(<- mnode+ | |
| 242 | .It BODY | |
| 243 | \(<- mnode+ | |
| 244 | .It TAIL | |
| 245 | \(<- mnode+ | |
| 246 | .It TEXT | |
| 247 | \(<- [[:alpha:]]* | |
| 248 | .El | |
| 249 | .\" PARAGRAPH | |
| 250 | .Pp | |
| 251 | Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of | |
| 252 | the BLOCK production. These refer to punctuation marks. Furthermore, | |
| 253 | although a TEXT node will generally have a non-zero-length string, in | |
| 254 | the specific case of | |
| 255 | .Sq \&.Bd \-literal , | |
| 256 | an empty line will produce a zero-length string. | |
| 257 | .\" SECTION | |
| 258 | .Sh EXAMPLES | |
| 259 | The following example reads lines from stdin and parses them, operating | |
| 260 | on the finished parse tree with | |
| 261 | .Fn parsed . | |
| 262 | Note that, if the last line of the file isn't newline-terminated, this | |
| 263 | will truncate the file's last character (see | |
| 264 | .Xr fgetln 3 ) . | |
| 265 | Further, this example does not error-check nor free memory upon failure. | |
| 266 | .Bd -literal -offset "XXXX" | |
| 267 | struct mdoc *mdoc; | |
| 268 | const struct mdoc_node *node; | |
| 269 | char *buf; | |
| 270 | size_t len; | |
| 271 | int line; | |
| 272 | ||
| 273 | line = 1; | |
| 274 | mdoc = mdoc_alloc(NULL, 0, NULL); | |
| 275 | ||
| 276 | while ((buf = fgetln(fp, &len))) { | |
| 277 | buf[len - 1] = '\\0'; | |
| 278 | if ( ! mdoc_parseln(mdoc, line, buf)) | |
| 279 | errx(1, "mdoc_parseln"); | |
| 280 | line++; | |
| 281 | } | |
| 282 | ||
| 283 | if ( ! mdoc_endparse(mdoc)) | |
| 284 | errx(1, "mdoc_endparse"); | |
| 285 | if (NULL == (node = mdoc_node(mdoc))) | |
| 286 | errx(1, "mdoc_node"); | |
| 287 | ||
| 288 | parsed(mdoc, node); | |
| 289 | mdoc_free(mdoc); | |
| 290 | .Ed | |
| 291 | .\" SECTION | |
| 292 | .Sh SEE ALSO | |
| 293 | .Xr mandoc 1 , | |
| 294 | .Xr mdoc 7 | |
| 295 | .\" SECTION | |
| 296 | .Sh AUTHORS | |
| 297 | The | |
| 298 | .Nm mdoc | |
| 299 | library was written by | |
| 300 | .An Kristaps Dzonsons Aq kristaps@kth.se . | |
| 301 | .\" SECTION | |
| 302 | .Sh CAVEATS | |
| 303 | .Bl -dash -compact | |
| 304 | .\" LIST-ITEM | |
| 305 | .It | |
| 306 | The | |
| 307 | .Sq \&.Xc | |
| 308 | and | |
| 309 | .Sq \&.Xo | |
| 310 | macros aren't handled when used to span lines for the | |
| 311 | .Sq \&.It | |
| 312 | macro. | |
| 313 | .\" LIST-ITEM | |
| 314 | .It | |
| 315 | The | |
| 316 | .Sq \&.Bsx | |
| 317 | macro family doesn't yet understand version arguments. | |
| 318 | .\" LIST-ITEM | |
| 319 | .It | |
| 320 | If not given a value, the \-offset argument to | |
| 321 | .Sq \&.Bd | |
| 322 | and | |
| 323 | .Sq \&.Bl | |
| 324 | should be the width of | |
| 325 | .Qq <string> ; | |
| 326 | instead, a value of | |
| 327 | .Li 10n | |
| 328 | is provided. | |
| 329 | .\" LIST-ITEM | |
| 330 | .It | |
| 331 | Column widths in | |
| 332 | .Sq \&.Bl \-column | |
| 333 | should default to width | |
| 334 | .Qq <stringx> | |
| 335 | if not included. | |
| 336 | .El |