| 1 | .\" $Id: man.3,v 1.10 2009/10/03 16:36:06 kristaps Exp $ |
| 2 | .\" |
| 3 | .\" Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se> |
| 4 | .\" |
| 5 | .\" Permission to use, copy, modify, and distribute this software for any |
| 6 | .\" purpose with or without fee is hereby granted, provided that the above |
| 7 | .\" copyright notice and this permission notice appear in all copies. |
| 8 | .\" |
| 9 | .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
| 10 | .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
| 11 | .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
| 12 | .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| 13 | .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
| 14 | .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
| 15 | .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| 16 | .\" |
| 17 | .Dd November 6, 2009 |
| 18 | .Dt MAN 3 |
| 19 | .Os |
| 20 | .\" SECTION |
| 21 | .Sh NAME |
| 22 | .Nm man_alloc , |
| 23 | .Nm man_parseln , |
| 24 | .Nm man_endparse , |
| 25 | .Nm man_node , |
| 26 | .Nm man_meta , |
| 27 | .Nm man_free , |
| 28 | .Nm man_reset |
| 29 | .Nd man macro compiler library |
| 30 | .\" SECTION |
| 31 | .Sh SYNOPSIS |
| 32 | .In man.h |
| 33 | .Vt extern const char * const * man_macronames; |
| 34 | .Ft "struct man *" |
| 35 | .Fn man_alloc "void *data" "int pflags" "const struct man_cb *cb" |
| 36 | .Ft void |
| 37 | .Fn man_reset "struct man *man" |
| 38 | .Ft void |
| 39 | .Fn man_free "struct man *man" |
| 40 | .Ft int |
| 41 | .Fn man_parseln "struct man *man" "int line" "char *buf" |
| 42 | .Ft "const struct man_node *" |
| 43 | .Fn man_node "const struct man *man" |
| 44 | .Ft "const struct man_meta *" |
| 45 | .Fn man_meta "const struct man *man" |
| 46 | .Ft int |
| 47 | .Fn man_endparse "struct man *man" |
| 48 | .\" SECTION |
| 49 | .Sh DESCRIPTION |
| 50 | The |
| 51 | .Nm man |
| 52 | library parses lines of |
| 53 | .Xr man 7 |
| 54 | input (and |
| 55 | .Em only |
| 56 | man) into an abstract syntax tree (AST). |
| 57 | .\" PARAGRAPH |
| 58 | .Pp |
| 59 | In general, applications initiate a parsing sequence with |
| 60 | .Fn man_alloc , |
| 61 | parse each line in a document with |
| 62 | .Fn man_parseln , |
| 63 | close the parsing session with |
| 64 | .Fn man_endparse , |
| 65 | operate over the syntax tree returned by |
| 66 | .Fn man_node |
| 67 | and |
| 68 | .Fn man_meta , |
| 69 | then free all allocated memory with |
| 70 | .Fn man_free . |
| 71 | The |
| 72 | .Fn man_reset |
| 73 | function may be used in order to reset the parser for another input |
| 74 | sequence. See the |
| 75 | .Sx EXAMPLES |
| 76 | section for a full example. |
| 77 | .\" PARAGRAPH |
| 78 | .Pp |
| 79 | This section further defines the |
| 80 | .Sx Types , |
| 81 | .Sx Functions |
| 82 | and |
| 83 | .Sx Variables |
| 84 | available to programmers. Following that, the |
| 85 | .Sx Abstract Syntax Tree |
| 86 | section documents the output tree. |
| 87 | .\" SUBSECTION |
| 88 | .Ss Types |
| 89 | Both functions (see |
| 90 | .Sx Functions ) |
| 91 | and variables (see |
| 92 | .Sx Variables ) |
| 93 | may use the following types: |
| 94 | .Bl -ohang -offset "XXXX" |
| 95 | .\" LIST-ITEM |
| 96 | .It Vt struct man |
| 97 | An opaque type defined in |
| 98 | .Pa man.c . |
| 99 | Its values are only used privately within the library. |
| 100 | .\" LIST-ITEM |
| 101 | .It Vt struct man_cb |
| 102 | A set of message callbacks defined in |
| 103 | .Pa man.h . |
| 104 | .\" LIST-ITEM |
| 105 | .It Vt struct man_node |
| 106 | A parsed node. Defined in |
| 107 | .Pa man.h . |
| 108 | See |
| 109 | .Sx Abstract Syntax Tree |
| 110 | for details. |
| 111 | .El |
| 112 | .\" SUBSECTION |
| 113 | .Ss Functions |
| 114 | Function descriptions follow: |
| 115 | .Bl -ohang -offset "XXXX" |
| 116 | .\" LIST-ITEM |
| 117 | .It Fn man_alloc |
| 118 | Allocates a parsing structure. The |
| 119 | .Fa data |
| 120 | pointer is passed to callbacks in |
| 121 | .Fa cb , |
| 122 | which are documented further in the header file. |
| 123 | The |
| 124 | .Fa pflags |
| 125 | arguments are defined in |
| 126 | .Pa man.h . |
| 127 | Returns NULL on failure. If non-NULL, the pointer must be freed with |
| 128 | .Fn man_free . |
| 129 | .\" LIST-ITEM |
| 130 | .It Fn man_reset |
| 131 | Reset the parser for another parse routine. After its use, |
| 132 | .Fn man_parseln |
| 133 | behaves as if invoked for the first time. |
| 134 | .\" LIST-ITEM |
| 135 | .It Fn man_free |
| 136 | Free all resources of a parser. The pointer is no longer valid after |
| 137 | invocation. |
| 138 | .\" LIST-ITEM |
| 139 | .It Fn man_parseln |
| 140 | Parse a NUL-terminated line of input. This line should not contain the |
| 141 | trailing newline. Returns 0 on failure, 1 on success. The input buffer |
| 142 | .Fa buf |
| 143 | is modified by this function. |
| 144 | .\" LIST-ITEM |
| 145 | .It Fn man_endparse |
| 146 | Signals that the parse is complete. Note that if |
| 147 | .Fn man_node |
| 148 | is called prior to |
| 149 | .Fn man_endparse , |
| 150 | the resulting tree is incomplete. Returns 0 on failure, 1 on success. |
| 151 | .\" LIST-ITEM |
| 152 | .It Fn man_node |
| 153 | Returns the first node of the parse. Note that if |
| 154 | .Fn man_parseln |
| 155 | or |
| 156 | .Fn man_endparse |
| 157 | returns 0, the tree will be incomplete. |
| 158 | .It Fn man_meta |
| 159 | Returns the document's parsed meta-data. If this information has not |
| 160 | yet been supplied or |
| 161 | .Fn man_parseln |
| 162 | or |
| 163 | .Fn man_endparse |
| 164 | returns 0, the data will be incomplete. |
| 165 | .El |
| 166 | .\" SUBSECTION |
| 167 | .Ss Variables |
| 168 | The following variables are also defined: |
| 169 | .Bl -ohang -offset "XXXX" |
| 170 | .\" LIST-ITEM |
| 171 | .It Va man_macronames |
| 172 | An array of string-ified token names. |
| 173 | .El |
| 174 | .\" SUBSECTION |
| 175 | .Ss Abstract Syntax Tree |
| 176 | The |
| 177 | .Nm man |
| 178 | functions produce an abstract syntax tree (AST) describing input in a |
| 179 | regular form. It may be reviewed at any time with |
| 180 | .Fn man_node ; |
| 181 | however, if called before |
| 182 | .Fn man_endparse , |
| 183 | or after |
| 184 | .Fn man_endparse |
| 185 | or |
| 186 | .Fn man_parseln |
| 187 | fails, it may be incomplete. |
| 188 | .\" PARAGRAPH |
| 189 | .Pp |
| 190 | This AST is governed by the ontological |
| 191 | rules dictated in |
| 192 | .Xr man 7 |
| 193 | and derives its terminology accordingly. |
| 194 | .\" PARAGRAPH |
| 195 | .Pp |
| 196 | The AST is composed of |
| 197 | .Vt struct man_node |
| 198 | nodes with block, head, body, element, root and text types as declared |
| 199 | by the |
| 200 | .Va type |
| 201 | field. Each node also provides its parse point (the |
| 202 | .Va line , |
| 203 | .Va sec , |
| 204 | and |
| 205 | .Va pos |
| 206 | fields), its position in the tree (the |
| 207 | .Va parent , |
| 208 | .Va child , |
| 209 | .Va next |
| 210 | and |
| 211 | .Va prev |
| 212 | fields) and some type-specific data. |
| 213 | .\" PARAGRAPH |
| 214 | .Pp |
| 215 | The tree itself is arranged according to the following normal form, |
| 216 | where capitalised non-terminals represent nodes. |
| 217 | .Pp |
| 218 | .Bl -tag -width "ELEMENTXX" -compact -offset "XXXX" |
| 219 | .\" LIST-ITEM |
| 220 | .It ROOT |
| 221 | \(<- mnode+ |
| 222 | .It mnode |
| 223 | \(<- ELEMENT | TEXT | BLOCK |
| 224 | .It BLOCK |
| 225 | \(<- HEAD BODY |
| 226 | .It HEAD |
| 227 | \(<- mnode* |
| 228 | .It BODY |
| 229 | \(<- mnode* |
| 230 | .It ELEMENT |
| 231 | \(<- ELEMENT | TEXT* |
| 232 | .It TEXT |
| 233 | \(<- [[:alpha:]]* |
| 234 | .El |
| 235 | .\" PARAGRAPH |
| 236 | .Pp |
| 237 | The only elements capable of nesting other elements are those with |
| 238 | next-line scope as documented in |
| 239 | .Xr man 7 . |
| 240 | .\" SECTION |
| 241 | .Sh EXAMPLES |
| 242 | The following example reads lines from stdin and parses them, operating |
| 243 | on the finished parse tree with |
| 244 | .Fn parsed . |
| 245 | Note that, if the last line of the file isn't newline-terminated, this |
| 246 | will truncate the file's last character (see |
| 247 | .Xr fgetln 3 ) . |
| 248 | Further, this example does not error-check nor free memory upon failure. |
| 249 | .Bd -literal -offset "XXXX" |
| 250 | struct man *man; |
| 251 | struct man_node *node; |
| 252 | char *buf; |
| 253 | size_t len; |
| 254 | int line; |
| 255 | |
| 256 | line = 1; |
| 257 | man = man_alloc(NULL, 0, NULL); |
| 258 | |
| 259 | while ((buf = fgetln(fp, &len))) { |
| 260 | buf[len - 1] = '\\0'; |
| 261 | if ( ! man_parseln(man, line, buf)) |
| 262 | errx(1, "man_parseln"); |
| 263 | line++; |
| 264 | } |
| 265 | |
| 266 | if ( ! man_endparse(man)) |
| 267 | errx(1, "man_endparse"); |
| 268 | if (NULL == (node = man_node(man))) |
| 269 | errx(1, "man_node"); |
| 270 | |
| 271 | parsed(man, node); |
| 272 | man_free(man); |
| 273 | .Ed |
| 274 | .\" SECTION |
| 275 | .Sh SEE ALSO |
| 276 | .Xr mandoc 1 , |
| 277 | .Xr man 7 |
| 278 | .\" SECTION |
| 279 | .Sh AUTHORS |
| 280 | The |
| 281 | .Nm man |
| 282 | library was written by |
| 283 | .An Kristaps Dzonsons Aq kristaps@kth.se . |