Import mdocml-1.11.3
[dragonfly.git] / contrib / mdocml / mandoc.3
CommitLineData
a4c7eb57 1.\" $Id: mandoc.3,v 1.10 2011/05/24 21:41:11 kristaps Exp $
60e1e752
SW
2.\"
3.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4.\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5.\"
6.\" Permission to use, copy, modify, and distribute this software for any
7.\" purpose with or without fee is hereby granted, provided that the above
8.\" copyright notice and this permission notice appear in all copies.
9.\"
10.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17.\"
a4c7eb57 18.Dd $Mdocdate: May 24 2011 $
60e1e752
SW
19.Dt MANDOC 3
20.Os
21.Sh NAME
22.Nm mandoc ,
a4c7eb57 23.Nm mandoc_escape ,
60e1e752
SW
24.Nm man_meta ,
25.Nm man_node ,
a4c7eb57
SW
26.Nm mchars_alloc ,
27.Nm mchars_free ,
28.Nm mchars_num2char ,
29.Nm mchars_num2uc ,
30.Nm mchars_spec2cp ,
31.Nm mchars_spec2str ,
60e1e752
SW
32.Nm mdoc_meta ,
33.Nm mdoc_node ,
34.Nm mparse_alloc ,
35.Nm mparse_free ,
36.Nm mparse_readfd ,
37.Nm mparse_reset ,
38.Nm mparse_result ,
39.Nm mparse_strerror ,
40.Nm mparse_strlevel
41.Nd mandoc macro compiler library
a4c7eb57
SW
42.Sh LIBRARY
43.Lb mandoc
60e1e752
SW
44.Sh SYNOPSIS
45.In man.h
46.In mdoc.h
47.In mandoc.h
a4c7eb57
SW
48.Ft "enum mandoc_esc"
49.Fo mandoc_escape
50.Fa "const char **in"
51.Fa "const char **seq"
52.Fa "int *len"
53.Fc
60e1e752
SW
54.Ft "const struct man_meta *"
55.Fo man_meta
56.Fa "const struct man *man"
57.Fc
58.Ft "const struct man_node *"
59.Fo man_node
60.Fa "const struct man *man"
61.Fc
a4c7eb57
SW
62.Ft "struct mchars *"
63.Fn mchars_alloc
64.Ft void
65.Fn mchars_free "struct mchars *p"
66.Ft char
67.Fn mchars_num2char "const char *cp" "size_t sz"
68.Ft int
69.Fn mchars_num2uc "const char *cp" "size_t sz"
70.Ft "const char *"
71.Fo mchars_spec2str
72.Fa "struct mchars *p"
73.Fa "const char *cp"
74.Fa "size_t sz"
75.Fa "size_t *rsz"
76.Fc
77.Ft int
78.Fo mchars_spec2cp
79.Fa "struct mchars *p"
80.Fa "const char *cp"
81.Fa "size_t sz"
83.Fc
60e1e752
SW
84.Ft "const struct mdoc_meta *"
85.Fo mdoc_meta
86.Fa "const struct mdoc *mdoc"
87.Fc
88.Ft "const struct mdoc_node *"
89.Fo mdoc_node
90.Fa "const struct mdoc *mdoc"
91.Fc
92.Ft "struct mparse *"
93.Fo mparse_alloc
94.Fa "enum mparset type"
95.Fa "enum mandoclevel wlevel"
96.Fa "mandocmsg msg"
97.Fa "void *msgarg"
98.Fc
99.Ft void
100.Fo mparse_free
101.Fa "struct mparse *parse"
102.Fc
103.Ft "enum mandoclevel"
104.Fo mparse_readfd
105.Fa "struct mparse *parse"
106.Fa "int fd"
107.Fa "const char *fname"
108.Fc
109.Ft void
110.Fo mparse_reset
111.Fa "struct mparse *parse"
112.Fc
113.Ft void
114.Fo mparse_result
115.Fa "struct mparse *parse"
116.Fa "struct mdoc **mdoc"
117.Fa "struct man **man"
118.Fc
119.Ft "const char *"
120.Fo mparse_strerror
121.Fa "enum mandocerr"
122.Fc
123.Ft "const char *"
124.Fo mparse_strlevel
125.Fa "enum mandoclevel"
126.Fc
127.Vt extern const char * const * man_macronames;
128.Vt extern const char * const * mdoc_argnames;
129.Vt extern const char * const * mdoc_macronames;
a4c7eb57
SW
130.Fd "#define ASCII_NBRSP"
131.Fd "#define ASCII_HYPH"
60e1e752
SW
132.Sh DESCRIPTION
133The
134.Nm mandoc
135library parses a
136.Ux
137manual into an abstract syntax tree (AST).
138.Ux
139manuals are composed of
140.Xr mdoc 7
141or
142.Xr man 7 ,
143and may be mixed with
144.Xr roff 7 ,
145.Xr tbl 7 ,
146and
147.Xr eqn 7
148invocations.
149.Pp
150The following describes a general parse sequence:
151.Bl -enum
152.It
153initiate a parsing sequence with
154.Fn mparse_alloc ;
155.It
156parse files or file descriptors with
157.Fn mparse_readfd ;
158.It
159retrieve a parsed syntax tree, if the parse was successful, with
160.Fn mparse_result ;
161.It
162iterate over parse nodes with
163.Fn mdoc_node
164or
165.Fn man_node ;
166.It
167free all allocated memory with
168.Fn mparse_free ,
169or invoke
170.Fn mparse_reset
171and parse new files.
172.El
a4c7eb57
SW
173.Pp
174The
175.Nm
176library also contains routines for translating character strings into glyphs
177.Pq see Fn mchars_alloc
178and parsing escape sequences from strings
179.Pq see Fn mandoc_escape .
180.Pp
181This library is
182.Ud
183.Sh REFERENCE
184This section documents the functions, types, and variables available
185via
186.In mandoc.h .
187.Ss Types
188.Bl -ohang
189.It Vt "enum mandoc_esc"
190.It Vt "enum mandocerr"
191.It Vt "enum mandoclevel"
192.It Vt "struct mchars"
193An opaque pointer to an object allowing for translation between
194character strings and glyphs.
195See
196.Fn mchars_alloc .
197.It Vt "enum mparset"
198.It Vt "struct mparse"
199.It Vt "mandocmsg"
200.El
201.Ss Functions
202.Bl -ohang
203.It Fn mandoc_escape
204Scan an escape sequence, i.e., a character string beginning with
205.Sq \e .
206Pass a pointer to this string as
207.Va in ;
208it will be set to the supremum of the parsed escape sequence unless
209returning ESCAPE_ERROR, in which case the string is bogus and should be
210thrown away.
211If not ESCAPE_ERROR or ESCAPE_IGNORE,
212.Va seq
213is set to the first relevant character of the substring (font, glyph,
214whatever) of length
215.Va len .
216Both
217.Va seq
218and
219.Va len
220may be NULL.
221.It Fn man_meta
222Obtain the meta-data of a successful parse.
223This may only be used on a pointer returned by
224.Fn mparse_result .
225.It Fn man_node
226Obtain the root node of a successful parse.
227This may only be used on a pointer returned by
228.Fn mparse_result .
229.It Fn mchars_alloc
230Allocate an
231.Vt "struct mchars *"
232object for translating special characters into glyphs.
233See
234.Xr mandoc_char 7
235for an overview of special characters.
236The object must be freed with
237.Fn mchars_free .
238.It Fn mchars_free
239Free an object created with
240.Fn mchars_alloc .
241.It Fn mchars_num2char
242Convert a character index (e.g., the \eN\(aq\(aq escape) into a
243printable ASCII character.
244Returns \e0 (the NUL character) if the input sequence is malformed.
245.It Fn mchars_num2uc
246Convert a hexadecimal character index (e.g., the \e[uNNNN] escape) into
247a Unicode codepoint.
248Returns \e0 (the NUL character) if the input sequence is malformed.
249.It Fn mchars_spec2cp
250Convert a special character into a valid Unicode codepoint.
251Returns \-1 on failure or a non-zero Unicode codepoint on success.
252.It Fn mchars_spec2str
253Convert a special character into an ASCII string.
254Returns NULL on failure.
255.It Fn mdoc_meta
256Obtain the meta-data of a successful parse.
257This may only be used on a pointer returned by
258.Fn mparse_result .
259.It Fn mdoc_node
260Obtain the root node of a successful parse.
261This may only be used on a pointer returned by
262.Fn mparse_result .
263.It Fn mparse_alloc
264Allocate a parser.
265The same parser may be used for multiple files so long as
266.Fn mparse_reset
267is called between parses.
268.Fn mparse_free
269must be called to free the memory allocated by this function.
270.It Fn mparse_free
271Free all memory allocated by
272.Fn mparse_alloc .
273.It Fn mparse_readfd
274Parse a file or file descriptor.
275If
276.Va fd
277is \-1,
278.Va fname
279is opened for reading.
280Otherwise,
281.Va fname
282is assumed to be the name associated with
283.Va fd .
284This may be called multiple times with different parameters; however,
285.Fn mparse_reset
286should be invoked between parses.
287.It Fn mparse_reset
288Reset a parser so that
289.Fn mparse_readfd
290may be used again.
291.It Fn mparse_result
292Obtain the result of a parse.
293Only successful parses
294.Po
295i.e., those where
296.Fn mparse_readfd
297returned a level less than MANDOCLEVEL_FATAL
298.Pc
299should invoke this function, in which case one of the two pointers will
300be filled in.
301.It Fn mparse_strerror
302Return a statically-allocated string representation of an error code.
303.It Fn mparse_strlevel
304Return a statically-allocated string representation of a level code.
305.El
306.Ss Variables
307.Bl -ohang
308.It Va man_macronames
309The string representation of a man macro as indexed by
310.Vt "enum mant" .
311.It Va mdoc_argnames
312The string representation of a mdoc macro argument as indexed by
313.Vt "enum mdocargt" .
314.It Va mdoc_macronames
315The string representation of a mdoc macro as indexed by
316.Vt "enum mdoct" .
317.El
60e1e752
SW
318.Sh IMPLEMENTATION NOTES
319This section consists of structural documentation for
320.Xr mdoc 7
321and
322.Xr man 7
323syntax trees.
324.Ss Man Abstract Syntax Tree
325This AST is governed by the ontological rules dictated in
326.Xr man 7
327and derives its terminology accordingly.
328.Pp
329The AST is composed of
330.Vt struct man_node
331nodes with element, root and text types as declared by the
332.Va type
333field.
334Each node also provides its parse point (the
335.Va line ,
336.Va sec ,
337and
338.Va pos
339fields), its position in the tree (the
340.Va parent ,
341.Va child ,
342.Va next
343and
344.Va prev
345fields) and some type-specific data.
346.Pp
347The tree itself is arranged according to the following normal form,
348where capitalised non-terminals represent nodes.
349.Pp
350.Bl -tag -width "ELEMENTXX" -compact
351.It ROOT
352\(<- mnode+
353.It mnode
354\(<- ELEMENT | TEXT | BLOCK
355.It BLOCK
356\(<- HEAD BODY
357.It HEAD
358\(<- mnode*
359.It BODY
360\(<- mnode*
361.It ELEMENT
362\(<- ELEMENT | TEXT*
363.It TEXT
364\(<- [[:alpha:]]*
365.El
366.Pp
367The only elements capable of nesting other elements are those with
368next-line scope as documented in
369.Xr man 7 .
370.Ss Mdoc Abstract Syntax Tree
371This AST is governed by the ontological
372rules dictated in
373.Xr mdoc 7
374and derives its terminology accordingly.
375.Qq In-line
376elements described in
377.Xr mdoc 7
378are described simply as
379.Qq elements .
380.Pp
381The AST is composed of
382.Vt struct mdoc_node
383nodes with block, head, body, element, root and text types as declared
384by the
385.Va type
386field.
387Each node also provides its parse point (the
388.Va line ,
389.Va sec ,
390and
391.Va pos
392fields), its position in the tree (the
393.Va parent ,
394.Va child ,
395.Va nchild ,
396.Va next
397and
398.Va prev
399fields) and some type-specific data, in particular, for nodes generated
400from macros, the generating macro in the
401.Va tok
402field.
403.Pp
404The tree itself is arranged according to the following normal form,
405where capitalised non-terminals represent nodes.
406.Pp
407.Bl -tag -width "ELEMENTXX" -compact
408.It ROOT
409\(<- mnode+
410.It mnode
411\(<- BLOCK | ELEMENT | TEXT
412.It BLOCK
413\(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
414.It ELEMENT
415\(<- TEXT*
416.It HEAD
417\(<- mnode*
418.It BODY
419\(<- mnode* [ENDBODY mnode*]
420.It TAIL
421\(<- mnode*
422.It TEXT
423\(<- [[:printable:],0x1e]*
424.El
425.Pp
426Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
427the BLOCK production: these refer to punctuation marks.
428Furthermore, although a TEXT node will generally have a non-zero-length
429string, in the specific case of
430.Sq \&.Bd \-literal ,
431an empty line will produce a zero-length string.
432Multiple body parts are only found in invocations of
433.Sq \&Bl \-column ,
434where a new body introduces a new phrase.
435.Pp
436The
437.Xr mdoc 7
a4c7eb57 438syntax tree accommodates broken block structures as well.
60e1e752
SW
439The ENDBODY node is available to end the formatting associated
440with a given block before the physical end of that block.
441It has a non-null
442.Va end
443field, is of the BODY
444.Va type ,
445has the same
446.Va tok
447as the BLOCK it is ending, and has a
448.Va pending
449field pointing to that BLOCK's BODY node.
450It is an indirect child of that BODY node
451and has no children of its own.
452.Pp
453An ENDBODY node is generated when a block ends while one of its child
454blocks is still open, like in the following example:
455.Bd -literal -offset indent
456\&.Ao ao
457\&.Bo bo ac
458\&.Ac bc
459\&.Bc end
460.Ed
461.Pp
462This example results in the following block structure:
463.Bd -literal -offset indent
464BLOCK Ao
465 HEAD Ao
466 BODY Ao
467 TEXT ao
468 BLOCK Bo, pending -> Ao
469 HEAD Bo
470 BODY Bo
471 TEXT bo
472 TEXT ac
473 ENDBODY Ao, pending -> Ao
474 TEXT bc
475TEXT end
476.Ed
477.Pp
478Here, the formatting of the
479.Sq \&Ao
480block extends from TEXT ao to TEXT ac,
481while the formatting of the
482.Sq \&Bo
483block extends from TEXT bo to TEXT bc.
484It renders as follows in
485.Fl T Ns Cm ascii
486mode:
487.Pp
488.Dl <ao [bo ac> bc] end
489.Pp
490Support for badly-nested blocks is only provided for backward
491compatibility with some older
492.Xr mdoc 7
493implementations.
494Using badly-nested blocks is
495.Em strongly discouraged ;
496for example, the
497.Fl T Ns Cm html
498and
499.Fl T Ns Cm xhtml
500front-ends to
501.Xr mandoc 1
502are unable to render them in any meaningful way.
503Furthermore, behaviour when encountering badly-nested blocks is not
504consistent across troff implementations, especially when using multiple
505levels of badly-nested blocks.
506.Sh SEE ALSO
507.Xr mandoc 1 ,
508.Xr eqn 7 ,
509.Xr man 7 ,
a4c7eb57 510.Xr mandoc_char 7 ,
60e1e752
SW
511.Xr mdoc 7 ,
512.Xr roff 7 ,
513.Xr tbl 7
514.Sh AUTHORS
515The
516.Nm
517library was written by
518.An Kristaps Dzonsons Aq kristaps@bsd.lv .