Import mdocml-1.12.1
[dragonfly.git] / contrib / mdocml / mandoc.3
CommitLineData
36342e81 1.\" $Id: mandoc.3,v 1.17 2012/01/13 15:27:14 joerg Exp $
60e1e752
SW
2.\"
3.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4.\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5.\"
6.\" Permission to use, copy, modify, and distribute this software for any
7.\" purpose with or without fee is hereby granted, provided that the above
8.\" copyright notice and this permission notice appear in all copies.
9.\"
10.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17.\"
36342e81 18.Dd $Mdocdate: January 13 2012 $
60e1e752
SW
19.Dt MANDOC 3
20.Os
21.Sh NAME
22.Nm mandoc ,
a4c7eb57 23.Nm mandoc_escape ,
60e1e752 24.Nm man_meta ,
36342e81 25.Nm man_mparse ,
60e1e752 26.Nm man_node ,
a4c7eb57
SW
27.Nm mchars_alloc ,
28.Nm mchars_free ,
29.Nm mchars_num2char ,
30.Nm mchars_num2uc ,
31.Nm mchars_spec2cp ,
32.Nm mchars_spec2str ,
60e1e752
SW
33.Nm mdoc_meta ,
34.Nm mdoc_node ,
35.Nm mparse_alloc ,
36.Nm mparse_free ,
36342e81
SW
37.Nm mparse_getkeep ,
38.Nm mparse_keep ,
60e1e752
SW
39.Nm mparse_readfd ,
40.Nm mparse_reset ,
41.Nm mparse_result ,
42.Nm mparse_strerror ,
43.Nm mparse_strlevel
44.Nd mandoc macro compiler library
a4c7eb57
SW
45.Sh LIBRARY
46.Lb mandoc
60e1e752
SW
47.Sh SYNOPSIS
48.In man.h
49.In mdoc.h
50.In mandoc.h
a4c7eb57
SW
51.Ft "enum mandoc_esc"
52.Fo mandoc_escape
36342e81
SW
53.Fa "const char **end"
54.Fa "const char **start"
55.Fa "int *sz"
a4c7eb57 56.Fc
60e1e752
SW
57.Ft "const struct man_meta *"
58.Fo man_meta
59.Fa "const struct man *man"
60.Fc
36342e81
SW
61.Ft "const struct mparse *"
62.Fo man_mparse
63.Fa "const struct man *man"
64.Fc
60e1e752
SW
65.Ft "const struct man_node *"
66.Fo man_node
67.Fa "const struct man *man"
68.Fc
a4c7eb57
SW
69.Ft "struct mchars *"
70.Fn mchars_alloc
71.Ft void
72.Fn mchars_free "struct mchars *p"
73.Ft char
74.Fn mchars_num2char "const char *cp" "size_t sz"
75.Ft int
76.Fn mchars_num2uc "const char *cp" "size_t sz"
77.Ft "const char *"
78.Fo mchars_spec2str
36342e81 79.Fa "const struct mchars *p"
a4c7eb57
SW
80.Fa "const char *cp"
81.Fa "size_t sz"
82.Fa "size_t *rsz"
83.Fc
84.Ft int
85.Fo mchars_spec2cp
36342e81 86.Fa "const struct mchars *p"
a4c7eb57
SW
87.Fa "const char *cp"
88.Fa "size_t sz"
90.Fc
60e1e752
SW
91.Ft "const struct mdoc_meta *"
92.Fo mdoc_meta
93.Fa "const struct mdoc *mdoc"
94.Fc
95.Ft "const struct mdoc_node *"
96.Fo mdoc_node
97.Fa "const struct mdoc *mdoc"
98.Fc
99.Ft "struct mparse *"
100.Fo mparse_alloc
101.Fa "enum mparset type"
102.Fa "enum mandoclevel wlevel"
103.Fa "mandocmsg msg"
104.Fa "void *msgarg"
105.Fc
106.Ft void
107.Fo mparse_free
108.Fa "struct mparse *parse"
109.Fc
36342e81
SW
110.Ft "const char *"
111.Fo mparse_getkeep
112.Fa "const struct mparse *parse"
113.Fc
114.Ft void
115.Fo mparse_keep
116.Fa "struct mparse *parse"
117.Fc
60e1e752
SW
118.Ft "enum mandoclevel"
119.Fo mparse_readfd
120.Fa "struct mparse *parse"
121.Fa "int fd"
122.Fa "const char *fname"
123.Fc
124.Ft void
125.Fo mparse_reset
126.Fa "struct mparse *parse"
127.Fc
128.Ft void
129.Fo mparse_result
130.Fa "struct mparse *parse"
131.Fa "struct mdoc **mdoc"
132.Fa "struct man **man"
133.Fc
134.Ft "const char *"
135.Fo mparse_strerror
136.Fa "enum mandocerr"
137.Fc
138.Ft "const char *"
139.Fo mparse_strlevel
140.Fa "enum mandoclevel"
141.Fc
142.Vt extern const char * const * man_macronames;
143.Vt extern const char * const * mdoc_argnames;
144.Vt extern const char * const * mdoc_macronames;
a4c7eb57
SW
145.Fd "#define ASCII_NBRSP"
146.Fd "#define ASCII_HYPH"
60e1e752
SW
147.Sh DESCRIPTION
148The
149.Nm mandoc
150library parses a
151.Ux
152manual into an abstract syntax tree (AST).
153.Ux
154manuals are composed of
155.Xr mdoc 7
156or
157.Xr man 7 ,
158and may be mixed with
159.Xr roff 7 ,
160.Xr tbl 7 ,
161and
162.Xr eqn 7
163invocations.
164.Pp
165The following describes a general parse sequence:
166.Bl -enum
167.It
168initiate a parsing sequence with
169.Fn mparse_alloc ;
170.It
171parse files or file descriptors with
172.Fn mparse_readfd ;
173.It
174retrieve a parsed syntax tree, if the parse was successful, with
175.Fn mparse_result ;
176.It
177iterate over parse nodes with
178.Fn mdoc_node
179or
180.Fn man_node ;
181.It
182free all allocated memory with
183.Fn mparse_free ,
184or invoke
185.Fn mparse_reset
186and parse new files.
187.El
a4c7eb57
SW
188.Pp
189The
190.Nm
191library also contains routines for translating character strings into glyphs
192.Pq see Fn mchars_alloc
193and parsing escape sequences from strings
194.Pq see Fn mandoc_escape .
a4c7eb57
SW
195.Sh REFERENCE
196This section documents the functions, types, and variables available
197via
198.In mandoc.h .
199.Ss Types
200.Bl -ohang
201.It Vt "enum mandoc_esc"
36342e81 202An escape sequence classification.
a4c7eb57 203.It Vt "enum mandocerr"
36342e81 204A fatal error, error, or warning message during parsing.
a4c7eb57 205.It Vt "enum mandoclevel"
36342e81
SW
206A classification of an
207.Vt "enum mandocerr"
208as regards system operation.
a4c7eb57
SW
209.It Vt "struct mchars"
210An opaque pointer to an object allowing for translation between
211character strings and glyphs.
212See
213.Fn mchars_alloc .
214.It Vt "enum mparset"
36342e81
SW
215The type of parser when reading input.
216This should usually be
217.Dv MPARSE_AUTO
218for auto-detection.
a4c7eb57 219.It Vt "struct mparse"
36342e81
SW
220An opaque pointer to a running parse sequence.
221Created with
222.Fn mparse_alloc
223and freed with
224.Fn mparse_free .
225This may be used across parsed input if
226.Fn mparse_reset
227is called between parses.
a4c7eb57 228.It Vt "mandocmsg"
36342e81
SW
229A prototype for a function to handle fatal error, error, and warning
230messages emitted by the parser.
a4c7eb57
SW
231.El
232.Ss Functions
233.Bl -ohang
234.It Fn mandoc_escape
235Scan an escape sequence, i.e., a character string beginning with
236.Sq \e .
36342e81
SW
237Pass a pointer to the character after the
238.Sq \e
239as
a4c7eb57
SW
240.Va end ;
241it will be set to the supremum of the parsed escape sequence unless
36342e81
SW
242returning
243.Dv ESCAPE_ERROR ,
244in which case the string is bogus and should be
a4c7eb57 245thrown away.
36342e81
SW
246If not
247.Dv ESCAPE_ERROR
248or
249.Dv ESCAPE_IGNORE ,
a4c7eb57
SW
250.Va start
251is set to the first relevant character of the substring (font, glyph,
252whatever) of length
253.Va sz .
254Both
255.Va start
256and
257.Va sz
36342e81
SW
258may be
259.Dv NULL .
a4c7eb57
SW
260.It Fn man_meta
261Obtain the meta-data of a successful parse.
262This may only be used on a pointer returned by
263.Fn mparse_result .
36342e81
SW
264.It Fn man_mparse
265Get the parser used for the current output.
a4c7eb57
SW
266.It Fn man_node
267Obtain the root node of a successful parse.
268This may only be used on a pointer returned by
269.Fn mparse_result .
270.It Fn mchars_alloc
271Allocate an
272.Vt "struct mchars *"
273object for translating special characters into glyphs.
274See
275.Xr mandoc_char 7
276for an overview of special characters.
277The object must be freed with
278.Fn mchars_free .
279.It Fn mchars_free
280Free an object created with
281.Fn mchars_alloc .
282.It Fn mchars_num2char
283Convert a character index (e.g., the \eN\(aq\(aq escape) into a
284printable ASCII character.
285Returns \e0 (the NUL character) if the input sequence is malformed.
286.It Fn mchars_num2uc
287Convert a hexadecimal character index (e.g., the \e[uNNNN] escape) into
288a Unicode codepoint.
289Returns \e0 (the NUL character) if the input sequence is malformed.
290.It Fn mchars_spec2cp
291Convert a special character into a valid Unicode codepoint.
292Returns \-1 on failure or a non-zero Unicode codepoint on success.
293.It Fn mchars_spec2str
294Convert a special character into an ASCII string.
36342e81
SW
295Returns
296.Dv NULL
297on failure.
a4c7eb57
SW
298.It Fn mdoc_meta
299Obtain the meta-data of a successful parse.
300This may only be used on a pointer returned by
301.Fn mparse_result .
302.It Fn mdoc_node
303Obtain the root node of a successful parse.
304This may only be used on a pointer returned by
305.Fn mparse_result .
306.It Fn mparse_alloc
307Allocate a parser.
308The same parser may be used for multiple files so long as
309.Fn mparse_reset
310is called between parses.
311.Fn mparse_free
312must be called to free the memory allocated by this function.
313.It Fn mparse_free
314Free all memory allocated by
315.Fn mparse_alloc .
36342e81
SW
316.It Fn mparse_getkeep
317Acquire the keep buffer.
318Must follow a call of
319.Fn mparse_keep .
320.It Fn mparse_keep
321Instruct the parser to retain a copy of its parsed input.
322This can be acquired with subsequent
323.Fn mparse_getkeep
324calls.
a4c7eb57
SW
325.It Fn mparse_readfd
326Parse a file or file descriptor.
327If
328.Va fd
329is \-1,
330.Va fname
331is opened for reading.
332Otherwise,
333.Va fname
334is assumed to be the name associated with
335.Va fd .
336This may be called multiple times with different parameters; however,
337.Fn mparse_reset
338should be invoked between parses.
339.It Fn mparse_reset
340Reset a parser so that
341.Fn mparse_readfd
342may be used again.
343.It Fn mparse_result
344Obtain the result of a parse.
345Only successful parses
346.Po
347i.e., those where
348.Fn mparse_readfd
349returned less than MANDOCLEVEL_FATAL
350.Pc
351should invoke this function, in which case one of the two pointers will
352be filled in.
353.It Fn mparse_strerror
354Return a statically-allocated string representation of an error code.
355.It Fn mparse_strlevel
356Return a statically-allocated string representation of a level code.
357.El
358.Ss Variables
359.Bl -ohang
360.It Va man_macronames
361The string representation of a man macro as indexed by
362.Vt "enum mant" .
363.It Va mdoc_argnames
364The string representation of a mdoc macro argument as indexed by
365.Vt "enum mdocargt" .
366.It Va mdoc_macronames
367The string representation of a mdoc macro as indexed by
368.Vt "enum mdoct" .
369.El
60e1e752
SW
370.Sh IMPLEMENTATION NOTES
371This section consists of structural documentation for
372.Xr mdoc 7
373and
374.Xr man 7
36342e81
SW
375syntax trees and strings.
376.Ss Man and Mdoc Strings
377Strings may be extracted from mdoc and man meta-data, or from text
378nodes (MDOC_TEXT and MAN_TEXT, respectively).
379These strings have special non-printing formatting cues embedded in the
380text itself, as well as
381.Xr roff 7
382escapes preserved from input.
383Implementing systems will need to handle both situations to produce
384human-readable text.
385In general, strings may be assumed to consist of 7-bit ASCII characters.
386.Pp
387The following non-printing characters may be embedded in text strings:
388.Bl -tag -width Ds
389.It Dv ASCII_NBRSP
390A non-breaking space character.
391.It Dv ASCII_HYPH
392A soft hyphen.
393.El
394.Pp
395Escape characters are also passed verbatim into text strings.
396An escape character is a sequence of characters beginning with the
397backslash
398.Pq Sq \e .
399To construct human-readable text, these should be intercepted with
400.Fn mandoc_escape
401and converted with one of
402.Fn mchars_num2char ,
403.Fn mchars_spec2str ,
404and so on.
60e1e752
SW
405.Ss Man Abstract Syntax Tree
406This AST is governed by the ontological rules dictated in
407.Xr man 7
408and derives its terminology accordingly.
409.Pp
410The AST is composed of
411.Vt struct man_node
412nodes with element, root and text types as declared by the
413.Va type
414field.
415Each node also provides its parse point (the
416.Va line ,
417.Va sec ,
418and
419.Va pos
420fields), its position in the tree (the
421.Va parent ,
422.Va child ,
423.Va next
424and
425.Va prev
426fields) and some type-specific data.
427.Pp
428The tree itself is arranged according to the following normal form,
429where capitalised non-terminals represent nodes.
430.Pp
431.Bl -tag -width "ELEMENTXX" -compact
432.It ROOT
433\(<- mnode+
434.It mnode
435\(<- ELEMENT | TEXT | BLOCK
436.It BLOCK
437\(<- HEAD BODY
438.It HEAD
439\(<- mnode*
440.It BODY
441\(<- mnode*
442.It ELEMENT
443\(<- ELEMENT | TEXT*
444.It TEXT
36342e81 445\(<- [[:ascii:]]*
60e1e752
SW
446.El
447.Pp
448The only elements capable of nesting other elements are those with
449next-line scope as documented in
450.Xr man 7 .
451.Ss Mdoc Abstract Syntax Tree
452This AST is governed by the ontological
453rules dictated in
454.Xr mdoc 7
455and derives its terminology accordingly.
456.Qq In-line
457elements described in
458.Xr mdoc 7
459are described simply as
460.Qq elements .
461.Pp
462The AST is composed of
463.Vt struct mdoc_node
464nodes with block, head, body, element, root and text types as declared
465by the
466.Va type
467field.
468Each node also provides its parse point (the
469.Va line ,
470.Va sec ,
471and
472.Va pos
473fields), its position in the tree (the
474.Va parent ,
475.Va child ,
476.Va nchild ,
477.Va next
478and
479.Va prev
480fields) and some type-specific data, in particular, for nodes generated
481from macros, the generating macro in the
482.Va tok
483field.
484.Pp
485The tree itself is arranged according to the following normal form,
486where capitalised non-terminals represent nodes.
487.Pp
488.Bl -tag -width "ELEMENTXX" -compact
489.It ROOT
490\(<- mnode+
491.It mnode
492\(<- BLOCK | ELEMENT | TEXT
493.It BLOCK
494\(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
495.It ELEMENT
496\(<- TEXT*
497.It HEAD
498\(<- mnode*
499.It BODY
500\(<- mnode* [ENDBODY mnode*]
501.It TAIL
502\(<- mnode*
503.It TEXT
36342e81 504\(<- [[:ascii:]]*
60e1e752
SW
505.El
506.Pp
507Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
508the BLOCK production: these refer to punctuation marks.
509Furthermore, although a TEXT node will generally have a non-zero-length
510string, in the specific case of
511.Sq \&.Bd \-literal ,
512an empty line will produce a zero-length string.
513Multiple body parts are only found in invocations of
514.Sq \&.Bl \-column ,
515where a new body introduces a new phrase.
516.Pp
517The
518.Xr mdoc 7
a4c7eb57 519syntax tree accommodates broken block structures as well.
60e1e752
SW
520The ENDBODY node is available to end the formatting associated
521with a given block before the physical end of that block.
522It has a non-null
523.Va end
524field, is of the BODY
525.Va type ,
526has the same
527.Va tok
528as the BLOCK it is ending, and has a
529.Va pending
530field pointing to that BLOCK's BODY node.
531It is an indirect child of that BODY node
532and has no children of its own.
533.Pp
534An ENDBODY node is generated when a block ends while one of its child
535blocks is still open, like in the following example:
536.Bd -literal -offset indent
537\&.Ao ao
538\&.Bo bo ac
539\&.Ac bc
540\&.Bc end
541.Ed
542.Pp
543This example results in the following block structure:
544.Bd -literal -offset indent
545BLOCK Ao
546 HEAD Ao
547 BODY Ao
548 TEXT ao
549 BLOCK Bo, pending -> Ao
550 HEAD Bo
551 BODY Bo
552 TEXT bo
553 TEXT ac
554 ENDBODY Ao, pending -> Ao
555 TEXT bc
556TEXT end
557.Ed
558.Pp
559Here, the formatting of the
560.Sq \&Ao
561block extends from TEXT ao to TEXT ac,
562while the formatting of the
563.Sq \&Bo
564block extends from TEXT bo to TEXT bc.
565It renders as follows in
566.Fl T Ns Cm ascii
567mode:
568.Pp
569.Dl <ao [bo ac> bc] end
570.Pp
571Support for badly-nested blocks is only provided for backward
572compatibility with some older
573.Xr mdoc 7
574implementations.
575Using badly-nested blocks is
576.Em strongly discouraged ;
577for example, the
578.Fl T Ns Cm html
579and
580.Fl T Ns Cm xhtml
581front-ends to
582.Xr mandoc 1
583are unable to render them in any meaningful way.
584Furthermore, behaviour when encountering badly-nested blocks is not
585consistent across troff implementations, especially when using multiple
586levels of badly-nested blocks.
587.Sh SEE ALSO
588.Xr mandoc 1 ,
589.Xr eqn 7 ,
590.Xr man 7 ,
a4c7eb57 591.Xr mandoc_char 7 ,
60e1e752
SW
592.Xr mdoc 7 ,
593.Xr roff 7 ,
594.Xr tbl 7
595.Sh AUTHORS
596The
597.Nm
598library was written by
36342e81
SW
599.An Kristaps Dzonsons ,
600.Mt kristaps@bsd.lv .