Import mdocml-1.13.1
[dragonfly.git] / contrib / mdocml / mandoc.3
CommitLineData
070c62a6 1.\" $Id: mandoc.3,v 1.25 2014/08/05 05:48:56 schwarze Exp $
60e1e752
SW
2.\"
3.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4.\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5.\"
6.\" Permission to use, copy, modify, and distribute this software for any
7.\" purpose with or without fee is hereby granted, provided that the above
8.\" copyright notice and this permission notice appear in all copies.
9.\"
10.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17.\"
070c62a6 18.Dd $Mdocdate: August 5 2014 $
60e1e752
SW
19.Dt MANDOC 3
20.Os
21.Sh NAME
22.Nm mandoc ,
070c62a6 23.Nm man_deroff ,
60e1e752 24.Nm man_meta ,
36342e81 25.Nm man_mparse ,
60e1e752 26.Nm man_node ,
070c62a6 27.Nm mdoc_deroff ,
60e1e752
SW
28.Nm mdoc_meta ,
29.Nm mdoc_node ,
30.Nm mparse_alloc ,
31.Nm mparse_free ,
36342e81
SW
32.Nm mparse_getkeep ,
33.Nm mparse_keep ,
60e1e752
SW
34.Nm mparse_readfd ,
35.Nm mparse_reset ,
36.Nm mparse_result ,
37.Nm mparse_strerror ,
38.Nm mparse_strlevel
39.Nd mandoc macro compiler library
a4c7eb57 40.Sh LIBRARY
7888c61d 41.Lb libmandoc
60e1e752 42.Sh SYNOPSIS
070c62a6 43.In sys/types.h
60e1e752 44.In mandoc.h
070c62a6
FF
45.Fd "#define ASCII_NBRSP"
46.Fd "#define ASCII_HYPH"
47.Fd "#define ASCII_BREAK"
48.Ft struct mparse *
60e1e752 49.Fo mparse_alloc
070c62a6 50.Fa "int options"
60e1e752 51.Fa "enum mandoclevel wlevel"
070c62a6
FF
52.Fa "mandocmsg mmsg"
53.Fa "char *defos"
54.Fc
55.Ft void
56.Fo (*mandocmsg)
57.Fa "enum mandocerr errtype"
58.Fa "enum mandoclevel level"
59.Fa "const char *file"
60.Fa "int line"
61.Fa "int col"
62.Fa "const char *msg"
60e1e752
SW
63.Fc
64.Ft void
65.Fo mparse_free
66.Fa "struct mparse *parse"
67.Fc
070c62a6 68.Ft const char *
36342e81
SW
69.Fo mparse_getkeep
70.Fa "const struct mparse *parse"
71.Fc
72.Ft void
73.Fo mparse_keep
74.Fa "struct mparse *parse"
75.Fc
60e1e752
SW
76.Ft "enum mandoclevel"
77.Fo mparse_readfd
78.Fa "struct mparse *parse"
79.Fa "int fd"
80.Fa "const char *fname"
81.Fc
82.Ft void
83.Fo mparse_reset
84.Fa "struct mparse *parse"
85.Fc
86.Ft void
87.Fo mparse_result
88.Fa "struct mparse *parse"
89.Fa "struct mdoc **mdoc"
90.Fa "struct man **man"
070c62a6 91.Fa "char **sodest"
60e1e752
SW
92.Fc
93.Ft "const char *"
94.Fo mparse_strerror
95.Fa "enum mandocerr"
96.Fc
97.Ft "const char *"
98.Fo mparse_strlevel
99.Fa "enum mandoclevel"
100.Fc
070c62a6
FF
101.In sys/types.h
102.In mandoc.h
103.In mdoc.h
104.Ft void
105.Fo mdoc_deroff
106.Fa "char **dest"
107.Fa "const struct mdoc_node *node"
108.Fc
109.Ft "const struct mdoc_meta *"
110.Fo mdoc_meta
111.Fa "const struct mdoc *mdoc"
112.Fc
113.Ft "const struct mdoc_node *"
114.Fo mdoc_node
115.Fa "const struct mdoc *mdoc"
116.Fc
60e1e752
SW
117.Vt extern const char * const * mdoc_argnames;
118.Vt extern const char * const * mdoc_macronames;
070c62a6
FF
119.In sys/types.h
120.In mandoc.h
121.In man.h
122.Ft void
123.Fo man_deroff
124.Fa "char **dest"
125.Fa "const struct man_node *node"
126.Fc
127.Ft "const struct man_meta *"
128.Fo man_meta
129.Fa "const struct man *man"
130.Fc
131.Ft "const struct mparse *"
132.Fo man_mparse
133.Fa "const struct man *man"
134.Fc
135.Ft "const struct man_node *"
136.Fo man_node
137.Fa "const struct man *man"
138.Fc
139.Vt extern const char * const * man_macronames;
60e1e752
SW
140.Sh DESCRIPTION
141The
142.Nm mandoc
143library parses a
144.Ux
145manual into an abstract syntax tree (AST).
146.Ux
147manuals are composed of
148.Xr mdoc 7
149or
150.Xr man 7 ,
151and may be mixed with
152.Xr roff 7 ,
153.Xr tbl 7 ,
154and
155.Xr eqn 7
156invocations.
157.Pp
158The following describes a general parse sequence:
159.Bl -enum
160.It
161initiate a parsing sequence with
162.Fn mparse_alloc ;
163.It
164parse files or file descriptors with
165.Fn mparse_readfd ;
166.It
167retrieve a parsed syntax tree, if the parse was successful, with
168.Fn mparse_result ;
169.It
170iterate over parse nodes with
171.Fn mdoc_node
172or
173.Fn man_node ;
174.It
175free all allocated memory with
176.Fn mparse_free ,
177or invoke
178.Fn mparse_reset
179and parse new files.
180.El
a4c7eb57
SW
181.Sh REFERENCE
182This section documents the functions, types, and variables available
183via
070c62a6
FF
184.In mandoc.h ,
185with the exception of those documented in
186.Xr mandoc_escape 3
187and
188.Xr mchars_alloc 3 .
a4c7eb57
SW
189.Ss Types
190.Bl -ohang
a4c7eb57 191.It Vt "enum mandocerr"
36342e81 192A fatal error, error, or warning message during parsing.
a4c7eb57 193.It Vt "enum mandoclevel"
36342e81 194A classification of an
070c62a6 195.Vt "enum mandocerr"
36342e81 196as regards system operation.
a4c7eb57 197.It Vt "struct mparse"
36342e81
SW
198An opaque pointer to a running parse sequence.
199Created with
200.Fn mparse_alloc
201and freed with
202.Fn mparse_free .
203This may be used across parsed input if
204.Fn mparse_reset
205is called between parses.
a4c7eb57 206.It Vt "mandocmsg"
36342e81
SW
207A prototype for a function to handle fatal error, error, and warning
208messages emitted by the parser.
a4c7eb57
SW
209.El
210.Ss Functions
211.Bl -ohang
070c62a6
FF
212.It Fn man_deroff
213Obtain a text-only representation of a
214.Vt struct man_node ,
215including text contained in its child nodes.
216To be used on children of the pointer returned from
217.Fn man_node .
218When it is no longer needed, the pointer returned from
219.Fn man_deroff
220can be passed to
221.Xr free 3 .
a4c7eb57 222.It Fn man_meta
070c62a6
FF
223Obtain the meta-data of a successful
224.Xr man 7
225parse.
a4c7eb57
SW
226This may only be used on a pointer returned by
227.Fn mparse_result .
f88b6c16
FF
228Declared in
229.In man.h ,
230implemented in
231.Pa man.c .
36342e81
SW
232.It Fn man_mparse
233Get the parser used for the current output.
f88b6c16
FF
234Declared in
235.In man.h ,
236implemented in
237.Pa man.c .
a4c7eb57 238.It Fn man_node
070c62a6
FF
239Obtain the root node of a successful
240.Xr man 7
241parse.
a4c7eb57
SW
242This may only be used on a pointer returned by
243.Fn mparse_result .
f88b6c16
FF
244Declared in
245.In man.h ,
246implemented in
247.Pa man.c .
070c62a6
FF
248.It Fn mdoc_deroff
249Obtain a text-only representation of a
250.Vt struct mdoc_node ,
251including text contained in its child nodes.
252To be used on children of the pointer returned from
253.Fn mdoc_node .
254When it is no longer needed, the pointer returned from
255.Fn mdoc_deroff
256can be passed to
257.Xr free 3 .
a4c7eb57 258.It Fn mdoc_meta
070c62a6
FF
259Obtain the meta-data of a successful
260.Xr mdoc 7
261parse.
a4c7eb57
SW
262This may only be used on a pointer returned by
263.Fn mparse_result .
f88b6c16
FF
264Declared in
265.In mdoc.h ,
266implemented in
267.Pa mdoc.c .
a4c7eb57 268.It Fn mdoc_node
070c62a6
FF
269Obtain the root node of a successful
270.Xr mdoc 7
271parse.
a4c7eb57
SW
272This may only be used on a pointer returned by
273.Fn mparse_result .
f88b6c16
FF
274Declared in
275.In mdoc.h ,
276implemented in
277.Pa mdoc.c .
a4c7eb57
SW
278.It Fn mparse_alloc
279Allocate a parser.
070c62a6
FF
280The arguments have the following effect:
281.Bl -tag -offset 5n -width inttype
282.It Ar options
283When the
284.Dv MPARSE_MDOC
285or
286.Dv MPARSE_MAN
287bit is set, only that parser is used.
288Otherwise, the document type is automatically detected.
289.Pp
290When the
291.Dv MPARSE_SO
292bit is set,
293.Xr roff 7
294.Ic \&so
295file inclusion requests are always honoured.
296Otherwise, if the request is the only content in an input file,
297only the file name is remembered, to be returned in the
298.Fa sodest
299argument of
300.Fn mparse_result .
301.Pp
302When the
303.Dv MPARSE_QUICK
304bit is set, parsing is aborted after the NAME section.
305This is for example useful in
306.Xr makewhatis 8
307.Fl Q
308to quickly build minimal databases.
309.It Ar wlevel
310Can be set to
311.Dv MANDOCLEVEL_FATAL ,
312.Dv MANDOCLEVEL_ERROR ,
313or
314.Dv MANDOCLEVEL_WARNING .
315Messages below the selected level will be suppressed.
316.It Ar mmsg
317A callback function to handle errors and warnings.
318See
319.Pa main.c
320for an example.
321.It Ar defos
322A default string for the
323.Xr mdoc 7
324.Sq \&Os
325macro, overriding the
326.Dv OSNAME
327preprocessor definition and the results of
328.Xr uname 3 .
329.El
330.Pp
a4c7eb57
SW
331The same parser may be used for multiple files so long as
332.Fn mparse_reset
333is called between parses.
334.Fn mparse_free
335must be called to free the memory allocated by this function.
f88b6c16
FF
336Declared in
337.In mandoc.h ,
338implemented in
339.Pa read.c .
a4c7eb57
SW
340.It Fn mparse_free
341Free all memory allocated by
342.Fn mparse_alloc .
f88b6c16
FF
343Declared in
344.In mandoc.h ,
345implemented in
346.Pa read.c .
36342e81
SW
347.It Fn mparse_getkeep
348Acquire the keep buffer.
349Must follow a call of
350.Fn mparse_keep .
f88b6c16
FF
351Declared in
352.In mandoc.h ,
353implemented in
354.Pa read.c .
36342e81
SW
355.It Fn mparse_keep
356Instruct the parser to retain a copy of its parsed input.
357This can be acquired with subsequent
358.Fn mparse_getkeep
359calls.
f88b6c16
FF
360Declared in
361.In mandoc.h ,
362implemented in
363.Pa read.c .
a4c7eb57
SW
364.It Fn mparse_readfd
365Parse a file or file descriptor.
366If
367.Va fd
368is -1,
369.Va fname
370is opened for reading.
371Otherwise,
372.Va fname
373is assumed to be the name associated with
374.Va fd .
375This may be called multiple times with different parameters; however,
376.Fn mparse_reset
377should be invoked between parses.
f88b6c16
FF
378Declared in
379.In mandoc.h ,
380implemented in
381.Pa read.c .
a4c7eb57
SW
382.It Fn mparse_reset
383Reset a parser so that
384.Fn mparse_readfd
385may be used again.
f88b6c16
FF
386Declared in
387.In mandoc.h ,
388implemented in
389.Pa read.c .
a4c7eb57
SW
390.It Fn mparse_result
391Obtain the result of a parse.
392Only successful parses
393.Po
394i.e., those where
395.Fn mparse_readfd
396returned less than MANDOCLEVEL_FATAL
397.Pc
070c62a6 398should invoke this function, in which case one of the three pointers will
a4c7eb57 399be filled in.
f88b6c16
FF
400Declared in
401.In mandoc.h ,
402implemented in
403.Pa read.c .
a4c7eb57
SW
404.It Fn mparse_strerror
405Return a statically-allocated string representation of an error code.
f88b6c16
FF
406Declared in
407.In mandoc.h ,
408implemented in
409.Pa read.c .
a4c7eb57
SW
410.It Fn mparse_strlevel
411Return a statically-allocated string representation of a level code.
f88b6c16
FF
412Declared in
413.In mandoc.h ,
414implemented in
415.Pa read.c .
a4c7eb57
SW
416.El
417.Ss Variables
418.Bl -ohang
419.It Va man_macronames
420The string representation of a man macro as indexed by
421.Vt "enum mant" .
422.It Va mdoc_argnames
423The string representation of a mdoc macro argument as indexed by
424.Vt "enum mdocargt" .
425.It Va mdoc_macronames
426The string representation of a mdoc macro as indexed by
427.Vt "enum mdoct" .
428.El
60e1e752
SW
429.Sh IMPLEMENTATION NOTES
430This section consists of structural documentation for
431.Xr mdoc 7
432and
433.Xr man 7
36342e81
SW
434syntax trees and strings.
435.Ss Man and Mdoc Strings
436Strings may be extracted from mdoc and man meta-data, or from text
437nodes (MDOC_TEXT and MAN_TEXT, respectively).
438These strings have special non-printing formatting cues embedded in the
439text itself, as well as
440.Xr roff 7
441escapes preserved from input.
442Implementing systems will need to handle both situations to produce
443human-readable text.
444In general, strings may be assumed to consist of 7-bit ASCII characters.
445.Pp
446The following non-printing characters may be embedded in text strings:
447.Bl -tag -width Ds
448.It Dv ASCII_NBRSP
449A non-breaking space character.
450.It Dv ASCII_HYPH
451A soft hyphen.
070c62a6
FF
452.It Dv ASCII_BREAK
453A breakable zero-width space.
36342e81
SW
454.El
455.Pp
456Escape characters are also passed verbatim into text strings.
457An escape character is a sequence of characters beginning with the
458backslash
459.Pq Sq \e .
460To construct human-readable text, these should be intercepted with
070c62a6
FF
461.Xr mandoc_escape 3
462and converted with one the functions described in
463.Xr mchars_alloc 3 .
60e1e752
SW
464.Ss Man Abstract Syntax Tree
465This AST is governed by the ontological rules dictated in
466.Xr man 7
467and derives its terminology accordingly.
468.Pp
469The AST is composed of
470.Vt struct man_node
471nodes with block, head, body, element, root and text types as declared by the
472.Va type
473field.
474Each node also provides its parse point (the
475.Va line ,
476.Va sec ,
477and
478.Va pos
479fields), its position in the tree (the
480.Va parent ,
481.Va child ,
482.Va next
483and
484.Va prev
485fields) and some type-specific data.
486.Pp
487The tree itself is arranged according to the following normal form,
488where capitalised non-terminals represent nodes.
489.Pp
490.Bl -tag -width "ELEMENTXX" -compact
491.It ROOT
492\(<- mnode+
493.It mnode
494\(<- ELEMENT | TEXT | BLOCK
495.It BLOCK
496\(<- HEAD BODY
497.It HEAD
498\(<- mnode*
499.It BODY
500\(<- mnode*
501.It ELEMENT
502\(<- ELEMENT | TEXT*
503.It TEXT
36342e81 504\(<- [[:ascii:]]*
60e1e752
SW
505.El
506.Pp
507The only elements capable of nesting other elements are those with
070c62a6 508next-line scope as documented in
60e1e752
SW
509.Xr man 7 .
510.Ss Mdoc Abstract Syntax Tree
511This AST is governed by the ontological
512rules dictated in
513.Xr mdoc 7
514and derives its terminology accordingly.
515.Qq In-line
516elements described in
517.Xr mdoc 7
518are described simply as
519.Qq elements .
520.Pp
521The AST is composed of
522.Vt struct mdoc_node
523nodes with block, head, body, element, root and text types as declared
524by the
525.Va type
526field.
527Each node also provides its parse point (the
528.Va line ,
529.Va sec ,
530and
531.Va pos
532fields), its position in the tree (the
533.Va parent ,
534.Va child ,
535.Va nchild ,
536.Va next
537and
538.Va prev
539fields) and some type-specific data, in particular, for nodes generated
540from macros, the generating macro in the
541.Va tok
542field.
543.Pp
544The tree itself is arranged according to the following normal form,
545where capitalised non-terminals represent nodes.
546.Pp
547.Bl -tag -width "ELEMENTXX" -compact
548.It ROOT
549\(<- mnode+
550.It mnode
551\(<- BLOCK | ELEMENT | TEXT
552.It BLOCK
553\(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
554.It ELEMENT
555\(<- TEXT*
556.It HEAD
557\(<- mnode*
558.It BODY
559\(<- mnode* [ENDBODY mnode*]
560.It TAIL
561\(<- mnode*
562.It TEXT
36342e81 563\(<- [[:ascii:]]*
60e1e752
SW
564.El
565.Pp
566Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
567the BLOCK production: these refer to punctuation marks.
568Furthermore, although a TEXT node will generally have a non-zero-length
569string, in the specific case of
570.Sq \&.Bd \-literal ,
571an empty line will produce a zero-length string.
572Multiple body parts are only found in invocations of
573.Sq \&Bl \-column ,
574where a new body introduces a new phrase.
575.Pp
576The
577.Xr mdoc 7
a4c7eb57 578syntax tree accommodates broken block structures as well.
60e1e752
SW
579The ENDBODY node is available to end the formatting associated
580with a given block before the physical end of that block.
581It has a non-null
582.Va end
583field, is of the BODY
584.Va type ,
585has the same
586.Va tok
587as the BLOCK it is ending, and has a
588.Va pending
589field pointing to that BLOCK's BODY node.
590It is an indirect child of that BODY node
591and has no children of its own.
592.Pp
593An ENDBODY node is generated when a block ends while one of its child
594blocks is still open, as in the following example:
595.Bd -literal -offset indent
596\&.Ao ao
597\&.Bo bo ac
598\&.Ac bc
599\&.Bc end
600.Ed
601.Pp
602This example results in the following block structure:
603.Bd -literal -offset indent
604BLOCK Ao
605    HEAD Ao
606    BODY Ao
607        TEXT ao
608        BLOCK Bo, pending -> Ao
609            HEAD Bo
610            BODY Bo
611                TEXT bo
612                TEXT ac
613                ENDBODY Ao, pending -> Ao
614                TEXT bc
615TEXT end
616.Ed
617.Pp
618Here, the formatting of the
619.Sq \&Ao
620block extends from TEXT ao to TEXT ac,
621while the formatting of the
622.Sq \&Bo
623block extends from TEXT bo to TEXT bc.
624It renders as follows in
625.Fl T Ns Cm ascii
626mode:
627.Pp
628.Dl <ao [bo ac> bc] end
629.Pp
630Support for badly-nested blocks is only provided for backward
631compatibility with some older
632.Xr mdoc 7
633implementations.
634Using badly-nested blocks is
635.Em strongly discouraged ;
636for example, the
637.Fl T Ns Cm html
638and
639.Fl T Ns Cm xhtml
640front-ends to
641.Xr mandoc 1
642are unable to render them in any meaningful way.
643Furthermore, behaviour when encountering badly-nested blocks is not
070c62a6 644consistent across troff implementations, especially when using multiple
60e1e752
SW
645levels of badly-nested blocks.
646.Sh SEE ALSO
647.Xr mandoc 1 ,
070c62a6
FF
648.Xr mandoc_escape 3 ,
649.Xr mandoc_malloc 3 ,
650.Xr mchars_alloc 3 ,
60e1e752
SW
651.Xr eqn 7 ,
652.Xr man 7 ,
a4c7eb57 653.Xr mandoc_char 7 ,
60e1e752
SW
654.Xr mdoc 7 ,
655.Xr roff 7 ,
656.Xr tbl 7
657.Sh AUTHORS
658The
659.Nm
660library was written by
f88b6c16 661.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv .