Import mdocml-1.13.1
[dragonfly.git] / contrib / mdocml / mandoc.3
... / ...
CommitLineData
1.\" $Id: mandoc.3,v 1.25 2014/08/05 05:48:56 schwarze Exp $
2.\"
3.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4.\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5.\"
6.\" Permission to use, copy, modify, and distribute this software for any
7.\" purpose with or without fee is hereby granted, provided that the above
8.\" copyright notice and this permission notice appear in all copies.
9.\"
10.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17.\"
18.Dd $Mdocdate: August 5 2014 $
19.Dt MANDOC 3
20.Os
21.Sh NAME
22.Nm mandoc ,
23.Nm man_deroff ,
24.Nm man_meta ,
25.Nm man_mparse ,
26.Nm man_node ,
27.Nm mdoc_deroff ,
28.Nm mdoc_meta ,
29.Nm mdoc_node ,
30.Nm mparse_alloc ,
31.Nm mparse_free ,
32.Nm mparse_getkeep ,
33.Nm mparse_keep ,
34.Nm mparse_readfd ,
35.Nm mparse_reset ,
36.Nm mparse_result ,
37.Nm mparse_strerror ,
38.Nm mparse_strlevel
39.Nd mandoc macro compiler library
40.Sh LIBRARY
41.Lb libmandoc
42.Sh SYNOPSIS
43.In sys/types.h
44.In mandoc.h
45.Fd "#define ASCII_NBRSP"
46.Fd "#define ASCII_HYPH"
47.Fd "#define ASCII_BREAK"
48.Ft struct mparse *
49.Fo mparse_alloc
50.Fa "int options"
51.Fa "enum mandoclevel wlevel"
52.Fa "mandocmsg mmsg"
53.Fa "char *defos"
54.Fc
55.Ft void
56.Fo (*mandocmsg)
57.Fa "enum mandocerr errtype"
58.Fa "enum mandoclevel level"
59.Fa "const char *file"
60.Fa "int line"
61.Fa "int col"
62.Fa "const char *msg"
63.Fc
64.Ft void
65.Fo mparse_free
66.Fa "struct mparse *parse"
67.Fc
68.Ft const char *
69.Fo mparse_getkeep
70.Fa "const struct mparse *parse"
71.Fc
72.Ft void
73.Fo mparse_keep
74.Fa "struct mparse *parse"
75.Fc
76.Ft "enum mandoclevel"
77.Fo mparse_readfd
78.Fa "struct mparse *parse"
79.Fa "int fd"
80.Fa "const char *fname"
81.Fc
82.Ft void
83.Fo mparse_reset
84.Fa "struct mparse *parse"
85.Fc
86.Ft void
87.Fo mparse_result
88.Fa "struct mparse *parse"
89.Fa "struct mdoc **mdoc"
90.Fa "struct man **man"
91.Fa "char **sodest"
92.Fc
93.Ft "const char *"
94.Fo mparse_strerror
95.Fa "enum mandocerr"
96.Fc
97.Ft "const char *"
98.Fo mparse_strlevel
99.Fa "enum mandoclevel"
100.Fc
101.In sys/types.h
102.In mandoc.h
103.In mdoc.h
104.Ft void
105.Fo mdoc_deroff
106.Fa "char **dest"
107.Fa "const struct mdoc_node *node"
108.Fc
109.Ft "const struct mdoc_meta *"
110.Fo mdoc_meta
111.Fa "const struct mdoc *mdoc"
112.Fc
113.Ft "const struct mdoc_node *"
114.Fo mdoc_node
115.Fa "const struct mdoc *mdoc"
116.Fc
117.Vt extern const char * const * mdoc_argnames;
118.Vt extern const char * const * mdoc_macronames;
119.In sys/types.h
120.In mandoc.h
121.In man.h
122.Ft void
123.Fo man_deroff
124.Fa "char **dest"
125.Fa "const struct man_node *node"
126.Fc
127.Ft "const struct man_meta *"
128.Fo man_meta
129.Fa "const struct man *man"
130.Fc
131.Ft "const struct mparse *"
132.Fo man_mparse
133.Fa "const struct man *man"
134.Fc
135.Ft "const struct man_node *"
136.Fo man_node
137.Fa "const struct man *man"
138.Fc
139.Vt extern const char * const * man_macronames;
140.Sh DESCRIPTION
141The
142.Nm mandoc
143library parses a
144.Ux
145manual into an abstract syntax tree (AST).
146.Ux
147manuals are composed of
148.Xr mdoc 7
149or
150.Xr man 7 ,
151and may be mixed with
152.Xr roff 7 ,
153.Xr tbl 7 ,
154and
155.Xr eqn 7
156invocations.
157.Pp
158The following describes a general parse sequence:
159.Bl -enum
160.It
161initiate a parsing sequence with
162.Fn mparse_alloc ;
163.It
164parse files or file descriptors with
165.Fn mparse_readfd ;
166.It
167retrieve a parsed syntax tree, if the parse was successful, with
168.Fn mparse_result ;
169.It
170iterate over parse nodes with
171.Fn mdoc_node
172or
173.Fn man_node ;
174.It
175free all allocated memory with
176.Fn mparse_free ,
177or invoke
178.Fn mparse_reset
179and parse new files.
180.El
181.Sh REFERENCE
182This section documents the functions, types, and variables available
183via
184.In mandoc.h ,
185with the exception of those documented in
186.Xr mandoc_escape 3
187and
188.Xr mchars_alloc 3 .
189.Ss Types
190.Bl -ohang
191.It Vt "enum mandocerr"
192A fatal error, error, or warning message during parsing.
193.It Vt "enum mandoclevel"
194A classification of an
195.Vt "enum mandocerr"
196as regards system operation.
197.It Vt "struct mparse"
198An opaque pointer to a running parse sequence.
199Created with
200.Fn mparse_alloc
201and freed with
202.Fn mparse_free .
203This may be used across parsed input if
204.Fn mparse_reset
205is called between parses.
206.It Vt "mandocmsg"
207A prototype for a function to handle fatal error, error, and warning
208messages emitted by the parser.
209.El
210.Ss Functions
211.Bl -ohang
212.It Fn man_deroff
213Obtain a text-only representation of a
214.Vt struct man_node ,
215including text contained in its child nodes.
216To be used on children of the pointer returned from
217.Fn man_node .
218When it is no longer needed, the pointer returned from
219.Fn man_deroff
220can be passed to
221.Xr free 3 .
222.It Fn man_meta
223Obtain the meta-data of a successful
224.Xr man 7
225parse.
226This may only be used on a pointer returned by
227.Fn mparse_result .
228Declared in
229.In man.h ,
230implemented in
231.Pa man.c .
232.It Fn man_mparse
233Get the parser used for the current output.
234Declared in
235.In man.h ,
236implemented in
237.Pa man.c .
238.It Fn man_node
239Obtain the root node of a successful
240.Xr man 7
241parse.
242This may only be used on a pointer returned by
243.Fn mparse_result .
244Declared in
245.In man.h ,
246implemented in
247.Pa man.c .
248.It Fn mdoc_deroff
249Obtain a text-only representation of a
250.Vt struct mdoc_node ,
251including text contained in its child nodes.
252To be used on children of the pointer returned from
253.Fn mdoc_node .
254When it is no longer needed, the pointer returned from
255.Fn mdoc_deroff
256can be passed to
257.Xr free 3 .
258.It Fn mdoc_meta
259Obtain the meta-data of a successful
260.Xr mdoc 7
261parse.
262This may only be used on a pointer returned by
263.Fn mparse_result .
264Declared in
265.In mdoc.h ,
266implemented in
267.Pa mdoc.c .
268.It Fn mdoc_node
269Obtain the root node of a successful
270.Xr mdoc 7
271parse.
272This may only be used on a pointer returned by
273.Fn mparse_result .
274Declared in
275.In mdoc.h ,
276implemented in
277.Pa mdoc.c .
278.It Fn mparse_alloc
279Allocate a parser.
280The arguments have the following effect:
281.Bl -tag -offset 5n -width inttype
282.It Ar options
283When the
284.Dv MPARSE_MDOC
285or
286.Dv MPARSE_MAN
287bit is set, only that parser is used.
288Otherwise, the document type is automatically detected.
289.Pp
290When the
291.Dv MPARSE_SO
292bit is set,
293.Xr roff 7
294.Ic \&so
295file inclusion requests are always honoured.
296Otherwise, if the request is the only content in an input file,
297only the file name is remembered, to be returned in the
298.Fa sodest
299argument of
300.Fn mparse_result .
301.Pp
302When the
303.Dv MPARSE_QUICK
304bit is set, parsing is aborted after the NAME section.
305This is for example useful in
306.Xr makewhatis 8
307.Fl Q
308to quickly build minimal databases.
309.It Ar wlevel
310Can be set to
311.Dv MANDOCLEVEL_FATAL ,
312.Dv MANDOCLEVEL_ERROR ,
313or
314.Dv MANDOCLEVEL_WARNING .
315Messages below the selected level will be suppressed.
316.It Ar mmsg
317A callback function to handle errors and warnings.
318See
319.Pa main.c
320for an example.
321.It Ar defos
322A default string for the
323.Xr mdoc 7
324.Sq \&Os
325macro, overriding the
326.Dv OSNAME
327preprocessor definition and the results of
328.Xr uname 3 .
329.El
330.Pp
331The same parser may be used for multiple files so long as
332.Fn mparse_reset
333is called between parses.
334.Fn mparse_free
335must be called to free the memory allocated by this function.
336Declared in
337.In mandoc.h ,
338implemented in
339.Pa read.c .
340.It Fn mparse_free
341Free all memory allocated by
342.Fn mparse_alloc .
343Declared in
344.In mandoc.h ,
345implemented in
346.Pa read.c .
347.It Fn mparse_getkeep
348Acquire the keep buffer.
349Must follow a call of
350.Fn mparse_keep .
351Declared in
352.In mandoc.h ,
353implemented in
354.Pa read.c .
355.It Fn mparse_keep
356Instruct the parser to retain a copy of its parsed input.
357This can be acquired with subsequent
358.Fn mparse_getkeep
359calls.
360Declared in
361.In mandoc.h ,
362implemented in
363.Pa read.c .
364.It Fn mparse_readfd
365Parse a file or file descriptor.
366If
367.Va fd
368is -1,
369.Va fname
370is opened for reading.
371Otherwise,
372.Va fname
373is assumed to be the name associated with
374.Va fd .
375This may be called multiple times with different parameters; however,
376.Fn mparse_reset
377should be invoked between parses.
378Declared in
379.In mandoc.h ,
380implemented in
381.Pa read.c .
382.It Fn mparse_reset
383Reset a parser so that
384.Fn mparse_readfd
385may be used again.
386Declared in
387.In mandoc.h ,
388implemented in
389.Pa read.c .
390.It Fn mparse_result
391Obtain the result of a parse.
392Only successful parses
393.Po
394i.e., those where
395.Fn mparse_readfd
396returned less than MANDOCLEVEL_FATAL
397.Pc
398should invoke this function, in which case one of the three pointers will
399be filled in.
400Declared in
401.In mandoc.h ,
402implemented in
403.Pa read.c .
404.It Fn mparse_strerror
405Return a statically-allocated string representation of an error code.
406Declared in
407.In mandoc.h ,
408implemented in
409.Pa read.c .
410.It Fn mparse_strlevel
411Return a statically-allocated string representation of a level code.
412Declared in
413.In mandoc.h ,
414implemented in
415.Pa read.c .
416.El
417.Ss Variables
418.Bl -ohang
419.It Va man_macronames
420The string representation of a man macro as indexed by
421.Vt "enum mant" .
422.It Va mdoc_argnames
423The string representation of a mdoc macro argument as indexed by
424.Vt "enum mdocargt" .
425.It Va mdoc_macronames
426The string representation of a mdoc macro as indexed by
427.Vt "enum mdoct" .
428.El
429.Sh IMPLEMENTATION NOTES
430This section consists of structural documentation for
431.Xr mdoc 7
432and
433.Xr man 7
434syntax trees and strings.
435.Ss Man and Mdoc Strings
436Strings may be extracted from mdoc and man meta-data, or from text
437nodes (MDOC_TEXT and MAN_TEXT, respectively).
438These strings have special non-printing formatting cues embedded in the
439text itself, as well as
440.Xr roff 7
441escapes preserved from input.
442Implementing systems will need to handle both situations to produce
443human-readable text.
444In general, strings may be assumed to consist of 7-bit ASCII characters.
445.Pp
446The following non-printing characters may be embedded in text strings:
447.Bl -tag -width Ds
448.It Dv ASCII_NBRSP
449A non-breaking space character.
450.It Dv ASCII_HYPH
451A soft hyphen.
452.It Dv ASCII_BREAK
453A breakable zero-width space.
454.El
455.Pp
456Escape characters are also passed verbatim into text strings.
457An escape character is a sequence of characters beginning with the
458backslash
459.Pq Sq \e .
460To construct human-readable text, these should be intercepted with
461.Xr mandoc_escape 3
462and converted with one of the functions described in
463.Xr mchars_alloc 3 .
464.Ss Man Abstract Syntax Tree
465This AST is governed by the ontological rules dictated in
466.Xr man 7
467and derives its terminology accordingly.
468.Pp
469The AST is composed of
470.Vt struct man_node
471nodes with element, root and text types as declared by the
472.Va type
473field.
474Each node also provides its parse point (the
475.Va line ,
476.Va sec ,
477and
478.Va pos
479fields), its position in the tree (the
480.Va parent ,
481.Va child ,
482.Va next
483and
484.Va prev
485fields) and some type-specific data.
486.Pp
487The tree itself is arranged according to the following normal form,
488where capitalised non-terminals represent nodes.
489.Pp
490.Bl -tag -width "ELEMENTXX" -compact
491.It ROOT
492\(<- mnode+
493.It mnode
494\(<- ELEMENT | TEXT | BLOCK
495.It BLOCK
496\(<- HEAD BODY
497.It HEAD
498\(<- mnode*
499.It BODY
500\(<- mnode*
501.It ELEMENT
502\(<- ELEMENT | TEXT*
503.It TEXT
504\(<- [[:ascii:]]*
505.El
506.Pp
507The only elements capable of nesting other elements are those with
508next-line scope as documented in
509.Xr man 7 .
510.Ss Mdoc Abstract Syntax Tree
511This AST is governed by the ontological
512rules dictated in
513.Xr mdoc 7
514and derives its terminology accordingly.
515.Qq In-line
516elements described in
517.Xr mdoc 7
518are described simply as
519.Qq elements .
520.Pp
521The AST is composed of
522.Vt struct mdoc_node
523nodes with block, head, body, element, root and text types as declared
524by the
525.Va type
526field.
527Each node also provides its parse point (the
528.Va line ,
529.Va sec ,
530and
531.Va pos
532fields), its position in the tree (the
533.Va parent ,
534.Va child ,
535.Va nchild ,
536.Va next
537and
538.Va prev
539fields) and some type-specific data, in particular, for nodes generated
540from macros, the generating macro in the
541.Va tok
542field.
543.Pp
544The tree itself is arranged according to the following normal form,
545where capitalised non-terminals represent nodes.
546.Pp
547.Bl -tag -width "ELEMENTXX" -compact
548.It ROOT
549\(<- mnode+
550.It mnode
551\(<- BLOCK | ELEMENT | TEXT
552.It BLOCK
553\(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
554.It ELEMENT
555\(<- TEXT*
556.It HEAD
557\(<- mnode*
558.It BODY
559\(<- mnode* [ENDBODY mnode*]
560.It TAIL
561\(<- mnode*
562.It TEXT
563\(<- [[:ascii:]]*
564.El
565.Pp
566Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
567the BLOCK production: these refer to punctuation marks.
568Furthermore, although a TEXT node will generally have a non-zero-length
569string, in the specific case of
570.Sq \&.Bd \-literal ,
571an empty line will produce a zero-length string.
572Multiple body parts are only found in invocations of
573.Sq \&Bl \-column ,
574where a new body introduces a new phrase.
575.Pp
576The
577.Xr mdoc 7
578syntax tree accommodates broken block structures as well.
579The ENDBODY node is available to end the formatting associated
580with a given block before the physical end of that block.
581It has a non-null
582.Va end
583field, is of the BODY
584.Va type ,
585has the same
586.Va tok
587as the BLOCK it is ending, and has a
588.Va pending
589field pointing to that BLOCK's BODY node.
590It is an indirect child of that BODY node
591and has no children of its own.
592.Pp
593An ENDBODY node is generated when a block ends while one of its child
594blocks is still open, like in the following example:
595.Bd -literal -offset indent
596\&.Ao ao
597\&.Bo bo ac
598\&.Ac bc
599\&.Bc end
600.Ed
601.Pp
602This example results in the following block structure:
603.Bd -literal -offset indent
604BLOCK Ao
605    HEAD Ao
606    BODY Ao
607        TEXT ao
608        BLOCK Bo, pending -> Ao
609            HEAD Bo
610            BODY Bo
611                TEXT bo
612                TEXT ac
613                ENDBODY Ao, pending -> Ao
614                TEXT bc
615TEXT end
616.Ed
617.Pp
618Here, the formatting of the
619.Sq \&Ao
620block extends from TEXT ao to TEXT ac,
621while the formatting of the
622.Sq \&Bo
623block extends from TEXT bo to TEXT bc.
624It renders as follows in
625.Fl T Ns Cm ascii
626mode:
627.Pp
628.Dl <ao [bo ac> bc] end
629.Pp
630Support for badly-nested blocks is only provided for backward
631compatibility with some older
632.Xr mdoc 7
633implementations.
634Using badly-nested blocks is
635.Em strongly discouraged ;
636for example, the
637.Fl T Ns Cm html
638and
639.Fl T Ns Cm xhtml
640front-ends to
641.Xr mandoc 1
642are unable to render them in any meaningful way.
643Furthermore, behaviour when encountering badly-nested blocks is not
644consistent across troff implementations, especially when using multiple
645levels of badly-nested blocks.
646.Sh SEE ALSO
647.Xr mandoc 1 ,
648.Xr mandoc_escape 3 ,
649.Xr mandoc_malloc 3 ,
650.Xr mchars_alloc 3 ,
651.Xr eqn 7 ,
652.Xr man 7 ,
653.Xr mandoc_char 7 ,
654.Xr mdoc 7 ,
655.Xr roff 7 ,
656.Xr tbl 7
657.Sh AUTHORS
658The
659.Nm
660library was written by
661.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv .