Import mdocml-1.12.3
[dragonfly.git] / contrib / mdocml / mandoc.3
CommitLineData
7888c61d 1.\" $Id: mandoc.3,v 1.22 2013/10/06 17:01:52 schwarze Exp $
60e1e752
SW
2.\"
3.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4.\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5.\"
6.\" Permission to use, copy, modify, and distribute this software for any
7.\" purpose with or without fee is hereby granted, provided that the above
8.\" copyright notice and this permission notice appear in all copies.
9.\"
10.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17.\"
7888c61d 18.Dd $Mdocdate: October 6 2013 $
60e1e752
SW
19.Dt MANDOC 3
20.Os
21.Sh NAME
22.Nm mandoc ,
a4c7eb57 23.Nm mandoc_escape ,
60e1e752 24.Nm man_meta ,
36342e81 25.Nm man_mparse ,
60e1e752 26.Nm man_node ,
a4c7eb57
SW
27.Nm mchars_alloc ,
28.Nm mchars_free ,
29.Nm mchars_num2char ,
30.Nm mchars_num2uc ,
31.Nm mchars_spec2cp ,
32.Nm mchars_spec2str ,
60e1e752
SW
33.Nm mdoc_meta ,
34.Nm mdoc_node ,
35.Nm mparse_alloc ,
36.Nm mparse_free ,
36342e81
SW
37.Nm mparse_getkeep ,
38.Nm mparse_keep ,
60e1e752
SW
39.Nm mparse_readfd ,
40.Nm mparse_reset ,
41.Nm mparse_result ,
42.Nm mparse_strerror ,
43.Nm mparse_strlevel
44.Nd mandoc macro compiler library
a4c7eb57 45.Sh LIBRARY
7888c61d 46.Lb libmandoc
60e1e752
SW
47.Sh SYNOPSIS
48.In man.h
49.In mdoc.h
50.In mandoc.h
a4c7eb57
SW
51.Ft "enum mandoc_esc"
52.Fo mandoc_escape
7888c61d
FF
53.Fa "const char const **end"
54.Fa "const char const **start"
36342e81 55.Fa "int *sz"
a4c7eb57 56.Fc
60e1e752
SW
57.Ft "const struct man_meta *"
58.Fo man_meta
59.Fa "const struct man *man"
60.Fc
36342e81
SW
61.Ft "const struct mparse *"
62.Fo man_mparse
63.Fa "const struct man *man"
64.Fc
60e1e752
SW
65.Ft "const struct man_node *"
66.Fo man_node
67.Fa "const struct man *man"
68.Fc
a4c7eb57 69.Ft "struct mchars *"
f88b6c16 70.Fn mchars_alloc "void"
a4c7eb57
SW
71.Ft void
72.Fn mchars_free "struct mchars *p"
73.Ft char
74.Fn mchars_num2char "const char *cp" "size_t sz"
75.Ft int
76.Fn mchars_num2uc "const char *cp" "size_t sz"
77.Ft "const char *"
78.Fo mchars_spec2str
36342e81 79.Fa "const struct mchars *p"
a4c7eb57
SW
80.Fa "const char *cp"
81.Fa "size_t sz"
82.Fa "size_t *rsz"
83.Fc
84.Ft int
85.Fo mchars_spec2cp
36342e81 86.Fa "const struct mchars *p"
a4c7eb57
SW
87.Fa "const char *cp"
88.Fa "size_t sz"
a4c7eb57 89.Fc
60e1e752
SW
90.Ft "const struct mdoc_meta *"
91.Fo mdoc_meta
92.Fa "const struct mdoc *mdoc"
93.Fc
94.Ft "const struct mdoc_node *"
95.Fo mdoc_node
96.Fa "const struct mdoc *mdoc"
97.Fc
98.Ft "struct mparse *"
99.Fo mparse_alloc
100.Fa "enum mparset type"
101.Fa "enum mandoclevel wlevel"
102.Fa "mandocmsg msg"
103.Fa "void *msgarg"
104.Fc
105.Ft void
106.Fo mparse_free
107.Fa "struct mparse *parse"
108.Fc
36342e81
SW
109.Ft "const char *"
110.Fo mparse_getkeep
111.Fa "const struct mparse *parse"
112.Fc
113.Ft void
114.Fo mparse_keep
115.Fa "struct mparse *parse"
116.Fc
60e1e752
SW
117.Ft "enum mandoclevel"
118.Fo mparse_readfd
119.Fa "struct mparse *parse"
120.Fa "int fd"
121.Fa "const char *fname"
122.Fc
123.Ft void
124.Fo mparse_reset
125.Fa "struct mparse *parse"
126.Fc
127.Ft void
128.Fo mparse_result
129.Fa "struct mparse *parse"
130.Fa "struct mdoc **mdoc"
131.Fa "struct man **man"
132.Fc
133.Ft "const char *"
134.Fo mparse_strerror
135.Fa "enum mandocerr"
136.Fc
137.Ft "const char *"
138.Fo mparse_strlevel
139.Fa "enum mandoclevel"
140.Fc
141.Vt extern const char * const * man_macronames;
142.Vt extern const char * const * mdoc_argnames;
143.Vt extern const char * const * mdoc_macronames;
a4c7eb57
SW
144.Fd "#define ASCII_NBRSP"
145.Fd "#define ASCII_HYPH"
60e1e752
SW
146.Sh DESCRIPTION
147The
148.Nm mandoc
149library parses a
150.Ux
151manual into an abstract syntax tree (AST).
152.Ux
153manuals are composed of
154.Xr mdoc 7
155or
156.Xr man 7 ,
157and may be mixed with
158.Xr roff 7 ,
159.Xr tbl 7 ,
160and
161.Xr eqn 7
162invocations.
163.Pp
164The following describes a general parse sequence:
165.Bl -enum
166.It
167initiate a parsing sequence with
168.Fn mparse_alloc ;
169.It
170parse files or file descriptors with
171.Fn mparse_readfd ;
172.It
173retrieve a parsed syntax tree, if the parse was successful, with
174.Fn mparse_result ;
175.It
176iterate over parse nodes with
177.Fn mdoc_node
178or
179.Fn man_node ;
180.It
181free all allocated memory with
182.Fn mparse_free ,
183or invoke
184.Fn mparse_reset
185and parse new files.
186.El
a4c7eb57
SW
187.Pp
188The
189.Nm
190library also contains routines for translating character strings into glyphs
191.Pq see Fn mchars_alloc
192and parsing escape sequences from strings
193.Pq see Fn mandoc_escape .
a4c7eb57
SW
194.Sh REFERENCE
195This section documents the functions, types, and variables available
196via
197.In mandoc.h .
198.Ss Types
199.Bl -ohang
200.It Vt "enum mandoc_esc"
36342e81 201An escape sequence classification.
a4c7eb57 202.It Vt "enum mandocerr"
36342e81 203A fatal error, error, or warning message during parsing.
a4c7eb57 204.It Vt "enum mandoclevel"
36342e81
SW
205A classification of an
206.Vt "enum mandocerr"
207as regards system operation.
a4c7eb57
SW
208.It Vt "struct mchars"
209An opaque pointer to an object allowing for translation between
210character strings and glyphs.
211See
212.Fn mchars_alloc .
213.It Vt "enum mparset"
36342e81
SW
214The type of parser when reading input.
215This should usually be
216.Dv MPARSE_AUTO
217for auto-detection.
a4c7eb57 218.It Vt "struct mparse"
36342e81
SW
219An opaque pointer to a running parse sequence.
220Created with
221.Fn mparse_alloc
222and freed with
223.Fn mparse_free .
224This may be used across parsed input if
225.Fn mparse_reset
226is called between parses.
a4c7eb57 227.It Vt "mandocmsg"
36342e81
SW
228A prototype for a function to handle fatal error, error, and warning
229messages emitted by the parser.
a4c7eb57
SW
230.El
231.Ss Functions
232.Bl -ohang
233.It Fn mandoc_escape
234Scan an escape sequence, i.e., a character string beginning with
235.Sq \e .
36342e81
SW
236Pass a pointer to the character after the
237.Sq \e
238as
a4c7eb57
SW
239.Va end ;
240it will be set to the supremum of the parsed escape sequence unless
36342e81
SW
241returning
242.Dv ESCAPE_ERROR ,
243in which case the string is bogus and should be
a4c7eb57 244thrown away.
36342e81
SW
245If not
246.Dv ESCAPE_ERROR
247or
248.Dv ESCAPE_IGNORE ,
a4c7eb57
SW
249.Va start
250is set to the first relevant character of the substring (font, glyph,
251whatever) of length
252.Va sz .
253Both
254.Va start
255and
256.Va sz
36342e81
SW
257may be
258.Dv NULL .
f88b6c16
FF
259Declared in
260.In mandoc.h ,
261implemented in
262.Pa mandoc.c .
a4c7eb57
SW
263.It Fn man_meta
264Obtain the meta-data of a successful parse.
265This may only be used on a pointer returned by
266.Fn mparse_result .
f88b6c16
FF
267Declared in
268.In man.h ,
269implemented in
270.Pa man.c .
36342e81
SW
271.It Fn man_mparse
272Get the parser used for the current output.
f88b6c16
FF
273Declared in
274.In man.h ,
275implemented in
276.Pa man.c .
a4c7eb57
SW
277.It Fn man_node
278Obtain the root node of a successful parse.
279This may only be used on a pointer returned by
280.Fn mparse_result .
f88b6c16
FF
281Declared in
282.In man.h ,
283implemented in
284.Pa man.c .
a4c7eb57
SW
285.It Fn mchars_alloc
286Allocate an
287.Vt "struct mchars *"
288object for translating special characters into glyphs.
289See
290.Xr mandoc_char 7
291for an overview of special characters.
292The object must be freed with
293.Fn mchars_free .
f88b6c16
FF
294Declared in
295.In mandoc.h ,
296implemented in
297.Pa chars.c .
a4c7eb57
SW
298.It Fn mchars_free
299Free an object created with
300.Fn mchars_alloc .
f88b6c16
FF
301Declared in
302.In mandoc.h ,
303implemented in
304.Pa chars.c .
a4c7eb57
SW
305.It Fn mchars_num2char
306Convert a character index (e.g., the \eN\(aq\(aq escape) into a
307printable ASCII character.
308Returns \e0 (the nil character) if the input sequence is malformed.
f88b6c16
FF
309Declared in
310.In mandoc.h ,
311implemented in
312.Pa chars.c .
a4c7eb57
SW
313.It Fn mchars_num2uc
314Convert a hexadecimal character index (e.g., the \e[uNNNN] escape) into
315a Unicode codepoint.
316Returns \e0 (the nil character) if the input sequence is malformed.
f88b6c16
FF
317Declared in
318.In mandoc.h ,
319implemented in
320.Pa chars.c .
a4c7eb57
SW
321.It Fn mchars_spec2cp
322Convert a special character into a valid Unicode codepoint.
323Returns \-1 on failure or a non-zero Unicode codepoint on success.
f88b6c16
FF
324Declared in
325.In mandoc.h ,
326implemented in
327.Pa chars.c .
a4c7eb57
SW
328.It Fn mchars_spec2str
329Convert a special character into an ASCII string.
36342e81
SW
330Returns
331.Dv NULL
332on failure.
f88b6c16
FF
333Declared in
334.In mandoc.h ,
335implemented in
336.Pa chars.c .
a4c7eb57
SW
337.It Fn mdoc_meta
338Obtain the meta-data of a successful parse.
339This may only be used on a pointer returned by
340.Fn mparse_result .
f88b6c16
FF
341Declared in
342.In mdoc.h ,
343implemented in
344.Pa mdoc.c .
a4c7eb57
SW
345.It Fn mdoc_node
346Obtain the root node of a successful parse.
347This may only be used on a pointer returned by
348.Fn mparse_result .
f88b6c16
FF
349Declared in
350.In mdoc.h ,
351implemented in
352.Pa mdoc.c .
a4c7eb57
SW
353.It Fn mparse_alloc
354Allocate a parser.
355The same parser may be used for multiple files so long as
356.Fn mparse_reset
357is called between parses.
358.Fn mparse_free
359must be called to free the memory allocated by this function.
f88b6c16
FF
360Declared in
361.In mandoc.h ,
362implemented in
363.Pa read.c .
a4c7eb57
SW
364.It Fn mparse_free
365Free all memory allocated by
366.Fn mparse_alloc .
f88b6c16
FF
367Declared in
368.In mandoc.h ,
369implemented in
370.Pa read.c .
36342e81
SW
371.It Fn mparse_getkeep
372Acquire the keep buffer.
373Must follow a call of
374.Fn mparse_keep .
f88b6c16
FF
375Declared in
376.In mandoc.h ,
377implemented in
378.Pa read.c .
36342e81
SW
379.It Fn mparse_keep
380Instruct the parser to retain a copy of its parsed input.
381This can be acquired with subsequent
382.Fn mparse_getkeep
383calls.
f88b6c16
FF
384Declared in
385.In mandoc.h ,
386implemented in
387.Pa read.c .
a4c7eb57
SW
388.It Fn mparse_readfd
389Parse a file or file descriptor.
390If
391.Va fd
392is \-1,
393.Va fname
394is opened for reading.
395Otherwise,
396.Va fname
397is assumed to be the name associated with
398.Va fd .
399This may be called multiple times with different parameters; however,
400.Fn mparse_reset
401should be invoked between parses.
f88b6c16
FF
402Declared in
403.In mandoc.h ,
404implemented in
405.Pa read.c .
a4c7eb57
SW
406.It Fn mparse_reset
407Reset a parser so that
408.Fn mparse_readfd
409may be used again.
f88b6c16
FF
410Declared in
411.In mandoc.h ,
412implemented in
413.Pa read.c .
a4c7eb57
SW
414.It Fn mparse_result
415Obtain the result of a parse.
416Only successful parses
417.Po
418i.e., those where
419.Fn mparse_readfd
420returned less than MANDOCLEVEL_FATAL
421.Pc
422should invoke this function, in which case one of the two pointers will
423be filled in.
f88b6c16
FF
424Declared in
425.In mandoc.h ,
426implemented in
427.Pa read.c .
a4c7eb57
SW
428.It Fn mparse_strerror
429Return a statically-allocated string representation of an error code.
f88b6c16
FF
430Declared in
431.In mandoc.h ,
432implemented in
433.Pa read.c .
a4c7eb57
SW
434.It Fn mparse_strlevel
435Return a statically-allocated string representation of a level code.
f88b6c16
FF
436Declared in
437.In mandoc.h ,
438implemented in
439.Pa read.c .
a4c7eb57
SW
440.El
441.Ss Variables
442.Bl -ohang
443.It Va man_macronames
444The string representation of a man macro as indexed by
445.Vt "enum mant" .
446.It Va mdoc_argnames
447The string representation of a mdoc macro argument as indexed by
448.Vt "enum mdocargt" .
449.It Va mdoc_macronames
450The string representation of a mdoc macro as indexed by
451.Vt "enum mdoct" .
452.El
60e1e752
SW
453.Sh IMPLEMENTATION NOTES
454This section consists of structural documentation for
455.Xr mdoc 7
456and
457.Xr man 7
36342e81
SW
458syntax trees and strings.
459.Ss Man and Mdoc Strings
460Strings may be extracted from mdoc and man meta-data, or from text
461nodes (MDOC_TEXT and MAN_TEXT, respectively).
462These strings have special non-printing formatting cues embedded in the
463text itself, as well as
464.Xr roff 7
465escapes preserved from input.
466Implementing systems will need to handle both situations to produce
467human-readable text.
468In general, strings may be assumed to consist of 7-bit ASCII characters.
469.Pp
470The following non-printing characters may be embedded in text strings:
471.Bl -tag -width Ds
472.It Dv ASCII_NBRSP
473A non-breaking space character.
474.It Dv ASCII_HYPH
475A soft hyphen.
476.El
477.Pp
478Escape characters are also passed verbatim into text strings.
479An escape character is a sequence of characters beginning with the
480backslash
481.Pq Sq \e .
482To construct human-readable text, these should be intercepted with
483.Fn mandoc_escape
484and converted with one of
485.Fn mchars_num2char ,
486.Fn mchars_spec2str ,
487and so on.
60e1e752
SW
488.Ss Man Abstract Syntax Tree
489This AST is governed by the ontological rules dictated in
490.Xr man 7
491and derives its terminology accordingly.
492.Pp
493The AST is composed of
494.Vt struct man_node
495nodes with element, root and text types as declared by the
496.Va type
497field.
498Each node also provides its parse point (the
499.Va line ,
500.Va sec ,
501and
502.Va pos
503fields), its position in the tree (the
504.Va parent ,
505.Va child ,
506.Va next
507and
508.Va prev
509fields) and some type-specific data.
510.Pp
511The tree itself is arranged according to the following normal form,
512where capitalised non-terminals represent nodes.
513.Pp
514.Bl -tag -width "ELEMENTXX" -compact
515.It ROOT
516\(<- mnode+
517.It mnode
518\(<- ELEMENT | TEXT | BLOCK
519.It BLOCK
520\(<- HEAD BODY
521.It HEAD
522\(<- mnode*
523.It BODY
524\(<- mnode*
525.It ELEMENT
526\(<- ELEMENT | TEXT*
527.It TEXT
36342e81 528\(<- [[:ascii:]]*
60e1e752
SW
529.El
530.Pp
531The only elements capable of nesting other elements are those with
532next-line scope as documented in
533.Xr man 7 .
534.Ss Mdoc Abstract Syntax Tree
535This AST is governed by the ontological
536rules dictated in
537.Xr mdoc 7
538and derives its terminology accordingly.
539.Qq In-line
540elements described in
541.Xr mdoc 7
542are described simply as
543.Qq elements .
544.Pp
545The AST is composed of
546.Vt struct mdoc_node
547nodes with block, head, body, element, root and text types as declared
548by the
549.Va type
550field.
551Each node also provides its parse point (the
552.Va line ,
553.Va sec ,
554and
555.Va pos
556fields), its position in the tree (the
557.Va parent ,
558.Va child ,
559.Va nchild ,
560.Va next
561and
562.Va prev
563fields) and some type-specific data, in particular, for nodes generated
564from macros, the generating macro in the
565.Va tok
566field.
567.Pp
568The tree itself is arranged according to the following normal form,
569where capitalised non-terminals represent nodes.
570.Pp
571.Bl -tag -width "ELEMENTXX" -compact
572.It ROOT
573\(<- mnode+
574.It mnode
575\(<- BLOCK | ELEMENT | TEXT
576.It BLOCK
577\(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
578.It ELEMENT
579\(<- TEXT*
580.It HEAD
581\(<- mnode*
582.It BODY
583\(<- mnode* [ENDBODY mnode*]
584.It TAIL
585\(<- mnode*
586.It TEXT
36342e81 587\(<- [[:ascii:]]*
60e1e752
SW
588.El
589.Pp
590Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
591the BLOCK production: these refer to punctuation marks.
592Furthermore, although a TEXT node will generally have a non-zero-length
593string, in the specific case of
594.Sq \&.Bd \-literal ,
595an empty line will produce a zero-length string.
596Multiple body parts are only found in invocations of
597.Sq \&Bl \-column ,
598where a new body introduces a new phrase.
599.Pp
600The
601.Xr mdoc 7
a4c7eb57 602syntax tree accommodates broken block structures as well.
60e1e752
SW
603The ENDBODY node is available to end the formatting associated
604with a given block before the physical end of that block.
605It has a non-null
606.Va end
607field, is of the BODY
608.Va type ,
609has the same
610.Va tok
611as the BLOCK it is ending, and has a
612.Va pending
613field pointing to that BLOCK's BODY node.
614It is an indirect child of that BODY node
615and has no children of its own.
616.Pp
617An ENDBODY node is generated when a block ends while one of its child
618blocks is still open, like in the following example:
619.Bd -literal -offset indent
620\&.Ao ao
621\&.Bo bo ac
622\&.Ac bc
623\&.Bc end
624.Ed
625.Pp
626This example results in the following block structure:
627.Bd -literal -offset indent
628BLOCK Ao
629    HEAD Ao
630    BODY Ao
631        TEXT ao
632        BLOCK Bo, pending -> Ao
633            HEAD Bo
634            BODY Bo
635                TEXT bo
636                TEXT ac
637                ENDBODY Ao, pending -> Ao
638                TEXT bc
639TEXT end
640.Ed
641.Pp
642Here, the formatting of the
643.Sq \&Ao
644block extends from TEXT ao to TEXT ac,
645while the formatting of the
646.Sq \&Bo
647block extends from TEXT bo to TEXT bc.
648It renders as follows in
649.Fl T Ns Cm ascii
650mode:
651.Pp
652.Dl <ao [bo ac> bc] end
653.Pp
654Support for badly-nested blocks is only provided for backward
655compatibility with some older
656.Xr mdoc 7
657implementations.
658Using badly-nested blocks is
659.Em strongly discouraged ;
660for example, the
661.Fl T Ns Cm html
662and
663.Fl T Ns Cm xhtml
664front-ends to
665.Xr mandoc 1
666are unable to render them in any meaningful way.
667Furthermore, behaviour when encountering badly-nested blocks is not
668consistent across troff implementations, especially when using multiple
669levels of badly-nested blocks.
670.Sh SEE ALSO
671.Xr mandoc 1 ,
672.Xr eqn 7 ,
673.Xr man 7 ,
a4c7eb57 674.Xr mandoc_char 7 ,
60e1e752
SW
675.Xr mdoc 7 ,
676.Xr roff 7 ,
677.Xr tbl 7
678.Sh AUTHORS
679The
680.Nm
681library was written by
f88b6c16 682.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv .