From 4a06b3fdd643918ebeb698c287ea9c58ae91f277 Mon Sep 17 00:00:00 2001 From: Sascha Wildner Date: Thu, 19 Nov 2009 09:31:02 +0100 Subject: [PATCH] mandoc(1): Update to 1.9.14. - fixed proper font cascading (no double-mode in -Tascii) - added correct scoping of \f - fixed handling of \c for all input/outputs Thanks-to: Kristaps Dzonsons --- usr.bin/mandoc/Makefile | 2 +- usr.bin/mandoc/chars.c | 7 +- usr.bin/mandoc/chars.in | 11 +- usr.bin/mandoc/html.c | 226 +++++++++++------------ usr.bin/mandoc/html.h | 14 +- usr.bin/mandoc/main.c | 2 +- usr.bin/mandoc/man.7 | 68 +++++-- usr.bin/mandoc/man_html.c | 72 ++++---- usr.bin/mandoc/man_term.c | 160 +++++------------ usr.bin/mandoc/mandoc.1 | 195 ++++++++++++++------ usr.bin/mandoc/mandoc.c | 83 ++++++++- usr.bin/mandoc/mandoc_char.7 | 24 +-- usr.bin/mandoc/mdoc.7 | 65 ++++++- usr.bin/mandoc/mdoc_html.c | 17 +- usr.bin/mandoc/mdoc_term.c | 145 ++++++++------- usr.bin/mandoc/out.c | 191 +++++++++++++++++++- usr.bin/mandoc/out.h | 27 ++- usr.bin/mandoc/term.c | 336 ++++++++++++++++++----------------- usr.bin/mandoc/term.h | 21 ++- 19 files changed, 1041 insertions(+), 625 deletions(-) diff --git a/usr.bin/mandoc/Makefile b/usr.bin/mandoc/Makefile index 59a0af00d3..7909386c95 100644 --- a/usr.bin/mandoc/Makefile +++ b/usr.bin/mandoc/Makefile @@ -1,6 +1,6 @@ # $OpenBSD: Makefile,v 1.21 2009/10/27 21:40:07 schwarze Exp $ -VERSION=1.9.13 +VERSION=1.9.14 CFLAGS+=-DVERSION=\"${VERSION}\" WARNS?= 3 diff --git a/usr.bin/mandoc/chars.c b/usr.bin/mandoc/chars.c index ffa04e8d53..aad8fa6b8b 100644 --- a/usr.bin/mandoc/chars.c +++ b/usr.bin/mandoc/chars.c @@ -1,4 +1,4 @@ -/* $Id: chars.c,v 1.12 2009/11/01 07:44:32 kristaps Exp $ */ +/* $Id: chars.c,v 1.13 2009/11/05 07:21:01 kristaps Exp $ */ /* * Copyright (c) 2009 Kristaps Dzonsons * @@ -47,9 +47,10 @@ struct ln { #define BOTH(w, x, y, z, a, b) \ { NULL, (w), (y), (a), (x), (z), (b), CHARS_BOTH }, -static struct ln lines[LINES_MAX] = { +#define CHAR_TBL_START static struct ln 
lines[LINES_MAX] = { +#define CHAR_TBL_END }; + #include "chars.in" -}; struct tbl { enum chars type; diff --git a/usr.bin/mandoc/chars.in b/usr.bin/mandoc/chars.in index 1e5d3048e0..c2ac4c1604 100644 --- a/usr.bin/mandoc/chars.in +++ b/usr.bin/mandoc/chars.in @@ -1,4 +1,4 @@ -/* $Id: chars.in,v 1.18 2009/09/24 11:55:28 kristaps Exp $ */ +/* $Id: chars.in,v 1.19 2009/11/05 07:21:02 kristaps Exp $ */ /* * Copyright (c) 2009 Kristaps Dzonsons * @@ -30,11 +30,16 @@ * XXX - update LINES_MAX if adding more! */ +/* Non-breaking, non-collapsing space uses unit separator. */ +static const char ascii_nbrsp[2] = { 31, 0 }; + +CHAR_TBL_START + /* Spacing. */ CHAR("c", 1, "", 0, "", 0) CHAR("0", 1, " ", 1, " ", 7) CHAR(" ", 1, " ", 1, " ", 7) -CHAR("~", 1, " ", 1, " ", 6) +CHAR("~", 1, ascii_nbrsp, 1, " ", 6) CHAR("%", 1, "", 0, "", 0) CHAR("&", 1, "", 0, "", 0) CHAR("^", 1, "", 0, "", 0) @@ -416,3 +421,5 @@ CHAR("%0", 2, "%o", 2, "‰", 7) CHAR("fm", 2, "\'", 1, "′", 7) CHAR("sd", 2, "\"", 1, "″", 7) CHAR("mc", 2, "mu", 2, "µ", 6) + +CHAR_TBL_END diff --git a/usr.bin/mandoc/html.c b/usr.bin/mandoc/html.c index 4d8332f81b..4e66415600 100644 --- a/usr.bin/mandoc/html.c +++ b/usr.bin/mandoc/html.c @@ -1,4 +1,4 @@ -/* $Id: html.c,v 1.80 2009/11/02 06:22:44 kristaps Exp $ */ +/* $Id: html.c,v 1.91 2009/11/16 08:46:58 kristaps Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -66,7 +66,13 @@ static const struct htmldata htmltags[TAG_MAX] = { {"base", HTML_CLRLINE | HTML_NOSTACK}, /* TAG_BASE */ }; -static const char *const htmlattrs[ATTR_MAX] = { +static const char *const htmlfonts[HTMLFONT_MAX] = { + "roman", + "bold", + "italic" +}; + +static const char *const htmlattrs[ATTR_MAX] = { "http-equiv", "content", "name", @@ -87,6 +93,14 @@ static const char *const htmlattrs[ATTR_MAX] = { extern int getsubopt(char **, char * const *, char **); #endif + +static void print_spec(struct html *, const char *, size_t); +static void print_res(struct html *, const char *, size_t); 
+static void print_ctag(struct html *, enum htmltag); +static int print_encode(struct html *, const char *, int); +static void print_metaf(struct html *, enum roffdeco); + + void * html_alloc(char *outopts) { @@ -186,12 +200,12 @@ print_gen_head(struct html *h) static void -print_spec(struct html *h, const char *p, int len) +print_spec(struct html *h, const char *p, size_t len) { const char *rhs; size_t sz; - rhs = chars_a2ascii(h->symtab, p, (size_t)len, &sz); + rhs = chars_a2ascii(h->symtab, p, len, &sz); if (NULL == rhs) return; @@ -200,12 +214,12 @@ print_spec(struct html *h, const char *p, int len) static void -print_res(struct html *h, const char *p, int len) +print_res(struct html *h, const char *p, size_t len) { const char *rhs; size_t sz; - rhs = chars_a2res(h->symtab, p, (size_t)len, &sz); + rhs = chars_a2res(h->symtab, p, len, &sz); if (NULL == rhs) return; @@ -213,110 +227,61 @@ print_res(struct html *h, const char *p, int len) } -static void -print_escape(struct html *h, const char **p) +struct tag * +print_ofont(struct html *h, enum htmlfont font) { - int j, type; - const char *wp; - - wp = *p; - type = 1; - - if (0 == *(++wp)) { - *p = wp; - return; - } + struct htmlpair tag; - if ('(' == *wp) { - wp++; - if (0 == *wp || 0 == *(wp + 1)) { - *p = 0 == *wp ? wp : wp + 1; - return; - } - - print_spec(h, wp, 2); - *p = ++wp; - return; + h->metal = h->metac; + h->metac = font; - } else if ('*' == *wp) { - if (0 == *(++wp)) { - *p = wp; - return; - } + /* FIXME: DECO_ROMAN should just close out preexisting. */ - switch (*wp) { - case ('('): - wp++; - if (0 == *wp || 0 == *(wp + 1)) { - *p = 0 == *wp ? 
wp : wp + 1; - return; - } + if (h->metaf && h->tags.head == h->metaf) + print_tagq(h, h->metaf); - print_res(h, wp, 2); - *p = ++wp; - return; - case ('['): - type = 0; - break; - default: - print_res(h, wp, 1); - *p = wp; - return; - } - - } else if ('f' == *wp) { - if (0 == *(++wp)) { - *p = wp; - return; - } - - switch (*wp) { - case ('B'): - /* TODO */ - break; - case ('I'): - /* TODO */ - break; - case ('P'): - /* FALLTHROUGH */ - case ('R'): - /* TODO */ - break; - default: - break; - } - - *p = wp; - return; + PAIR_CLASS_INIT(&tag, htmlfonts[font]); + h->metaf = print_otag(h, TAG_SPAN, 1, &tag); + return(h->metaf); +} - } else if ('[' != *wp) { - print_spec(h, wp, 1); - *p = wp; - return; - } - wp++; - for (j = 0; *wp && ']' != *wp; wp++, j++) - /* Loop... */ ; +static void +print_metaf(struct html *h, enum roffdeco deco) +{ + enum htmlfont font; - if (0 == *wp) { - *p = wp; - return; + switch (deco) { + case (DECO_PREVIOUS): + font = h->metal; + break; + case (DECO_ITALIC): + font = HTMLFONT_ITALIC; + break; + case (DECO_BOLD): + font = HTMLFONT_BOLD; + break; + case (DECO_ROMAN): + font = HTMLFONT_NONE; + break; + default: + abort(); + /* NOTREACHED */ } - if (type) - print_spec(h, wp - j, j); - else - print_res(h, wp - j, j); - - *p = wp; + (void)print_ofont(h, font); } -static void -print_encode(struct html *h, const char *p) +static int +print_encode(struct html *h, const char *p, int norecurse) { size_t sz; + int len, nospace; + const char *seq; + enum roffdeco deco; + + nospace = 0; for (; *p; p++) { sz = strcspn(p, "\\<>&"); @@ -325,19 +290,50 @@ print_encode(struct html *h, const char *p) p += /* LINTED */ sz; - if ('\\' == *p) { - print_escape(h, &p); + if ('<' == *p) { + printf("<"); + continue; + } else if ('>' == *p) { + printf(">"); + continue; + } else if ('&' == *p) { + printf("&"); continue; } else if ('\0' == *p) break; - if ('<' == *p) - printf("<"); - else if ('>' == *p) - printf(">"); - else if ('&' == *p) - printf("&"); + seq = ++p; + 
len = a2roffdeco(&deco, &seq, &sz); + + switch (deco) { + case (DECO_RESERVED): + print_res(h, seq, sz); + break; + case (DECO_SPECIAL): + print_spec(h, seq, sz); + break; + case (DECO_PREVIOUS): + /* FALLTHROUGH */ + case (DECO_BOLD): + /* FALLTHROUGH */ + case (DECO_ITALIC): + /* FALLTHROUGH */ + case (DECO_ROMAN): + if (norecurse) + break; + print_metaf(h, deco); + break; + default: + break; + } + + p += len - 1; + + if (DECO_NOSPACE == deco && '\0' == *(p + 1)) + nospace = 1; } + + return(nospace); } @@ -368,22 +364,16 @@ print_otag(struct html *h, enum htmltag tag, for (i = 0; i < sz; i++) { printf(" %s=\"", htmlattrs[p[i].key]); assert(p->val); - print_encode(h, p[i].val); + (void)print_encode(h, p[i].val, 1); putchar('\"'); } putchar('>'); h->flags |= HTML_NOSPACE; - if (HTML_CLRLINE & htmltags[tag].flags) - h->flags |= HTML_NEWLINE; - else - h->flags &= ~HTML_NEWLINE; - return(t); } -/* ARGSUSED */ static void print_ctag(struct html *h, enum htmltag tag) { @@ -391,10 +381,8 @@ print_ctag(struct html *h, enum htmltag tag) printf("", htmltags[tag].name); if (HTML_CLRLINE & htmltags[tag].flags) { h->flags |= HTML_NOSPACE; - h->flags |= HTML_NEWLINE; putchar('\n'); - } else - h->flags &= ~HTML_NEWLINE; + } } @@ -440,11 +428,9 @@ print_text(struct html *h, const char *p) if ( ! (h->flags & HTML_NOSPACE)) putchar(' '); - h->flags &= ~HTML_NOSPACE; - h->flags &= ~HTML_NEWLINE; - - if (p) - print_encode(h, p); + assert(p); + if ( ! 
print_encode(h, p, 0)) + h->flags &= ~HTML_NOSPACE; if (*p && 0 == *(p + 1)) switch (*p) { @@ -467,6 +453,8 @@ print_tagq(struct html *h, const struct tag *until) struct tag *tag; while ((tag = h->tags.head) != NULL) { + if (tag == h->metaf) + h->metaf = NULL; print_ctag(h, tag->tag); h->tags.head = tag->next; free(tag); @@ -484,6 +472,8 @@ print_stagq(struct html *h, const struct tag *suntil) while ((tag = h->tags.head) != NULL) { if (suntil && tag == suntil) return; + if (tag == h->metaf) + h->metaf = NULL; print_ctag(h, tag->tag); h->tags.head = tag->next; free(tag); diff --git a/usr.bin/mandoc/html.h b/usr.bin/mandoc/html.h index 608bb26d8e..0b76273da1 100644 --- a/usr.bin/mandoc/html.h +++ b/usr.bin/mandoc/html.h @@ -1,4 +1,4 @@ -/* $Id: html.h,v 1.17 2009/10/28 08:00:18 kristaps Exp $ */ +/* $Id: html.h,v 1.21 2009/11/16 06:07:49 kristaps Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -62,6 +62,13 @@ enum htmlattr { ATTR_MAX }; +enum htmlfont { + HTMLFONT_NONE = 0, + HTMLFONT_BOLD, + HTMLFONT_ITALIC, + HTMLFONT_MAX +}; + struct tag { struct tag *next; enum htmltag tag; @@ -101,7 +108,6 @@ struct htmlpair { struct html { int flags; #define HTML_NOSPACE (1 << 0) -#define HTML_NEWLINE (1 << 1) #define HTML_IGNDELIM (1 << 2) struct tagq tags; struct ordq ords; @@ -112,12 +118,16 @@ struct html { char *style; char buf[BUFSIZ]; size_t buflen; + struct tag *metaf; + enum htmlfont metal; + enum htmlfont metac; }; struct roffsu; void print_gen_doctype(struct html *); void print_gen_head(struct html *); +struct tag *print_ofont(struct html *, enum htmlfont); struct tag *print_otag(struct html *, enum htmltag, int, const struct htmlpair *); void print_tagq(struct html *, const struct tag *); diff --git a/usr.bin/mandoc/main.c b/usr.bin/mandoc/main.c index 9ec8c1cfcf..368452c855 100644 --- a/usr.bin/mandoc/main.c +++ b/usr.bin/mandoc/main.c @@ -40,7 +40,7 @@ #ifdef __linux__ extern int getsubopt(char **, char * const *, char **); -extern size_t 
strlcat(char *, const char *, size_t); +extern size_t strlcat(char *, const char *, size_t); #endif typedef void (*out_mdoc)(void *, const struct mdoc *); diff --git a/usr.bin/mandoc/man.7 b/usr.bin/mandoc/man.7 index e8a18218df..8f7b87acbd 100644 --- a/usr.bin/mandoc/man.7 +++ b/usr.bin/mandoc/man.7 @@ -1,4 +1,4 @@ -.\" $Id: man.7,v 1.46 2009/11/02 17:07:30 kristaps Exp $ +.\" $Id: man.7,v 1.54 2009/11/16 08:46:59 kristaps Exp $ .\" .\" Copyright (c) 2009 Kristaps Dzonsons .\" @@ -14,7 +14,7 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd November 6, 2009 +.Dd November 19, 2009 .Dt MAN 7 .Os . @@ -67,14 +67,6 @@ line termination. Blank lines are acceptable; where found, the output will assert a vertical space. . -.Pp -The -.Sq \ec -escape is common in historical -.Nm -documents; if encountered at the end of a word, it ensures that the -subsequent word isn't off-set by whitespace. -. . .Ss Comments Text following a @@ -109,8 +101,45 @@ and .Ss Text Decoration Terms may be text-decorated using the .Sq \ef -escape followed by an indicator: B (bold), I, (italic), or P and R -(Roman, or reset). +escape followed by an indicator: B (bold), I (italic), R (Roman), or P +(revert to previous mode): +.Pp +.D1 \efBbold\efR \efIitalic\efP +.Pp +A numerical representation 3, 2, or 1 (bold, italic, and Roman, +respectively) may be used instead. A text decoration is only valid, if +specified in free-form text, until the next macro invocation; if +specified within a macro, it's only valid until the macro closes scope. +Note that macros like +.Sx \&BR +open and close a font scope with each argument.
+.Pp +Text may also be sized with the +.Sq \es +escape, whose syntax is one of +.Sq \es+-n +for one-digit numerals; +.Sq \es(+-nn +or +.Sq \es+-(nn +for two-digit numerals; and +.Sq \es[+-N] , +.Sq \es+-[N] , +.Sq \es'+-N' , +or +.Sq \es+-'N' +for arbitrary-digit numerals: +.Pp +.D1 \es+1bigger\es-1 +.D1 \es[+10]much bigger\es[-10] +.D1 \es+(10much bigger\es-(10 +.D1 \es+'100'much much bigger\es-'100' +.Pp +Both +.Sq \es +and +.Sq \ef +attributes are forgotten when entering or exiting a macro block. . . .Ss Whitespace @@ -890,8 +919,8 @@ End literal mode begun by . . .Ss \&i -Italicise arguments. If no arguments are specified, all subsequent text -is italicised. +Italicise arguments. Synonym for +.Sx \&I . .Pp See also .Sx \&B , @@ -947,14 +976,21 @@ See also This section documents compatibility with other roff implementations, at this time limited to .Xr groff 1 . -.Bl -hyphen +.Pp +.Bl -dash -compact +.It +The +.Xr groff 1 +.Sx \&i +macro will italicise all subsequent text if a line argument is not +provided. This behaviour is not implemented. .It In quoted literals, groff allowed pair-wise double-quotes to produce a standalone double-quote in formatted output. This idiosyncratic behaviour is no longer applicable. .It The -.Sq sp +.Sx \&sp macro does not accept negative numbers. .It Blocks of whitespace are stripped from both macro and free-form text diff --git a/usr.bin/mandoc/man_html.c b/usr.bin/mandoc/man_html.c index 8d8f2316d8..82b2228c10 100644 --- a/usr.bin/mandoc/man_html.c +++ b/usr.bin/mandoc/man_html.c @@ -1,4 +1,4 @@ -/* $Id: man_html.c,v 1.17 2009/10/30 18:53:08 kristaps Exp $ */ +/* $Id: man_html.c,v 1.24 2009/11/16 08:46:59 kristaps Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -188,8 +188,19 @@ print_man_node(MAN_ARGS) break; case (MAN_TEXT): print_text(h, n->string); - break; + return; default: + /* + * Close out scope of font prior to opening a macro + * scope. 
Assert that the metafont is on the top of the + * stack (it's never nested). + */ + if (h->metaf) { + assert(h->metaf == t); + print_tagq(h, h->metaf); + assert(NULL == h->metaf); + t = h->tags.head; + } if (mans[n->tok].pre) child = (*mans[n->tok].pre)(m, n, h); break; @@ -198,6 +209,7 @@ print_man_node(MAN_ARGS) if (child && n->child) print_man_nodelist(m, n->child, h); + /* This will automatically close out any font scope. */ print_stagq(h, t); bufinit(h); @@ -241,8 +253,7 @@ man_root_pre(MAN_ARGS) if (m->vol) (void)strlcat(b, m->vol, BUFSIZ); - (void)snprintf(title, BUFSIZ - 1, - "%s(%d)", m->title, m->msec); + snprintf(title, BUFSIZ - 1, "%s(%d)", m->title, m->msec); PAIR_CLASS_INIT(&tag[0], "header"); bufcat_style(h, "width", "100%"); @@ -333,6 +344,7 @@ man_br_pre(MAN_ARGS) bufcat_su(h, "height", &su); PAIR_STYLE_INIT(&tag, h); print_otag(h, TAG_DIV, 1, &tag); + /* So the div isn't empty: */ print_text(h, "\\~"); @@ -384,30 +396,27 @@ man_alt_pre(MAN_ARGS) const struct man_node *nn; struct tag *t; int i; - struct htmlpair tagi, tagb, *tagp; - - PAIR_CLASS_INIT(&tagi, "italic"); - PAIR_CLASS_INIT(&tagb, "bold"); + enum htmlfont fp; for (i = 0, nn = n->child; nn; nn = nn->next, i++) { switch (n->tok) { case (MAN_BI): - tagp = i % 2 ? &tagi : &tagb; + fp = i % 2 ? HTMLFONT_ITALIC : HTMLFONT_BOLD; break; case (MAN_IB): - tagp = i % 2 ? &tagb : &tagi; + fp = i % 2 ? HTMLFONT_BOLD : HTMLFONT_ITALIC; break; case (MAN_RI): - tagp = i % 2 ? &tagi : NULL; + fp = i % 2 ? HTMLFONT_ITALIC : HTMLFONT_NONE; break; case (MAN_IR): - tagp = i % 2 ? NULL : &tagi; + fp = i % 2 ? HTMLFONT_NONE : HTMLFONT_ITALIC; break; case (MAN_BR): - tagp = i % 2 ? NULL : &tagb; + fp = i % 2 ? HTMLFONT_NONE : HTMLFONT_BOLD; break; case (MAN_RB): - tagp = i % 2 ? &tagb : NULL; + fp = i % 2 ? 
HTMLFONT_BOLD : HTMLFONT_NONE; break; default: abort(); @@ -417,12 +426,14 @@ man_alt_pre(MAN_ARGS) if (i) h->flags |= HTML_NOSPACE; - if (tagp) { - t = print_otag(h, TAG_SPAN, 1, tagp); - print_man_node(m, nn, h); - print_tagq(h, t); - } else - print_man_node(m, nn, h); + /* + * Open and close the scope with each argument, so that + * internal \f escapes, which are common, are also + * closed out with the scope. + */ + t = print_ofont(h, fp); + print_man_node(m, nn, h); + print_tagq(h, t); } return(0); @@ -435,6 +446,7 @@ man_SB_pre(MAN_ARGS) { struct htmlpair tag; + /* FIXME: print_ofont(). */ PAIR_CLASS_INIT(&tag, "small bold"); print_otag(h, TAG_SPAN, 1, &tag); return(1); @@ -509,19 +521,19 @@ man_PP_pre(MAN_ARGS) i = 0; - if (MAN_ROOT == n->parent->tok) { + if (MAN_ROOT == n->parent->type) { SCALE_HS_INIT(&su, INDENT); bufcat_su(h, "margin-left", &su); - i++; + i = 1; } - if (n->next && n->next->child) { + if (n->prev) { SCALE_VS_INIT(&su, 1); - bufcat_su(h, "margin-bottom", &su); - i++; + bufcat_su(h, "margin-top", &su); + i = 1; } PAIR_STYLE_INIT(&tag, h); - print_otag(h, TAG_DIV, i ? 
1 : 0, &tag); + print_otag(h, TAG_DIV, i, &tag); return(1); } @@ -648,10 +660,8 @@ man_HP_pre(MAN_ARGS) static int man_B_pre(MAN_ARGS) { - struct htmlpair tag; - PAIR_CLASS_INIT(&tag, "bold"); - print_otag(h, TAG_SPAN, 1, &tag); + print_ofont(h, HTMLFONT_BOLD); return(1); } @@ -660,10 +670,8 @@ man_B_pre(MAN_ARGS) static int man_I_pre(MAN_ARGS) { - struct htmlpair tag; - PAIR_CLASS_INIT(&tag, "italic"); - print_otag(h, TAG_SPAN, 1, &tag); + print_ofont(h, HTMLFONT_ITALIC); return(1); } diff --git a/usr.bin/mandoc/man_term.c b/usr.bin/mandoc/man_term.c index e2e8ea4b71..f5c1051fc6 100644 --- a/usr.bin/mandoc/man_term.c +++ b/usr.bin/mandoc/man_term.c @@ -1,4 +1,4 @@ -/* $Id: man_term.c,v 1.47 2009/10/30 18:53:08 kristaps Exp $ */ +/* $Id: man_term.c,v 1.54 2009/11/12 08:21:05 kristaps Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -73,7 +73,7 @@ static int a2height(const struct man_node *); static void print_man_head(struct termp *, const struct man_meta *); -static void print_man_body(DECL_ARGS); +static void print_man_nodelist(DECL_ARGS); static void print_man_node(DECL_ARGS); static void print_man_foot(struct termp *, const struct man_meta *); @@ -85,7 +85,6 @@ static int pre_BI(DECL_ARGS); static int pre_HP(DECL_ARGS); static int pre_I(DECL_ARGS); static int pre_IP(DECL_ARGS); -static int pre_IR(DECL_ARGS); static int pre_PP(DECL_ARGS); static int pre_RB(DECL_ARGS); static int pre_RI(DECL_ARGS); @@ -97,18 +96,14 @@ static int pre_br(DECL_ARGS); static int pre_fi(DECL_ARGS); static int pre_ign(DECL_ARGS); static int pre_nf(DECL_ARGS); -static int pre_r(DECL_ARGS); static int pre_sp(DECL_ARGS); -static void post_B(DECL_ARGS); -static void post_I(DECL_ARGS); static void post_IP(DECL_ARGS); static void post_HP(DECL_ARGS); static void post_RS(DECL_ARGS); static void post_SH(DECL_ARGS); static void post_SS(DECL_ARGS); static void post_TP(DECL_ARGS); -static void post_i(DECL_ARGS); static const struct termact termacts[MAN_MAX] = { { pre_br, NULL }, /* br 
*/ @@ -122,22 +117,22 @@ static const struct termact termacts[MAN_MAX] = { { pre_IP, post_IP }, /* IP */ { pre_HP, post_HP }, /* HP */ { NULL, NULL }, /* SM */ - { pre_B, post_B }, /* SB */ + { pre_B, NULL }, /* SB */ { pre_BI, NULL }, /* BI */ { pre_BI, NULL }, /* IB */ { pre_RB, NULL }, /* BR */ { pre_RB, NULL }, /* RB */ { NULL, NULL }, /* R */ - { pre_B, post_B }, /* B */ - { pre_I, post_I }, /* I */ - { pre_IR, NULL }, /* IR */ + { pre_B, NULL }, /* B */ + { pre_I, NULL }, /* I */ + { pre_RI, NULL }, /* IR */ { pre_RI, NULL }, /* RI */ { NULL, NULL }, /* na */ - { pre_I, post_i }, /* i */ + { pre_I, NULL }, /* i */ { pre_sp, NULL }, /* sp */ { pre_nf, NULL }, /* nf */ { pre_fi, NULL }, /* fi */ - { pre_r, NULL }, /* r */ + { NULL, NULL }, /* r */ { NULL, NULL }, /* RE */ { pre_RS, post_RS }, /* RS */ { pre_ign, NULL }, /* DT */ @@ -178,7 +173,7 @@ terminal_man(void *arg, const struct man *man) mt.offset = INDENT; if (n->child) - print_man_body(p, &mt, n->child, m); + print_man_nodelist(p, &mt, n->child, m); print_man_foot(p, m); } @@ -242,40 +237,11 @@ static int pre_I(DECL_ARGS) { - p->under++; + term_fontrepl(p, TERMFONT_UNDER); return(1); } -/* ARGSUSED */ -static int -pre_r(DECL_ARGS) -{ - - p->bold = p->under = 0; - return(1); -} - - -/* ARGSUSED */ -static void -post_i(DECL_ARGS) -{ - - if (n->nchild) - p->under--; -} - - -/* ARGSUSED */ -static void -post_I(DECL_ARGS) -{ - - p->under--; -} - - /* ARGSUSED */ static int pre_fi(DECL_ARGS) @@ -297,26 +263,6 @@ pre_nf(DECL_ARGS) } -/* ARGSUSED */ -static int -pre_IR(DECL_ARGS) -{ - const struct man_node *nn; - int i; - - for (i = 0, nn = n->child; nn; nn = nn->next, i++) { - if ( ! (i % 2)) - p->under++; - if (i > 0) - p->flags |= TERMP_NOSPACE; - print_man_node(p, mt, nn, m); - if ( ! 
(i % 2)) - p->under--; - } - return(0); -} - - /* ARGSUSED */ static int pre_RB(DECL_ARGS) @@ -326,19 +272,16 @@ pre_RB(DECL_ARGS) for (i = 0, nn = n->child; nn; nn = nn->next, i++) { if (i % 2 && MAN_RB == n->tok) - p->bold++; + term_fontrepl(p, TERMFONT_BOLD); else if ( ! (i % 2) && MAN_RB != n->tok) - p->bold++; + term_fontrepl(p, TERMFONT_BOLD); + else + term_fontrepl(p, TERMFONT_NONE); if (i > 0) p->flags |= TERMP_NOSPACE; print_man_node(p, mt, nn, m); - - if (i % 2 && MAN_RB == n->tok) - p->bold--; - else if ( ! (i % 2) && MAN_RB != n->tok) - p->bold--; } return(0); } @@ -352,13 +295,17 @@ pre_RI(DECL_ARGS) int i; for (i = 0, nn = n->child; nn; nn = nn->next, i++) { - if ( ! (i % 2)) - p->under++; + if (i % 2 && MAN_RI == n->tok) + term_fontrepl(p, TERMFONT_UNDER); + else if ( ! (i % 2) && MAN_RI != n->tok) + term_fontrepl(p, TERMFONT_UNDER); + else + term_fontrepl(p, TERMFONT_NONE); + if (i > 0) p->flags |= TERMP_NOSPACE; + print_man_node(p, mt, nn, m); - if ( ! (i % 2)) - p->under--; } return(0); } @@ -373,26 +320,18 @@ pre_BI(DECL_ARGS) for (i = 0, nn = n->child; nn; nn = nn->next, i++) { if (i % 2 && MAN_BI == n->tok) - p->under++; + term_fontrepl(p, TERMFONT_UNDER); else if (i % 2) - p->bold++; + term_fontrepl(p, TERMFONT_BOLD); else if (MAN_BI == n->tok) - p->bold++; + term_fontrepl(p, TERMFONT_BOLD); else - p->under++; + term_fontrepl(p, TERMFONT_UNDER); if (i) p->flags |= TERMP_NOSPACE; - print_man_node(p, mt, nn, m); - if (i % 2 && MAN_BI == n->tok) - p->under--; - else if (i % 2) - p->bold--; - else if (MAN_BI == n->tok) - p->bold--; - else - p->under--; + print_man_node(p, mt, nn, m); } return(0); } @@ -403,20 +342,11 @@ static int pre_B(DECL_ARGS) { - p->bold++; + term_fontrepl(p, TERMFONT_BOLD); return(1); } -/* ARGSUSED */ -static void -post_B(DECL_ARGS) -{ - - p->bold--; -} - - /* ARGSUSED */ static int pre_sp(DECL_ARGS) @@ -720,7 +650,7 @@ pre_SS(DECL_ARGS) term_vspace(p); break; case (MAN_HEAD): - p->bold++; + term_fontrepl(p, TERMFONT_BOLD); 
p->offset = HALFINDENT; break; case (MAN_BODY): @@ -742,7 +672,6 @@ post_SS(DECL_ARGS) switch (n->type) { case (MAN_HEAD): term_newln(p); - p->bold--; break; case (MAN_BODY): term_newln(p); @@ -769,7 +698,7 @@ pre_SH(DECL_ARGS) term_vspace(p); break; case (MAN_HEAD): - p->bold++; + term_fontrepl(p, TERMFONT_BOLD); p->offset = 0; break; case (MAN_BODY): @@ -791,7 +720,6 @@ post_SH(DECL_ARGS) switch (n->type) { case (MAN_HEAD): term_newln(p); - p->bold--; break; case (MAN_BODY): term_newln(p); @@ -855,7 +783,7 @@ post_RS(DECL_ARGS) static void print_man_node(DECL_ARGS) { - int c, sz; + int c; c = 1; @@ -865,46 +793,42 @@ print_man_node(DECL_ARGS) term_vspace(p); break; } - /* - * Note! This is hacky. Here, we recognise the `\c' - * escape embedded in so many -man pages. It's supposed - * to remove the subsequent space, so we mark NOSPACE if - * it's encountered in the string. - */ - sz = (int)strlen(n->string); + term_word(p, n->string); - if (sz >= 2 && n->string[sz - 1] == 'c' && - n->string[sz - 2] == '\\') - p->flags |= TERMP_NOSPACE; + /* FIXME: this means that macro lines are munged! */ + if (MANT_LITERAL & mt->fl) { p->flags |= TERMP_NOSPACE; term_flushln(p); } break; default: + term_fontrepl(p, TERMFONT_NONE); if (termacts[n->tok].pre) c = (*termacts[n->tok].pre)(p, mt, n, m); break; } if (c && n->child) - print_man_body(p, mt, n->child, m); + print_man_nodelist(p, mt, n->child, m); - if (MAN_TEXT != n->type) + if (MAN_TEXT != n->type) { if (termacts[n->tok].post) (*termacts[n->tok].post)(p, mt, n, m); + term_fontrepl(p, TERMFONT_NONE); + } } static void -print_man_body(DECL_ARGS) +print_man_nodelist(DECL_ARGS) { print_man_node(p, mt, n, m); if ( ! 
n->next) return; - print_man_body(p, mt, n->next, m); + print_man_nodelist(p, mt, n->next, m); } @@ -913,6 +837,8 @@ print_man_foot(struct termp *p, const struct man_meta *meta) { char buf[DATESIZ]; + term_fontrepl(p, TERMFONT_NONE); + time2a(meta->date, buf, DATESIZ); term_vspace(p); diff --git a/usr.bin/mandoc/mandoc.1 b/usr.bin/mandoc/mandoc.1 index 80bdc8162a..b2f261b48e 100644 --- a/usr.bin/mandoc/mandoc.1 +++ b/usr.bin/mandoc/mandoc.1 @@ -1,4 +1,4 @@ -.\" $Id: mandoc.1,v 1.45 2009/10/26 15:44:51 kristaps Exp $ +.\" $Id: mandoc.1,v 1.48 2009/11/16 09:52:47 kristaps Exp $ .\" .\" Copyright (c) 2009 Kristaps Dzonsons .\" @@ -14,7 +14,7 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd November 11, 2009 +.Dd November 19, 2009 .Dt MANDOC 1 .Os . @@ -96,55 +96,14 @@ or .Xr man 7 text from stdin, implying .Fl m Ns Ar andoc , -and prints 78-column backspace-encoded output to stdout as if +and produces .Fl T Ns Ar ascii -were provided. +output. . .Pp .Ex -std mandoc . . -.Ss Punctuation and Spacing -If punctuation is set apart from words, such as in the phrase -.Dq to be \&, or not to be , -it's processed by -.Nm -according to the following rules: opening punctuation -.Po -.Sq \&( , -.Sq \&[ , -and -.Sq \&{ -.Pc -is not followed by a space; closing punctuation -.Po -.Sq \&. , -.Sq \&, , -.Sq \&; , -.Sq \&: , -.Sq \&? , -.Sq \&! , -.Sq \&) , -.Sq \&] -and -.Sq \&} -.Pc -is not preceded by whitespace. -. -.Pp -If the input is -.Xr mdoc 7 , -these rules are also applied to macro arguments when appropriate. -. -.Pp -White-space, in non-literal (normal) mode, is stripped from input and -replaced on output by a single space. Thus, if you wish to preserve multiple -spaces, they must be space-escaped or used in a literal display mode, e.g., -.Sq \&Bd \-literal -in -.Xr mdoc 7 . -. -. 
.Ss Input Formats The .Nm @@ -195,15 +154,18 @@ The .Nm utility accepts the following .Fl T -arguments: +arguments (see +.Sx OUTPUT ) : . .Bl -tag -width Ds .It Fl T Ns Ar ascii Produce 7-bit ASCII output, backspace-encoded for bold and underline -styles. This is the default. +styles. This is the default. See +.Sx ASCII Output . . .It Fl T Ns Ar html -Produce strict HTML-4.01 output, with a sane default style. +Produce strict HTML-4.01 output, with a sane default style. See +.Sx HTML Output . . .It Fl T Ns Ar tree Produce an indented parse tree. @@ -255,10 +217,11 @@ Don't halt when encountering parse errors. Useful with over a large set of manuals passed on the command line. .El . +. .Ss Output Options For the time being, only .Fl T Ns Ar html -is the only mode with output options: +accepts output options: .Bl -tag -width Ds .It Fl O Ns Ar style=style.css The file @@ -292,6 +255,99 @@ If no section is included, section 1 is assumed. The default is not to present a hyperlink. .El . +. +.Sh OUTPUT +This section documents output details of +.Nm . +In general, output conforms to the traditional manual style of a header, +a body composed of sections and sub-sections, and a footer. +.Pp +The text style of output characters (non-macro characters, punctuation, +and white-space) is dictated by context. +.Pp +White-space is generally stripped from input. This can be changed with +character escapes (specified in +.Xr mandoc_char 7 ) +or literal modes (specified in +.Xr mdoc 7 +and +.Xr man 7 ) . +.Pp +If non-macro punctuation is set apart from words, such as in the phrase +.Dq to be \&, or not to be , +it's processed by +.Nm , +regardless of output format, according to the following rules: opening +punctuation +.Po +.Sq \&( , +.Sq \&[ , +and +.Sq \&{ +.Pc +is not followed by a space; closing punctuation +.Po +.Sq \&. , +.Sq \&, , +.Sq \&; , +.Sq \&: , +.Sq \&? , +.Sq \&! , +.Sq \&) , +.Sq \&] +and +.Sq \&} +.Pc +is not preceded by white-space. +. 
+.Pp +If the input is +.Xr mdoc 7 , +however, these rules are also applied to macro arguments when appropriate. +. +. +.Ss ASCII Output +Output produced by +.Fl T Ns Ar ascii , +which is the default, is rendered in standard 7-bit ASCII documented in +.Xr ascii 7 . +.Pp +Font styles are applied by using back-spaced encoding such that an +underlined character +.Sq c +is rendered as +.Sq _ Ns \e[bs] Ns c , +where +.Sq \e[bs] +is the back-space character number 8. Emboldened characters are rendered as +.Sq c Ns \e[bs] Ns c . +.Pp +The special characters documented in +.Xr mandoc_char 7 +are rendered best-effort in an ASCII equivalent. +.Pp +Output width is limited to 78 visible columns unless literal input lines +exceed this limit. +. +. +.Ss HTML Output +Output produced by +.Fl T Ns Ar html +conforms to HTML-4.01 strict. +.Pp +Font styles and page structure are applied using CSS2. By default, no +font style is applied to any text, although CSS2 is hard-coded to format +the basic structure of output. +.Pp +The +.Pa example.style.css +file documents the range of styles applied to output and, if used, will +cause rendered documents to appear as they do in +.Fl T Ns Ar ascii . +.Pp +Special characters are rendered in decimal-encoded UTF-8. +. +. .Sh EXAMPLES To page manuals to the terminal: . @@ -304,7 +360,7 @@ To produce HTML manuals with .Ar style.css as the style-sheet: .Pp -.D1 % mandoc \-Thtml -ostyle=style.css mdoc.7 > mdoc.7.html +.D1 % mandoc \-Thtml -Ostyle=style.css mdoc.7 > mdoc.7.html .Pp To check over a large set of manuals: . @@ -320,7 +376,7 @@ compatibility with Each input and output format is separately noted. . . -.Ss ASCII output +.Ss ASCII Compatibility .Bl -bullet -compact .It The @@ -380,10 +436,21 @@ retains spaces. Sentences are unilaterally monospaced. .El . -.Ss HTML output +.
+.Ss HTML Compatibility .Bl -bullet -compact .It The +.Sq \efP +escape will revert the font to the previous +.Sq \ef +escape, not to the last rendered decoration, which is now dictated by +CSS instead of hard-coded. It also will not span past the current +scope, for the same reason. Note that in +.Sx ASCII Output +mode, this will work fine. +.It +The .Xr mdoc 7 .Sq \&Bl \-hang and @@ -399,7 +466,8 @@ and .Sq TP lists render similarly. .El -.\" SECTION +. +. .Sh SEE ALSO .Xr mandoc_char 7 , .Xr mdoc 7 , @@ -411,11 +479,28 @@ The utility was written by .An Kristaps Dzonsons Aq kristaps@kth.se . . +. .Sh CAVEATS +The +.Fl T Ns Ar html +CSS2 styling used for +.Fl m Ns Ar doc +input lists does not render properly in brain-dead browsers, such as +Internet Explorer 6 and earlier. +.Pp In .Fl T Ns Ar html , the maximum size of an element attribute is determined by .Dv BUFSIZ , which is usually 1024 bytes. Be aware of this when setting long link -formats with -.Fl O Ns Ar man=fmt . +formats, e.g., +.Fl O Ns Ar style=really/long/link . +.Pp +The +.Fl T Ns Ar html +output mode doesn't render the +.Sq \es +font size escape documented in +.Xr mdoc 7 +and +.Xr man 7 . diff --git a/usr.bin/mandoc/mandoc.c b/usr.bin/mandoc/mandoc.c index d0e23ea4b0..6bc91a1395 100644 --- a/usr.bin/mandoc/mandoc.c +++ b/usr.bin/mandoc/mandoc.c @@ -1,4 +1,4 @@ -/* $Id: mandoc.c,v 1.7 2009/11/02 06:22:45 kristaps Exp $ */ +/* $Id: mandoc.c,v 1.8 2009/11/05 10:16:01 kristaps Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -35,7 +35,9 @@ static int a2time(time_t *, const char *, const char *); int mandoc_special(const char *p) { - int c; + int terminator; /* Terminator for \s. */ + int lim; /* Limit for N in \s. */ + int c, i; if ('\\' != *p++) return(0); @@ -74,9 +76,84 @@ mandoc_special(const char *p) case ('e'): return(2); case ('f'): - if (0 == *++p || ! isgraph((u_char)*p)) + if ('\0' == *++p || ! 
isgraph((u_char)*p)) return(0); return(3); + case ('s'): + if ('\0' == *++p) + return(2); + + c = 2; + terminator = 0; + lim = 1; + + if (*p == '\'') { + lim = 0; + terminator = 1; + ++p; + ++c; + } else if (*p == '[') { + lim = 0; + terminator = 2; + ++p; + ++c; + } else if (*p == '(') { + lim = 2; + terminator = 3; + ++p; + ++c; + } + + if (*p == '+' || *p == '-') { + ++p; + ++c; + } + + if (*p == '\'') { + if (terminator) + return(0); + lim = 0; + terminator = 1; + ++p; + ++c; + } else if (*p == '[') { + if (terminator) + return(0); + lim = 0; + terminator = 2; + ++p; + ++c; + } else if (*p == '(') { + if (terminator) + return(0); + lim = 2; + terminator = 3; + ++p; + ++c; + } + + /* TODO: needs to handle floating point. */ + + if ( ! isdigit((u_char)*p)) + return(0); + + for (i = 0; isdigit((u_char)*p); i++) { + if (lim && i >= lim) + break; + ++p; + ++c; + } + + if (terminator && terminator < 3) { + if (1 == terminator && *p != '\'') + return(0); + if (2 == terminator && *p != ']') + return(0); + ++p; + ++c; + } + + return(c); case ('*'): if (0 == *++p || ! isgraph((u_char)*p)) return(0); diff --git a/usr.bin/mandoc/mandoc_char.7 b/usr.bin/mandoc/mandoc_char.7 index 6e268188fa..221eba3c61 100644 --- a/usr.bin/mandoc/mandoc_char.7 +++ b/usr.bin/mandoc/mandoc_char.7 @@ -1,4 +1,4 @@ -.\" $Id: mandoc_char.7,v 1.27 2009/10/17 04:37:52 kristaps Exp $ +.\" $Id: mandoc_char.7,v 1.29 2009/11/16 09:52:47 kristaps Exp $ .\" .\" Copyright (c) 2009 Kristaps Dzonsons .\" @@ -14,7 +14,7 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd November 11, 2009 +.Dd November 19, 2009 .Dt MANDOC_CHAR 7 .Os . @@ -76,25 +76,6 @@ Note that each output mode will have a different rendering of the characters. It's guaranteed that each input symbol will correspond to a (more or less) meaningful output rendering, regardless the mode. . 
-.Ss ASCII output -Formatting documents with ASCII output results in a 7-bit ASCII -approximation of zero or more characters, for example, the -.Dq aleph -character -.Sq \e(Ah -will render as -.Sq N . -Approximations are a best-effort, and naturally some clarity will be lost. -. -.Ss HTML output -The HTML output mode uses decimal-encoded UTF-8 for sequences, for -example, the -.Dq aleph -character -.Sq \e(Ah -will render as -.Sq ℵ . -. . .Sh SPECIAL CHARACTERS These are the preferred input symbols for producing special characters. @@ -110,6 +91,7 @@ Spacing: .It \e& Ta zero-width space .It \e| Ta zero-width space .It \e0 Ta breaking, non-collapsing digit-width space +.It \ec Ta removes any trailing space (if applicable) .El . .Pp diff --git a/usr.bin/mandoc/mdoc.7 b/usr.bin/mandoc/mdoc.7 index d85b08a1f1..3b39324741 100644 --- a/usr.bin/mandoc/mdoc.7 +++ b/usr.bin/mandoc/mdoc.7 @@ -1,4 +1,4 @@ -.\" $Id: mdoc.7,v 1.73 2009/11/02 11:39:40 kristaps Exp $ +.\" $Id: mdoc.7,v 1.78 2009/11/16 09:52:47 kristaps Exp $ .\" .\" Copyright (c) 2009 Kristaps Dzonsons .\" @@ -14,7 +14,7 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd November 11, 2009 +.Dd November 19, 2009 .Dt MDOC 7 .Os . @@ -131,10 +131,50 @@ and .Ss Text Decoration Terms may be text-decorated using the .Sq \ef -escape followed by an indicator: B (bold), I, (italic), or P and R -(Roman, or reset). This form is not recommended for +escape followed by an indicator: B (bold), I (italic), R (Roman), or P +(revert to previous mode): +.Pp +.D1 \efBbold\efR \efIitalic\efP +.Pp +A numerical representation 3, 2, or 1 (bold, italic, and Roman, +respectively) may be used instead. A text decoration is valid within +the current font scope only: if a macro opens a font scope alongside +its own scope, such as +.Sx \&Bf +.Cm \&Sy , +in-scope invocations of +.Sq \ef +are only valid within the font scope of the macro. 
If +.Sq \ef +is specified outside of any font scope, such as in unenclosed, free-form +text, it will affect the remainder of the document. +.Pp +Text may also be sized with the +.Sq \es +escape, whose syntax is one of +.Sq \es+-n +for one-digit numerals; +.Sq \es(+-nn +or +.Sq \es+-(nn +for two-digit numerals; and +.Sq \es[+-N] , +.Sq \es+-[N] , +.Sq \es'+-N' , +or +.Sq \es+-'N' +for arbitrary-digit numerals: +.Pp +.D1 \es+1bigger\es-1 +.D1 \es[+10]much bigger\es[-10] +.D1 \es+(10much bigger\es-(10 +.D1 \es+'100'much much bigger\es-'100' +.Pp +Note these forms are +.Em not +recommended for .Nm , -which encourages semantic, not presentation, annotation. +which encourages semantic annotation. . . .Ss Predefined Strings @@ -1790,9 +1830,16 @@ file re-write .Pp .Bl -dash -compact .It +.Xr groff 1 +behaves strangely (even between versions) when specifying +.Sq \ef +escapes within line-macro scopes. These aberrations have been +normalised. +.It Negative scaling units are now truncated to zero instead of creating interesting conditions, such as with -.Sq \&sp -1i . +.Sx \&sp +.Cm \-1i . Furthermore, the .Sq f scaling unit, while accepted, is rendered as the default unit. @@ -1802,7 +1849,8 @@ standalone double-quote in formatted output. This idiosyncratic behaviour is no longer applicable. .It Display types -.Sx \&Bd Fl center +.Sx \&Bd +.Fl center and .Fl right are aliases for @@ -1832,7 +1880,8 @@ made historic groff .Qq go orbital but is a proper delimiter in this implementation. 
.It -.Sx \&It Fl nested +.Sx \&It +.Fl nested is assumed for all lists (it wasn't in historic groff): any list may be nested and .Fl enum diff --git a/usr.bin/mandoc/mdoc_html.c b/usr.bin/mandoc/mdoc_html.c index 444b9f9000..36d091a507 100644 --- a/usr.bin/mandoc/mdoc_html.c +++ b/usr.bin/mandoc/mdoc_html.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_html.c,v 1.46 2009/10/31 08:34:12 kristaps Exp $ */ +/* $Id: mdoc_html.c,v 1.48 2009/11/16 08:46:59 kristaps Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -424,7 +424,7 @@ print_mdoc_node(MDOC_ARGS) break; case (MDOC_TEXT): print_text(h, n->string); - break; + return; default: if (mdocs[n->tok].pre) child = (*mdocs[n->tok].pre)(m, n, h); @@ -441,8 +441,6 @@ print_mdoc_node(MDOC_ARGS) case (MDOC_ROOT): mdoc_root_post(m, n, h); break; - case (MDOC_TEXT): - break; default: if (mdocs[n->tok].post) (*mdocs[n->tok].post)(m, n, h); @@ -725,12 +723,11 @@ mdoc_nm_pre(MDOC_ARGS) { struct htmlpair tag; - if ( ! (HTML_NEWLINE & h->flags)) - if (SEC_SYNOPSIS == n->sec) { - bufcat_style(h, "clear", "both"); - PAIR_STYLE_INIT(&tag, h); - print_otag(h, TAG_BR, 1, &tag); - } + if (SEC_SYNOPSIS == n->sec && n->prev) { + bufcat_style(h, "clear", "both"); + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_BR, 1, &tag); + } PAIR_CLASS_INIT(&tag, "name"); print_otag(h, TAG_SPAN, 1, &tag); diff --git a/usr.bin/mandoc/mdoc_term.c b/usr.bin/mandoc/mdoc_term.c index 3e00b72806..40b732db37 100644 --- a/usr.bin/mandoc/mdoc_term.c +++ b/usr.bin/mandoc/mdoc_term.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_term.c,v 1.100 2009/10/31 06:50:25 kristaps Exp $ */ +/* $Id: mdoc_term.c,v 1.102 2009/11/12 05:50:12 kristaps Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -59,9 +59,9 @@ static int arg_listtype(const struct mdoc_node *); static void print_bvspace(struct termp *, const struct mdoc_node *, const struct mdoc_node *); -static void print_node(DECL_ARGS); -static void print_head(DECL_ARGS); -static void print_body(DECL_ARGS); +static void 
print_mdoc_node(DECL_ARGS); +static void print_mdoc_head(DECL_ARGS); +static void print_mdoc_nodelist(DECL_ARGS); static void print_foot(DECL_ARGS); #ifdef __linux__ @@ -116,6 +116,7 @@ static int termp_fo_pre(DECL_ARGS); static int termp_ft_pre(DECL_ARGS); static int termp_in_pre(DECL_ARGS); static int termp_it_pre(DECL_ARGS); +static int termp_li_pre(DECL_ARGS); static int termp_lk_pre(DECL_ARGS); static int termp_nd_pre(DECL_ARGS); static int termp_nm_pre(DECL_ARGS); @@ -167,7 +168,7 @@ static const struct termact termacts[MDOC_MAX] = { { termp_ft_pre, termp_ft_post }, /* Ft */ { termp_bold_pre, NULL }, /* Ic */ { termp_in_pre, termp_in_post }, /* In */ - { NULL, NULL }, /* Li */ + { termp_li_pre, NULL }, /* Li */ { termp_nd_pre, NULL }, /* Nd */ { termp_nm_pre, NULL }, /* Nm */ { termp_op_pre, termp_op_post }, /* Op */ @@ -283,36 +284,36 @@ terminal_mdoc(void *arg, const struct mdoc *mdoc) n = mdoc_node(mdoc); m = mdoc_meta(mdoc); - print_head(p, NULL, m, n); + print_mdoc_head(p, NULL, m, n); if (n->child) - print_body(p, NULL, m, n->child); + print_mdoc_nodelist(p, NULL, m, n->child); print_foot(p, NULL, m, n); } static void -print_body(DECL_ARGS) +print_mdoc_nodelist(DECL_ARGS) { - print_node(p, pair, m, n); + print_mdoc_node(p, pair, m, n); if (n->next) - print_body(p, pair, m, n->next); + print_mdoc_nodelist(p, pair, m, n->next); } /* ARGSUSED */ static void -print_node(DECL_ARGS) +print_mdoc_node(DECL_ARGS) { - int chld, bold, under; + int chld; + const void *font; struct termpair npair; size_t offset, rmargin; chld = 1; offset = p->offset; rmargin = p->rmargin; - bold = p->bold; - under = p->under; + font = term_fontq(p); memset(&npair, 0, sizeof(struct termpair)); npair.ppair = pair; @@ -322,17 +323,11 @@ print_node(DECL_ARGS) chld = (*termacts[n->tok].pre)(p, &npair, m, n); } else term_word(p, n->string); - if (chld && n->child) - print_body(p, &npair, m, n->child); - /* - * XXX - if bold/under were to span scopes, this wouldn't be - * possible, but 
because decoration is always in-scope, we can - * get away with this. - */ + if (chld && n->child) + print_mdoc_nodelist(p, &npair, m, n->child); - p->bold = bold; - p->under = under; + term_fontpopq(p, font); if (MDOC_TEXT != n->type) if (termacts[n->tok].post) @@ -349,6 +344,8 @@ print_foot(DECL_ARGS) { char buf[DATESIZ], os[BUFSIZ]; + term_fontrepl(p, TERMFONT_NONE); + /* * Output the footer in new-groff style, that is, three columns * with the middle being the manual date and flanking columns @@ -390,10 +387,9 @@ print_foot(DECL_ARGS) } -/* FIXME: put in utility library. */ /* ARGSUSED */ static void -print_head(DECL_ARGS) +print_mdoc_head(DECL_ARGS) { char buf[BUFSIZ], title[BUFSIZ]; @@ -779,7 +775,7 @@ termp_it_pre(DECL_ARGS) switch (type) { case (MDOC_Diag): if (MDOC_HEAD == n->type) - p->bold++; + term_fontpush(p, TERMFONT_BOLD); break; default: break; @@ -917,16 +913,16 @@ termp_it_pre(DECL_ARGS) if (MDOC_HEAD == n->type) switch (type) { case (MDOC_Bullet): - p->bold++; + term_fontpush(p, TERMFONT_BOLD); term_word(p, "\\[bu]"); - p->bold--; + term_fontpop(p); break; case (MDOC_Dash): /* FALLTHROUGH */ case (MDOC_Hyphen): - p->bold++; + term_fontpush(p, TERMFONT_BOLD); term_word(p, "\\(hy"); - p->bold--; + term_fontpop(p); break; case (MDOC_Enum): (pair->ppair->ppair->count)++; @@ -1008,7 +1004,9 @@ termp_nm_pre(DECL_ARGS) if (SEC_SYNOPSIS == n->sec) term_newln(p); - p->bold++; + + term_fontpush(p, TERMFONT_BOLD); + if (NULL == n->child) term_word(p, m->name); return(1); @@ -1020,7 +1018,7 @@ static int termp_fl_pre(DECL_ARGS) { - p->bold++; + term_fontpush(p, TERMFONT_BOLD); term_word(p, "\\-"); p->flags |= TERMP_NOSPACE; return(1); @@ -1118,9 +1116,9 @@ termp_rv_pre(DECL_ARGS) term_word(p, "The"); for (nn = n->child; nn; nn = nn->next) { - p->bold++; + term_fontpush(p, TERMFONT_BOLD); term_word(p, nn->string); - p->bold--; + term_fontpop(p); p->flags |= TERMP_NOSPACE; if (nn->next && NULL == nn->next->next) term_word(p, "(), and"); @@ -1138,9 +1136,9 @@ 
termp_rv_pre(DECL_ARGS) term_word(p, "the value 0 if successful; otherwise the value " "-1 is returned and the global variable"); - p->under++; + term_fontpush(p, TERMFONT_UNDER); term_word(p, "errno"); - p->under--; + term_fontpop(p); term_word(p, "is set to indicate the error."); @@ -1157,9 +1155,9 @@ termp_ex_pre(DECL_ARGS) term_word(p, "The"); for (nn = n->child; nn; nn = nn->next) { - p->bold++; + term_fontpush(p, TERMFONT_BOLD); term_word(p, nn->string); - p->bold--; + term_fontpop(p); p->flags |= TERMP_NOSPACE; if (nn->next && NULL == nn->next->next) term_word(p, ", and"); @@ -1261,7 +1259,7 @@ static int termp_bold_pre(DECL_ARGS) { - p->bold++; + term_fontpush(p, TERMFONT_BOLD); return(1); } @@ -1295,7 +1293,7 @@ termp_sh_pre(DECL_ARGS) term_vspace(p); break; case (MDOC_HEAD): - p->bold++; + term_fontpush(p, TERMFONT_BOLD); break; case (MDOC_BODY): p->offset = INDENT; @@ -1430,7 +1428,8 @@ termp_ft_pre(DECL_ARGS) if (SEC_SYNOPSIS == n->sec) if (n->prev && MDOC_Fo == n->prev->tok) term_vspace(p); - p->under++; + + term_fontpush(p, TERMFONT_UNDER); return(1); } @@ -1451,17 +1450,18 @@ termp_fn_pre(DECL_ARGS) { const struct mdoc_node *nn; - p->bold++; + term_fontpush(p, TERMFONT_BOLD); term_word(p, n->child->string); - p->bold--; + term_fontpop(p); p->flags |= TERMP_NOSPACE; term_word(p, "("); for (nn = n->child->next; nn; nn = nn->next) { - p->under++; + term_fontpush(p, TERMFONT_UNDER); term_word(p, nn->string); - p->under--; + term_fontpop(p); + if (nn->next) term_word(p, ","); } @@ -1492,14 +1492,15 @@ termp_fa_pre(DECL_ARGS) const struct mdoc_node *nn; if (n->parent->tok != MDOC_Fo) { - p->under++; + term_fontpush(p, TERMFONT_UNDER); return(1); } for (nn = n->child; nn; nn = nn->next) { - p->under++; + term_fontpush(p, TERMFONT_UNDER); term_word(p, nn->string); - p->under--; + term_fontpop(p); + if (nn->next) term_word(p, ","); } @@ -1561,7 +1562,7 @@ termp_bd_pre(DECL_ARGS) for (nn = n->child; nn; nn = nn->next) { p->flags |= TERMP_NOSPACE; - 
print_node(p, pair, m, nn); + print_mdoc_node(p, pair, m, nn); if (NULL == nn->next) continue; if (nn->prev && nn->prev->line < nn->line) @@ -1715,7 +1716,7 @@ termp_ss_pre(DECL_ARGS) term_vspace(p); break; case (MDOC_HEAD): - p->bold++; + term_fontpush(p, TERMFONT_BOLD); p->offset = HALFINDENT; break; default: @@ -1741,7 +1742,7 @@ static int termp_cd_pre(DECL_ARGS) { - p->bold++; + term_fontpush(p, TERMFONT_BOLD); term_newln(p); return(1); } @@ -1752,7 +1753,7 @@ static int termp_in_pre(DECL_ARGS) { - p->bold++; + term_fontpush(p, TERMFONT_BOLD); if (SEC_SYNOPSIS == n->sec) term_word(p, "#include"); @@ -1767,10 +1768,10 @@ static void termp_in_post(DECL_ARGS) { - p->bold++; + term_fontpush(p, TERMFONT_BOLD); p->flags |= TERMP_NOSPACE; term_word(p, ">"); - p->bold--; + term_fontpop(p); if (SEC_SYNOPSIS != n->sec) return; @@ -1902,12 +1903,12 @@ termp_fo_pre(DECL_ARGS) } else if (MDOC_HEAD != n->type) return(1); - p->bold++; + term_fontpush(p, TERMFONT_BOLD); for (nn = n->child; nn; nn = nn->next) { assert(MDOC_TEXT == nn->type); term_word(p, nn->string); } - p->bold--; + term_fontpop(p); return(0); } @@ -1941,18 +1942,22 @@ termp_bf_pre(DECL_ARGS) if (NULL == (nn = n->head->child)) { if (arg_hasattr(MDOC_Emphasis, n)) - p->under++; + term_fontpush(p, TERMFONT_UNDER); else if (arg_hasattr(MDOC_Symbolic, n)) - p->bold++; + term_fontpush(p, TERMFONT_BOLD); + else + term_fontpush(p, TERMFONT_NONE); return(1); } assert(MDOC_TEXT == nn->type); if (0 == strcmp("Em", nn->string)) - p->under++; + term_fontpush(p, TERMFONT_UNDER); else if (0 == strcmp("Sy", nn->string)) - p->bold++; + term_fontpush(p, TERMFONT_BOLD); + else + term_fontpush(p, TERMFONT_NONE); return(1); } @@ -2006,28 +2011,38 @@ termp____post(DECL_ARGS) } +/* ARGSUSED */ +static int +termp_li_pre(DECL_ARGS) +{ + + term_fontpush(p, TERMFONT_NONE); + return(1); +} + + /* ARGSUSED */ static int termp_lk_pre(DECL_ARGS) { const struct mdoc_node *nn; - p->under++; + term_fontpush(p, TERMFONT_UNDER); nn = n->child; 
if (NULL == nn->next) return(1); term_word(p, nn->string); - p->under--; + term_fontpop(p); p->flags |= TERMP_NOSPACE; term_word(p, ":"); - p->bold++; + term_fontpush(p, TERMFONT_BOLD); for (nn = nn->next; nn; nn = nn->next) term_word(p, nn->string); - p->bold--; + term_fontpop(p); return(0); } @@ -2038,7 +2053,7 @@ static int termp_under_pre(DECL_ARGS) { - p->under++; + term_fontpush(p, TERMFONT_UNDER); return(1); } diff --git a/usr.bin/mandoc/out.c b/usr.bin/mandoc/out.c index cdd0bc2994..72c54fc24f 100644 --- a/usr.bin/mandoc/out.c +++ b/usr.bin/mandoc/out.c @@ -1,4 +1,4 @@ -/* $Id: out.c,v 1.7 2009/10/22 18:59:00 kristaps Exp $ */ +/* $Id: out.c,v 1.11 2009/11/12 08:21:05 kristaps Exp $ */ /* * Copyright (c) 2009 Kristaps Dzonsons * @@ -25,6 +25,26 @@ #include "out.h" +/* See a2roffdeco(). */ +#define C2LIM(c, l) do { \ + (l) = 1; \ + if ('[' == (c) || '\'' == (c)) \ + (l) = 0; \ + else if ('(' == (c)) \ + (l) = 2; } \ + while (/* CONSTCOND */ 0) + +/* See a2roffdeco(). */ +#define C2TERM(c, t) do { \ + (t) = 0; \ + if ('\'' == (c)) \ + (t) = 1; \ + else if ('[' == (c)) \ + (t) = 2; \ + else if ('(' == (c)) \ + (t) = 3; } \ + while (/* CONSTCOND */ 0) + #ifdef __linux__ extern size_t strlcat(char *, const char *, size_t); #endif @@ -165,3 +185,172 @@ time2a(time_t t, char *dst, size_t sz) (void)strftime(p, sz, "%Y", &tm); } + + +/* + * Returns length of parsed string (the leading "\" should NOT be + * included). This can be zero if the current character is the nil + * terminator. "d" is set to the type of parsed decorator, which may + * have an adjoining "word" of size "sz" (e.g., "(ab" -> "ab", 2). 
+ */ +int +a2roffdeco(enum roffdeco *d, + const char **word, size_t *sz) +{ + int j, type, term, lim; + const char *wp, *sp; + + *d = DECO_NONE; + wp = *word; + type = 1; + + switch (*wp) { + case ('\0'): + return(0); + + case ('('): + if ('\0' == *(++wp)) + return(1); + if ('\0' == *(wp + 1)) + return(2); + + *d = DECO_SPECIAL; + *sz = 2; + *word = wp; + return(3); + + case ('*'): + switch (*(++wp)) { + case ('\0'): + return(1); + + case ('('): + if ('\0' == *(++wp)) + return(2); + if ('\0' == *(wp + 1)) + return(3); + + *d = DECO_RESERVED; + *sz = 2; + *word = wp; + return(4); + + case ('['): + type = 0; + break; + + default: + *d = DECO_RESERVED; + *sz = 1; + *word = wp; + return(2); + } + break; + + case ('s'): + sp = wp; + if ('\0' == *(++wp)) + return(1); + + C2LIM(*wp, lim); + C2TERM(*wp, term); + + if (term) + wp++; + + *word = wp; + + if (*wp == '+' || *wp == '-') + ++wp; + + switch (*wp) { + case ('\''): + /* FALLTHROUGH */ + case ('['): + /* FALLTHROUGH */ + case ('('): + if (term) + return((int)(wp - sp)); + + C2LIM(*wp, lim); + C2TERM(*wp, term); + wp++; + break; + default: + break; + } + + if ( ! 
isdigit((u_char)*wp)) + return((int)(wp - sp)); + + for (j = 0; isdigit((u_char)*wp); j++) { + if (lim && j >= lim) + break; + ++wp; + } + + if (term && term < 3) { + if (1 == term && *wp != '\'') + return((int)(wp - sp)); + if (2 == term && *wp != ']') + return((int)(wp - sp)); + ++wp; + } + + *d = DECO_SIZE; + return((int)(wp - sp)); + + case ('f'): + switch (*(++wp)) { + case ('\0'): + return(1); + case ('3'): + /* FALLTHROUGH */ + case ('B'): + *d = DECO_BOLD; + break; + case ('2'): + /* FALLTHROUGH */ + case ('I'): + *d = DECO_ITALIC; + break; + case ('P'): + *d = DECO_PREVIOUS; + break; + case ('1'): + /* FALLTHROUGH */ + case ('R'): + *d = DECO_ROMAN; + break; + default: + break; + } + + return(2); + + case ('['): + break; + + case ('c'): + *d = DECO_NOSPACE; + *sz = 1; + return(1); + + default: + *d = DECO_SPECIAL; + *word = wp; + *sz = 1; + return(1); + } + + *word = ++wp; + for (j = 0; *wp && ']' != *wp; wp++, j++) + /* Loop... */ ; + + if ('\0' == *wp) + return(j + 1); + + *d = type ? 
DECO_SPECIAL : DECO_RESERVED; + *sz = (size_t)j; + return (j + 2); +} diff --git a/usr.bin/mandoc/out.h b/usr.bin/mandoc/out.h index fa7fdda834..0110dbf7bd 100644 --- a/usr.bin/mandoc/out.h +++ b/usr.bin/mandoc/out.h @@ -1,4 +1,4 @@ -/* $Id: out.h,v 1.6 2009/10/22 18:55:33 kristaps Exp $ */ +/* $Id: out.h,v 1.9 2009/11/12 08:21:05 kristaps Exp $ */ /* * Copyright (c) 2009 Kristaps Dzonsons * @@ -35,6 +35,19 @@ enum roffscale { SCALE_MAX }; +enum roffdeco { + DECO_NONE, + DECO_SPECIAL, + DECO_RESERVED, + DECO_BOLD, + DECO_ITALIC, + DECO_ROMAN, + DECO_PREVIOUS, + DECO_SIZE, + DECO_NOSPACE, + DECO_MAX +}; + struct roffsu { enum roffscale unit; double scale; @@ -42,18 +55,24 @@ struct roffsu { }; #define SCALE_INVERT(p) \ - do { (p)->scale = -(p)->scale; } while (/*CONSTCOND*/0) + do { (p)->scale = -(p)->scale; } \ + while (/* CONSTCOND */ 0) + #define SCALE_VS_INIT(p, v) \ do { (p)->unit = SCALE_VS; \ (p)->scale = (v); \ - (p)->pt = 0; } while (/*CONSTCOND*/0) + (p)->pt = 0; } \ + while (/* CONSTCOND */ 0) + #define SCALE_HS_INIT(p, v) \ do { (p)->unit = SCALE_BU; \ (p)->scale = (v); \ - (p)->pt = 0; } while (/*CONSTCOND*/0) + (p)->pt = 0; } \ + while (/* CONSTCOND */ 0) int a2roffsu(const char *, struct roffsu *, enum roffscale); +int a2roffdeco(enum roffdeco *, const char **, size_t *); void time2a(time_t, char *, size_t); __END_DECLS diff --git a/usr.bin/mandoc/term.c b/usr.bin/mandoc/term.c index 7d8e01db41..8ce5ccc98a 100644 --- a/usr.bin/mandoc/term.c +++ b/usr.bin/mandoc/term.c @@ -1,4 +1,4 @@ -/* $Id: term.c,v 1.120 2009/10/31 06:10:58 kristaps Exp $ */ +/* $Id: term.c,v 1.127 2009/11/12 08:21:06 kristaps Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -14,7 +14,10 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ +#include + #include +#include #include #include #include @@ -27,19 +30,14 @@ #include "mdoc.h" #include "main.h" -/* FIXME: accomodate non-breaking, non-collapsing white-space. */ -/* FIXME: accomodate non-breaking, collapsing white-space. */ - static struct termp *term_alloc(enum termenc); static void term_free(struct termp *); - -static void do_escaped(struct termp *, const char **); -static void do_special(struct termp *, - const char *, size_t); -static void do_reserved(struct termp *, - const char *, size_t); -static void buffer(struct termp *, char); -static void encode(struct termp *, char); +static void spec(struct termp *, const char *, size_t); +static void res(struct termp *, const char *, size_t); +static void buffera(struct termp *, const char *, size_t); +static void bufferc(struct termp *, char); +static void adjbuf(struct termp *p, size_t); +static void encode(struct termp *, const char *, size_t); void * @@ -229,7 +227,12 @@ term_flushln(struct termp *p) for ( ; i < (int)p->col; i++) { if (' ' == p->buf[i]) break; - putchar(p->buf[i]); + + /* The unit sep. is a non-breaking space. 
*/ + if (31 == p->buf[i]) + putchar(' '); + else + putchar(p->buf[i]); } vis += vsz; } @@ -315,152 +318,91 @@ term_vspace(struct termp *p) static void -do_special(struct termp *p, const char *word, size_t len) +spec(struct termp *p, const char *word, size_t len) { const char *rhs; size_t sz; - int i; rhs = chars_a2ascii(p->symtab, word, len, &sz); - - if (NULL == rhs) { -#if 0 - fputs("Unknown special character: ", stderr); - for (i = 0; i < (int)len; i++) - fputc(word[i], stderr); - fputc('\n', stderr); -#endif - return; - } - for (i = 0; i < (int)sz; i++) - encode(p, rhs[i]); + if (rhs) + encode(p, rhs, sz); } static void -do_reserved(struct termp *p, const char *word, size_t len) +res(struct termp *p, const char *word, size_t len) { const char *rhs; size_t sz; - int i; rhs = chars_a2res(p->symtab, word, len, &sz); + if (rhs) + encode(p, rhs, sz); +} - if (NULL == rhs) { -#if 0 - fputs("Unknown reserved word: ", stderr); - for (i = 0; i < (int)len; i++) - fputc(word[i], stderr); - fputc('\n', stderr); -#endif - return; - } - for (i = 0; i < (int)sz; i++) - encode(p, rhs[i]); + +void +term_fontlast(struct termp *p) +{ + enum termfont f; + + f = p->fontl; + p->fontl = p->fontq[p->fonti]; + p->fontq[p->fonti] = f; } -/* - * Handle an escape sequence: determine its length and pass it to the - * escape-symbol look table. Note that we assume mdoc(3) has validated - * the escape sequence (we assert upon badly-formed escape sequences). - */ -static void -do_escaped(struct termp *p, const char **word) +void +term_fontrepl(struct termp *p, enum termfont f) { - int j, type; - const char *wp; - wp = *word; - type = 1; + p->fontl = p->fontq[p->fonti]; + p->fontq[p->fonti] = f; +} - if (0 == *(++wp)) { - *word = wp; - return; - } - if ('(' == *wp) { - wp++; - if (0 == *wp || 0 == *(wp + 1)) { - *word = 0 == *wp ? 
wp : wp + 1; - return; - } +void +term_fontpush(struct termp *p, enum termfont f) +{ - do_special(p, wp, 2); - *word = ++wp; - return; + assert(p->fonti + 1 < 10); + p->fontl = p->fontq[p->fonti]; + p->fontq[++p->fonti] = f; +} - } else if ('*' == *wp) { - if (0 == *(++wp)) { - *word = wp; - return; - } - switch (*wp) { - case ('('): - wp++; - if (0 == *wp || 0 == *(wp + 1)) { - *word = 0 == *wp ? wp : wp + 1; - return; - } +const void * +term_fontq(struct termp *p) +{ - do_reserved(p, wp, 2); - *word = ++wp; - return; - case ('['): - type = 0; - break; - default: - do_reserved(p, wp, 1); - *word = wp; - return; - } + return(&p->fontq[p->fonti]); +} - } else if ('f' == *wp) { - if (0 == *(++wp)) { - *word = wp; - return; - } - switch (*wp) { - case ('B'): - p->bold++; - break; - case ('I'): - p->under++; - break; - case ('P'): - /* FALLTHROUGH */ - case ('R'): - p->bold = p->under = 0; - break; - default: - break; - } +enum termfont +term_fonttop(struct termp *p) +{ - *word = wp; - return; + return(p->fontq[p->fonti]); +} - } else if ('[' != *wp) { - do_special(p, wp, 1); - *word = wp; - return; - } - wp++; - for (j = 0; *wp && ']' != *wp; wp++, j++) - /* Loop... */ ; +void +term_fontpopq(struct termp *p, const void *key) +{ - if (0 == *wp) { - *word = wp; - return; - } + while (p->fonti >= 0 && key != &p->fontq[p->fonti]) + p->fonti--; + assert(p->fonti >= 0); +} - if (type) - do_special(p, wp - j, (size_t)j); - else - do_reserved(p, wp - j, (size_t)j); - *word = wp; + +void +term_fontpop(struct termp *p) +{ + + assert(p->fonti); + p->fonti--; } @@ -472,11 +414,14 @@ do_escaped(struct termp *p, const char **word) void term_word(struct termp *p, const char *word) { - const char *sv; + const char *sv, *seq; + int sz; + size_t ssz; + enum roffdeco deco; sv = word; - if (word[0] && 0 == word[1]) + if (word[0] && '\0' == word[1]) switch (word[0]) { case('.'): /* FALLTHROUGH */ @@ -503,16 +448,50 @@ term_word(struct termp *p, const char *word) } if ( ! 
(TERMP_NOSPACE & p->flags)) - buffer(p, ' '); + bufferc(p, ' '); if ( ! (p->flags & TERMP_NONOSPACE)) p->flags &= ~TERMP_NOSPACE; - for ( ; *word; word++) - if ('\\' != *word) - encode(p, *word); - else - do_escaped(p, &word); + /* FIXME: use strcspn. */ + + while (*word) { + if ('\\' != *word) { + encode(p, word, 1); + word++; + continue; + } + + seq = ++word; + sz = a2roffdeco(&deco, &seq, &ssz); + + switch (deco) { + case (DECO_RESERVED): + res(p, seq, ssz); + break; + case (DECO_SPECIAL): + spec(p, seq, ssz); + break; + case (DECO_BOLD): + term_fontrepl(p, TERMFONT_BOLD); + break; + case (DECO_ITALIC): + term_fontrepl(p, TERMFONT_UNDER); + break; + case (DECO_ROMAN): + term_fontrepl(p, TERMFONT_NONE); + break; + case (DECO_PREVIOUS): + term_fontlast(p); + break; + default: + break; + } + + word += sz; + if (DECO_NOSPACE == deco && '\0' == *word) + p->flags |= TERMP_NOSPACE; + } if (sv[0] && 0 == sv[1]) switch (sv[0]) { @@ -529,46 +508,77 @@ term_word(struct termp *p, const char *word) } -/* - * Insert a single character into the line-buffer. If the buffer's - * space is exceeded, then allocate more space by doubling the buffer - * size. 
- */ static void -buffer(struct termp *p, char c) +adjbuf(struct termp *p, size_t sz) { - size_t s; - - if (p->col + 1 >= p->maxcols) { - if (0 == p->maxcols) - p->maxcols = 256; - s = p->maxcols * 2; - p->buf = realloc(p->buf, s); - if (NULL == p->buf) { - perror(NULL); - exit(EXIT_FAILURE); - } - p->maxcols = s; + + if (0 == p->maxcols) + p->maxcols = 1024; + while (sz >= p->maxcols) + p->maxcols <<= 2; + + p->buf = realloc(p->buf, p->maxcols); + if (NULL == p->buf) { + perror(NULL); + exit(EXIT_FAILURE); } - p->buf[(int)(p->col)++] = c; } static void -encode(struct termp *p, char c) +buffera(struct termp *p, const char *word, size_t sz) { - if (' ' != c) { - if (p->under) { - buffer(p, '_'); - buffer(p, 8); - } - if (p->bold) { - buffer(p, c); - buffer(p, 8); + if (p->col + sz >= p->maxcols) + adjbuf(p, p->col + sz); + + memcpy(&p->buf[(int)p->col], word, sz); + p->col += sz; +} + + +static void +bufferc(struct termp *p, char c) +{ + + if (p->col + 1 >= p->maxcols) + adjbuf(p, p->col + 1); + + p->buf[(int)p->col++] = c; +} + + +static void +encode(struct termp *p, const char *word, size_t sz) +{ + enum termfont f; + int i; + + /* + * Encode and buffer a string of characters. If the current + * font mode is unset, buffer directly, else encode then buffer + * character by character. + */ + + if (TERMFONT_NONE == (f = term_fonttop(p))) { + buffera(p, word, sz); + return; + } + + for (i = 0; i < (int)sz; i++) { + if ( ! 
isgraph((u_char)word[i])) { + bufferc(p, word[i]); + continue; } + + if (TERMFONT_UNDER == f) + bufferc(p, '_'); + else + bufferc(p, word[i]); + + bufferc(p, 8); + bufferc(p, word[i]); } - buffer(p, c); } diff --git a/usr.bin/mandoc/term.h b/usr.bin/mandoc/term.h index f0d820166e..134c83de64 100644 --- a/usr.bin/mandoc/term.h +++ b/usr.bin/mandoc/term.h @@ -1,4 +1,4 @@ -/* $Id: term.h,v 1.49 2009/10/18 19:03:37 kristaps Exp $ */ +/* $Id: term.h,v 1.51 2009/11/12 05:50:13 kristaps Exp $ */ /* * Copyright (c) 2008, 2009 Kristaps Dzonsons * @@ -23,6 +23,12 @@ enum termenc { TERMENC_ASCII }; +enum termfont { + TERMFONT_NONE = 0, + TERMFONT_BOLD, + TERMFONT_UNDER +}; + struct termp { size_t rmargin; /* Current right margin. */ size_t maxrmargin; /* Max right margin. */ @@ -41,11 +47,12 @@ struct termp { #define TERMP_NOSPLIT (1 << 11) /* See termp_an_pre/post(). */ #define TERMP_SPLIT (1 << 12) /* See termp_an_pre/post(). */ #define TERMP_ANPREC (1 << 13) /* See termp_an_pre(). */ - int bold; - int under; char *buf; /* Output buffer. */ enum termenc enc; /* Type of encoding. */ void *symtab; /* Encoded-symbol table. */ + enum termfont fontl; /* Last font set. */ + enum termfont fontq[10]; /* Symmetric fonts. */ + int fonti; /* Index of font stack. */ }; void term_newln(struct termp *); @@ -56,6 +63,14 @@ void term_flushln(struct termp *); size_t term_hspan(const struct roffsu *); size_t term_vspan(const struct roffsu *); +enum termfont term_fonttop(struct termp *); +const void *term_fontq(struct termp *); +void term_fontpush(struct termp *, enum termfont); +void term_fontpop(struct termp *); +void term_fontpopq(struct termp *, const void *); +void term_fontrepl(struct termp *, enum termfont); +void term_fontlast(struct termp *); + __END_DECLS #endif /*!TERM_H*/ -- 2.41.0