From a4c7eb570f02f94484dd27694bc4c315771426d0 Mon Sep 17 00:00:00 2001 From: Sascha Wildner Date: Thu, 2 Jun 2011 20:31:59 +0200 Subject: [PATCH] Import mdocml-1.11.3 --- contrib/mdocml/Makefile | 89 ++- contrib/mdocml/att.in | 30 +- contrib/mdocml/chars.c | 127 ++--- contrib/mdocml/chars.in | 75 +-- contrib/mdocml/example.style.css | 32 +- contrib/mdocml/html.c | 341 +++++------- contrib/mdocml/html.h | 18 +- contrib/mdocml/index.css | 61 +- contrib/mdocml/index.sgml | 744 ++++++++++++------------- contrib/mdocml/libmandoc.h | 4 +- contrib/mdocml/libmdoc.h | 30 +- contrib/mdocml/main.c | 23 +- contrib/mdocml/main.h | 4 +- contrib/mdocml/makewhatis.1 | 152 +++++ contrib/mdocml/makewhatis.c | 920 +++++++++++++++++++++++++++++++ contrib/mdocml/man.7 | 16 +- contrib/mdocml/man_html.c | 14 +- contrib/mdocml/man_macro.c | 2 +- contrib/mdocml/man_term.c | 11 +- contrib/mdocml/man_validate.c | 67 +-- contrib/mdocml/mandoc.1 | 44 +- contrib/mdocml/mandoc.3 | 191 ++++++- contrib/mdocml/mandoc.c | 485 +++++++++++----- contrib/mdocml/mandoc.h | 27 +- contrib/mdocml/mandoc_char.7 | 32 +- contrib/mdocml/mdoc.7 | 163 +++--- contrib/mdocml/mdoc_argv.c | 377 ++++++------- contrib/mdocml/mdoc_html.c | 69 ++- contrib/mdocml/mdoc_macro.c | 30 +- contrib/mdocml/mdoc_term.c | 11 +- contrib/mdocml/mdoc_validate.c | 38 +- contrib/mdocml/out.c | 239 +------- contrib/mdocml/out.h | 32 +- contrib/mdocml/preconv.1 | 161 ++++++ contrib/mdocml/preconv.c | 522 ++++++++++++++++++ contrib/mdocml/predefs.in | 65 +++ contrib/mdocml/read.c | 24 +- contrib/mdocml/roff.7 | 25 +- contrib/mdocml/roff.c | 192 +++---- contrib/mdocml/st.in | 50 +- contrib/mdocml/style.css | 75 ++- contrib/mdocml/tbl.c | 6 +- contrib/mdocml/tbl_layout.c | 33 +- contrib/mdocml/tbl_opts.c | 6 +- contrib/mdocml/term.c | 313 ++++++----- contrib/mdocml/term.h | 52 +- contrib/mdocml/term_ascii.c | 132 ++++- contrib/mdocml/term_ps.c | 380 +++++++------ contrib/mdocml/test-mmap.c | 10 + 49 files changed, 4298 insertions(+), 2246 
deletions(-) create mode 100644 contrib/mdocml/makewhatis.1 create mode 100644 contrib/mdocml/makewhatis.c create mode 100644 contrib/mdocml/preconv.1 create mode 100644 contrib/mdocml/preconv.c create mode 100644 contrib/mdocml/predefs.in create mode 100644 contrib/mdocml/test-mmap.c diff --git a/contrib/mdocml/Makefile b/contrib/mdocml/Makefile index 3cc7daecb4..c535f105b9 100644 --- a/contrib/mdocml/Makefile +++ b/contrib/mdocml/Makefile @@ -11,9 +11,14 @@ # in the lower-left hand corner of -mdoc manuals. # CFLAGS += -DOSNAME="\"OpenBSD 4.5\"" -VERSION = 1.11.1 -VDATE = 04 April 2011 -CFLAGS += -g -DHAVE_CONFIG_H -DVERSION="\"$(VERSION)\"" +VERSION = 1.11.3 +VDATE = 26 May 2011 +# IFF your system supports multi-byte functions (setlocale(), wcwidth(), +# putwchar()) AND has __STDC_ISO_10646__ (that is, wchar_t is simply a +# UCS-4 value) should you define USE_WCHAR. If you define it and your +# system DOESN'T support this, -Tlocale will produce garbage. +# If you don't define it, -Tlocale is a synonym for -Tascii. 
+CFLAGS += -g -DUSE_WCHAR -DHAVE_CONFIG_H -DVERSION="\"$(VERSION)\"" CFLAGS += -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings PREFIX = /usr/local BINDIR = $(PREFIX)/bin @@ -27,7 +32,7 @@ INSTALL_DATA = $(INSTALL) -m 0444 INSTALL_LIB = $(INSTALL) -m 0644 INSTALL_MAN = $(INSTALL_DATA) -all: mandoc +all: mandoc preconv SRCS = Makefile \ arch.c \ @@ -67,8 +72,8 @@ SRCS = Makefile \ mandoc.3 \ mandoc.c \ mandoc.h \ - mandoc-db.1 \ - mandoc-db.c \ + makewhatis.1 \ + makewhatis.c \ mandoc_char.7 \ mdoc.h \ mdoc.7 \ @@ -83,6 +88,9 @@ SRCS = Makefile \ msec.in \ out.c \ out.h \ + preconv.1 \ + preconv.c \ + predefs.in \ read.c \ roff.7 \ roff.c \ @@ -100,6 +108,7 @@ SRCS = Makefile \ term.h \ term_ascii.c \ term_ps.c \ + test-mmap.c \ test-strlcat.c \ test-strlcpy.c \ tree.c \ @@ -154,18 +163,22 @@ LIBROFF_LNS = eqn.ln \ LIBMANDOC_OBJS = $(LIBMAN_OBJS) \ $(LIBMDOC_OBJS) \ $(LIBROFF_OBJS) \ + chars.o \ mandoc.o \ read.o LIBMANDOC_LNS = $(LIBMAN_LNS) \ $(LIBMDOC_LNS) \ $(LIBROFF_LNS) \ + chars.ln \ mandoc.ln \ read.ln arch.o arch.ln: arch.in att.o att.ln: att.in +chars.o chars.ln: chars.in lib.o lib.ln: lib.in msec.o msec.ln: msec.in +roff.o roff.ln: predefs.in st.o st.ln: st.in vol.o vol.ln: vol.in @@ -198,31 +211,37 @@ MANDOC_TERM_LNS = man_term.ln \ MANDOC_OBJS = $(MANDOC_HTML_OBJS) \ $(MANDOC_TERM_OBJS) \ - chars.o \ main.o \ out.o \ tree.o MANDOC_LNS = $(MANDOC_HTML_LNS) \ $(MANDOC_TERM_LNS) \ - chars.ln \ main.ln \ out.ln \ tree.ln -chars.o chars.ln: chars.in - $(MANDOC_HTML_OBJS) $(MANDOC_HTML_LNS): html.h $(MANDOC_TERM_OBJS) $(MANDOC_TERM_LNS): term.h $(MANDOC_OBJS) $(MANDOC_LNS): main.h mandoc.h mdoc.h man.h config.h out.h compat.o compat.ln: config.h -MANDOCDB_OBJS = mandoc-db.o -MANDOCDB_LNS = mandoc-db.ln +MAKEWHATIS_OBJS = makewhatis.o +MAKEWHATIS_LNS = makewhatis.ln + +$(MAKEWHATIS_OBJS) $(MAKEWHATIS_LNS): mandoc.h mdoc.h man.h config.h -$(MANDOCDB_OBJS) $(MANDOCDB_LNS): mandoc.h mdoc.h man.h config.h +PRECONV_OBJS = preconv.o +PRECONV_LNS = 
preconv.ln -INDEX_MANS = mandoc.1.html \ +$(PRECONV_OBJS) $(PRECONV_LNS): config.h + +INDEX_MANS = makewhatis.1.html \ + makewhatis.1.xhtml \ + makewhatis.1.ps \ + makewhatis.1.pdf \ + makewhatis.1.txt \ + mandoc.1.html \ mandoc.1.xhtml \ mandoc.1.ps \ mandoc.1.pdf \ @@ -252,6 +271,11 @@ INDEX_MANS = mandoc.1.html \ mdoc.7.ps \ mdoc.7.pdf \ mdoc.7.txt \ + preconv.1.html \ + preconv.1.xhtml \ + preconv.1.ps \ + preconv.1.pdf \ + preconv.1.txt \ roff.7.html \ roff.7.xhtml \ roff.7.ps \ @@ -274,13 +298,15 @@ INDEX_OBJS = $(INDEX_MANS) \ www: index.html -lint: llib-llibmandoc.ln llib-lmandoc.ln +lint: llib-llibmandoc.ln llib-lmandoc.ln llib-lpreconv.ln clean: rm -f libmandoc.a $(LIBMANDOC_OBJS) rm -f llib-llibmandoc.ln $(LIBMANDOC_LNS) - rm -f mandoc-db $(MANDOCDB_OBJS) - rm -f llib-lmandoc-db.ln $(MANDOCDB_LNS) + rm -f makewhatis $(MAKEWHATIS_OBJS) + rm -f llib-lmakewhatis.ln $(MAKEWHATIS_LNS) + rm -f preconv $(PRECONV_OBJS) + rm -f llib-lpreconv.ln $(PRECONV_LNS) rm -f mandoc $(MANDOC_OBJS) rm -f llib-lmandoc.ln $(MANDOC_LNS) rm -f config.h config.log compat.o compat.ln @@ -290,12 +316,15 @@ clean: install: all mkdir -p $(DESTDIR)$(BINDIR) mkdir -p $(DESTDIR)$(EXAMPLEDIR) + mkdir -p $(DESTDIR)$(LIBDIR) + mkdir -p $(DESTDIR)$(INCLUDEDIR) mkdir -p $(DESTDIR)$(MANDIR)/man1 mkdir -p $(DESTDIR)$(MANDIR)/man3 mkdir -p $(DESTDIR)$(MANDIR)/man7 - $(INSTALL_PROGRAM) mandoc $(DESTDIR)$(BINDIR) + $(INSTALL_PROGRAM) mandoc preconv $(DESTDIR)$(BINDIR) $(INSTALL_LIB) libmandoc.a $(DESTDIR)$(LIBDIR) - $(INSTALL_MAN) mandoc.1 $(DESTDIR)$(MANDIR)/man1 + $(INSTALL_LIB) mandoc.h $(DESTDIR)$(INCLUDEDIR) + $(INSTALL_MAN) mandoc.1 preconv.1 $(DESTDIR)$(MANDIR)/man1 $(INSTALL_MAN) mandoc.3 $(DESTDIR)$(MANDIR)/man3 $(INSTALL_MAN) man.7 mdoc.7 roff.7 eqn.7 tbl.7 mandoc_char.7 $(DESTDIR)$(MANDIR)/man7 $(INSTALL_DATA) example.style.css $(DESTDIR)$(EXAMPLEDIR) @@ -319,15 +348,21 @@ llib-llibmandoc.ln: compat.ln $(LIBMANDOC_LNS) mandoc: $(MANDOC_OBJS) libmandoc.a $(CC) -o $@ $(MANDOC_OBJS) 
libmandoc.a -# You'll need -ldb for Linux. -mandoc-db: $(MANDOCDB_OBJS) libmandoc.a - $(CC) -o $@ $(MANDOCDB_OBJS) libmandoc.a - llib-lmandoc.ln: $(MANDOC_LNS) $(LINT) $(LINTFLAGS) -Cmandoc $(MANDOC_LNS) -llib-lmandoc-db.ln: $(MANDOCDB_LNS) - $(LINT) $(LINTFLAGS) -Cmandoc-db $(MANDOCDB_LNS) +# You'll need -ldb for Linux. +makewhatis: $(MAKEWHATIS_OBJS) libmandoc.a + $(CC) -o $@ $(MAKEWHATIS_OBJS) libmandoc.a + +llib-lmakewhatis.ln: $(MAKEWHATIS_LNS) + $(LINT) $(LINTFLAGS) -Cmakewhatis $(MAKEWHATIS_LNS) + +preconv: $(PRECONV_OBJS) + $(CC) -o $@ $(PRECONV_OBJS) + +llib-lpreconv.ln: $(PRECONV_LNS) + $(LINT) $(LINTFLAGS) -Cpreconv $(PRECONV_LNS) mdocml.md5: mdocml.tar.gz md5 mdocml.tar.gz >$@ @@ -348,6 +383,10 @@ config.h: config.h.pre config.h.post echo '#define HAVE_STRLCAT'; \ rm test-strlcat; \ fi; \ + if $(CC) $(CFLAGS) -Werror -o test-mmap test-mmap.c >> config.log 2>&1; then \ + echo '#define HAVE_MMAP'; \ + rm test-mmap; \ + fi; \ if $(CC) $(CFLAGS) -Werror -o test-strlcpy test-strlcpy.c >> config.log 2>&1; then \ echo '#define HAVE_STRLCPY'; \ rm test-strlcpy; \ diff --git a/contrib/mdocml/att.in b/contrib/mdocml/att.in index 48fcd30b99..95af2ef22f 100644 --- a/contrib/mdocml/att.in +++ b/contrib/mdocml/att.in @@ -1,4 +1,4 @@ -/* $Id: att.in,v 1.6 2010/06/19 20:46:27 kristaps Exp $ */ +/* $Id: att.in,v 1.7 2011/04/24 17:56:44 schwarze Exp $ */ /* * Copyright (c) 2009 Kristaps Dzonsons * @@ -20,18 +20,20 @@ * isn't going to change. The right-hand side is the formatted string. * * Be sure to escape strings. + * The non-breaking blanks prevent ending an output line right before + * a number. Groff prevent line breaks at the same places. 
*/ -LINE("v1", "Version 1 AT&T UNIX") -LINE("v2", "Version 2 AT&T UNIX") -LINE("v3", "Version 3 AT&T UNIX") -LINE("v4", "Version 4 AT&T UNIX") -LINE("v5", "Version 5 AT&T UNIX") -LINE("v6", "Version 6 AT&T UNIX") -LINE("v7", "Version 7 AT&T UNIX") -LINE("32v", "Version 32V AT&T UNIX") -LINE("V", "AT&T System V UNIX") -LINE("V.1", "AT&T System V.1 UNIX") -LINE("V.2", "AT&T System V.2 UNIX") -LINE("V.3", "AT&T System V.3 UNIX") -LINE("V.4", "AT&T System V.4 UNIX") +LINE("v1", "Version\\~1 AT&T UNIX") +LINE("v2", "Version\\~2 AT&T UNIX") +LINE("v3", "Version\\~3 AT&T UNIX") +LINE("v4", "Version\\~4 AT&T UNIX") +LINE("v5", "Version\\~5 AT&T UNIX") +LINE("v6", "Version\\~6 AT&T UNIX") +LINE("v7", "Version\\~7 AT&T UNIX") +LINE("32v", "Version\\~32V AT&T UNIX") +LINE("V", "AT&T System\\~V UNIX") +LINE("V.1", "AT&T System\\~V Release\\~1 UNIX") +LINE("V.2", "AT&T System\\~V Release\\~2 UNIX") +LINE("V.3", "AT&T System\\~V Release\\~3 UNIX") +LINE("V.4", "AT&T System\\~V Release\\~4 UNIX") diff --git a/contrib/mdocml/chars.c b/contrib/mdocml/chars.c index 03e44910d8..5158612a32 100644 --- a/contrib/mdocml/chars.c +++ b/contrib/mdocml/chars.c @@ -1,4 +1,4 @@ -/* $Id: chars.c,v 1.34 2011/03/22 10:13:01 kristaps Exp $ */ +/* $Id: chars.c,v 1.46 2011/05/24 21:31:23 kristaps Exp $ */ /* * Copyright (c) 2009, 2010 Kristaps Dzonsons * Copyright (c) 2011 Ingo Schwarze @@ -20,12 +20,13 @@ #endif #include +#include #include #include #include #include "mandoc.h" -#include "out.h" +#include "libmandoc.h" #define PRINT_HI 126 #define PRINT_LO 32 @@ -35,52 +36,37 @@ struct ln { const char *code; const char *ascii; int unicode; - int type; -#define CHARS_CHAR (1 << 0) -#define CHARS_STRING (1 << 1) -#define CHARS_BOTH (CHARS_CHAR | CHARS_STRING) }; -#define LINES_MAX 351 +#define LINES_MAX 325 #define CHAR(in, ch, code) \ - { NULL, (in), (ch), (code), CHARS_CHAR }, -#define STRING(in, ch, code) \ - { NULL, (in), (ch), (code), CHARS_STRING }, -#define BOTH(in, ch, code) \ - { NULL, (in), 
(ch), (code), CHARS_BOTH }, + { NULL, (in), (ch), (code) }, #define CHAR_TBL_START static struct ln lines[LINES_MAX] = { #define CHAR_TBL_END }; #include "chars.in" -struct ctab { - enum chars type; +struct mchars { struct ln **htab; }; -static inline int match(const struct ln *, - const char *, size_t, int); -static const struct ln *find(struct ctab *, const char *, size_t, int); - +static inline int match(const struct ln *, const char *, size_t); +static const struct ln *find(struct mchars *, const char *, size_t); void -chars_free(void *arg) +mchars_free(struct mchars *arg) { - struct ctab *tab; - - tab = (struct ctab *)arg; - free(tab->htab); - free(tab); + free(arg->htab); + free(arg); } - -void * -chars_init(enum chars type) +struct mchars * +mchars_alloc(void) { - struct ctab *tab; + struct mchars *tab; struct ln **htab; struct ln *pp; int i, hash; @@ -92,7 +78,7 @@ chars_init(enum chars type) * (they're in-line re-ordered during lookup). */ - tab = mandoc_malloc(sizeof(struct ctab)); + tab = mandoc_malloc(sizeof(struct mchars)); htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln **)); for (i = 0; i < LINES_MAX; i++) { @@ -109,7 +95,6 @@ chars_init(enum chars type) } tab->htab = htab; - tab->type = type; return(tab); } @@ -118,79 +103,57 @@ chars_init(enum chars type) * Special character to Unicode codepoint. */ int -chars_spec2cp(void *arg, const char *p, size_t sz) -{ - const struct ln *ln; - - ln = find((struct ctab *)arg, p, sz, CHARS_CHAR); - if (NULL == ln) - return(-1); - return(ln->unicode); -} - - -/* - * Reserved word to Unicode codepoint. - */ -int -chars_res2cp(void *arg, const char *p, size_t sz) +mchars_spec2cp(struct mchars *arg, const char *p, size_t sz) { const struct ln *ln; - ln = find((struct ctab *)arg, p, sz, CHARS_STRING); + ln = find(arg, p, sz); if (NULL == ln) return(-1); return(ln->unicode); } - /* - * Numbered character to literal character, - * represented as a null-terminated string for additional safety. 
+ * Numbered character string to ASCII codepoint. + * This can only be a printable character (i.e., alnum, punct, space) so + * prevent the character from ruining our state (backspace, newline, and + * so on). + * If the character is illegal, returns '\0'. */ -const char * -chars_num2char(const char *p, size_t sz) +char +mchars_num2char(const char *p, size_t sz) { int i; - static char c[2]; - if (sz > 3) - return(NULL); - i = atoi(p); - if (i < 0 || i > 255) - return(NULL); - c[0] = (char)i; - c[1] = '\0'; - return(c); + if ((i = mandoc_strntou(p, sz, 10)) < 0) + return('\0'); + return(isprint(i) ? i : '\0'); } - -/* - * Special character to string array. +/* + * Hex character string to Unicode codepoint. + * If the character is illegal, returns '\0'. */ -const char * -chars_spec2str(void *arg, const char *p, size_t sz, size_t *rsz) +int +mchars_num2uc(const char *p, size_t sz) { - const struct ln *ln; - - ln = find((struct ctab *)arg, p, sz, CHARS_CHAR); - if (NULL == ln) - return(NULL); + int i; - *rsz = strlen(ln->ascii); - return(ln->ascii); + if ((i = mandoc_strntou(p, sz, 16)) < 0) + return('\0'); + /* FIXME: make sure we're not in a bogus range. */ + return(i > 0x80 && i <= 0x10FFFF ? i : '\0'); } - /* - * Reserved word to string array. + * Special character to string array. 
*/ const char * -chars_res2str(void *arg, const char *p, size_t sz, size_t *rsz) +mchars_spec2str(struct mchars *arg, const char *p, size_t sz, size_t *rsz) { const struct ln *ln; - ln = find((struct ctab *)arg, p, sz, CHARS_STRING); + ln = find(arg, p, sz); if (NULL == ln) return(NULL); @@ -198,9 +161,8 @@ chars_res2str(void *arg, const char *p, size_t sz, size_t *rsz) return(ln->ascii); } - static const struct ln * -find(struct ctab *tab, const char *p, size_t sz, int type) +find(struct mchars *tab, const char *p, size_t sz) { struct ln *pp, *prev; struct ln **htab; @@ -226,7 +188,7 @@ find(struct ctab *tab, const char *p, size_t sz, int type) return(NULL); for (prev = NULL; pp; pp = pp->next) { - if ( ! match(pp, p, sz, type)) { + if ( ! match(pp, p, sz)) { prev = pp; continue; } @@ -243,13 +205,10 @@ find(struct ctab *tab, const char *p, size_t sz, int type) return(NULL); } - static inline int -match(const struct ln *ln, const char *p, size_t sz, int type) +match(const struct ln *ln, const char *p, size_t sz) { - if ( ! (ln->type & type)) - return(0); if (strncmp(ln->code, p, sz)) return(0); return('\0' == ln->code[(int)sz]); diff --git a/contrib/mdocml/chars.in b/contrib/mdocml/chars.in index f628960c2d..483a2bb828 100644 --- a/contrib/mdocml/chars.in +++ b/contrib/mdocml/chars.in @@ -1,4 +1,4 @@ -/* $Id: chars.in,v 1.36 2011/03/16 22:49:55 schwarze Exp $ */ +/* $Id: chars.in,v 1.39 2011/05/24 21:40:14 kristaps Exp $ */ /* * Copyright (c) 2009, 2010 Kristaps Dzonsons * @@ -16,15 +16,12 @@ */ /* - * The ASCII translation tables. STRING corresponds to predefined - * strings (cf. mdoc_samples.7 and tmac/mdoc/doc-nroff). CHAR - * corresponds to special characters (cf. groff_char.7). BOTH contains - * sequences that are equivalent in both STRING and CHAR. + * The ASCII translation tables. * - * Either way, the left-hand side corresponds to the input sequence (\x, - * \(xx, \*(xx and so on) whose length is listed second element. 
The - * right-hand side is what's produced by the front-end, with the fourth - * element being its length. + * The left-hand side corresponds to the input sequence (\x, \(xx, \*(xx + * and so on) whose length is listed second element. The right-hand + * side is what's produced by the front-end, with the fourth element + * being its length. * * XXX - C-escape strings! * XXX - update LINES_MAX if adding more! @@ -36,25 +33,25 @@ static const char ascii_nbrsp[2] = { ASCII_NBRSP, '\0' }; CHAR_TBL_START /* Spacing. */ -CHAR("c", "", 0) +CHAR("c", "", 8203) CHAR("0", " ", 8194) CHAR(" ", ascii_nbrsp, 160) CHAR("~", ascii_nbrsp, 160) -CHAR("%", "", 0) -CHAR("&", "", 0) -CHAR("^", "", 0) -CHAR("|", "", 0) -CHAR("}", "", 0) +CHAR("%", "", 8203) +CHAR("&", "", 8203) +CHAR("^", "", 8203) +CHAR("|", "", 8203) +CHAR("}", "", 8203) /* Accents. */ CHAR("a\"", "\"", 779) CHAR("a-", "-", 175) CHAR("a.", ".", 729) CHAR("a^", "^", 770) -BOTH("\'", "\'", 769) -BOTH("aa", "\'", 769) -BOTH("ga", "`", 768) -BOTH("`", "`", 768) +CHAR("\'", "\'", 769) +CHAR("aa", "\'", 769) +CHAR("ga", "`", 768) +CHAR("`", "`", 768) CHAR("ab", "`", 774) CHAR("ac", ",", 807) CHAR("ad", "\"", 776) @@ -68,8 +65,8 @@ CHAR("ti", "~", 126) /* Quotes. 
*/ CHAR("Bq", ",,", 8222) CHAR("bq", ",", 8218) -BOTH("lq", "``", 8220) -BOTH("rq", "\'\'", 8221) +CHAR("lq", "``", 8220) +CHAR("rq", "\'\'", 8221) CHAR("oq", "`", 8216) CHAR("cq", "\'", 8217) CHAR("aq", "\'", 39) @@ -232,8 +229,8 @@ CHAR("<-", "<-", 8592) CHAR("->", "->", 8594) CHAR("<>", "<>", 8596) CHAR("da", "v", 8595) -BOTH("ua", "^", 8593) -BOTH("va", "^v", 8597) +CHAR("ua", "^", 8593) +CHAR("va", "^v", 8597) CHAR("lA", "<=", 8656) CHAR("rA", "=>", 8658) CHAR("hA", "<=>", 8660) @@ -270,8 +267,8 @@ CHAR("di", "-:-", 247) CHAR("tdi", "-:-", 247) CHAR("f/", "/", 8260) CHAR("**", "*", 8727) -BOTH("<=", "<=", 8804) -BOTH(">=", ">=", 8805) +CHAR("<=", "<=", 8804) +CHAR(">=", ">=", 8805) CHAR("<<", "<<", 8810) CHAR(">>", ">>", 8811) CHAR("eq", "=", 61) @@ -348,34 +345,6 @@ CHAR("Po", "L", 163) CHAR("Cs", "x", 164) CHAR("Fn", "f", 402) -/* Old style. */ -STRING("Am", "&", 38) -STRING("Ba", "|", 124) -STRING("Ge", ">=", 8805) -STRING("Gt", ">", 62) -STRING("If", "infinity", 0) -STRING("Le", "<=", 8804) -STRING("Lq", "``", 8220) -STRING("Lt", "<", 60) -STRING("Na", "NaN", 0) -STRING("Ne", "!=", 8800) -STRING("Pi", "pi", 960) -STRING("Pm", "+-", 177) -STRING("Rq", "\'\'", 8221) -STRING("left-bracket", "[", 91) -STRING("left-parenthesis", "(", 40) -STRING("left-singlequote", "`", 8216) -STRING("lp", "(", 40) -STRING("q", "\"", 34) -STRING("quote-left", "`", 8216) -STRING("quote-right", "\'", 8217) -STRING("R", "(R)", 174) -STRING("right-bracket", "]", 93) -STRING("right-parenthesis", ")", 41) -STRING("right-singlequote", "\'", 8217) -STRING("rp", ")", 41) -STRING("Tm", "(Tm)", 8482) - /* Lines. 
*/ CHAR("ba", "|", 124) CHAR("br", "|", 9474) diff --git a/contrib/mdocml/example.style.css b/contrib/mdocml/example.style.css index c7cc484f35..39075460a4 100644 --- a/contrib/mdocml/example.style.css +++ b/contrib/mdocml/example.style.css @@ -1,4 +1,4 @@ -/* $Id: example.style.css,v 1.42 2011/02/09 09:52:47 kristaps Exp $ */ +/* $Id: example.style.css,v 1.43 2011/04/11 22:58:28 kristaps Exp $ */ /* * This is an example style-sheet provided for mandoc(1) and the -Thtml @@ -11,32 +11,26 @@ html { min-width: 580px; width: 580px; } body { font-family: monospace; } +h1 { margin-bottom: 0ex; font-size: inherit; margin-left: -4ex; } /* Section header (Sh, SH). */ +h2 { margin-bottom: 0ex; font-size: inherit; margin-left: -2ex; } /* Sub-section header (Ss, SS). */ +table { width: 100%; margin-top: 0ex; margin-bottom: 0ex; } /* All tables. */ +td { vertical-align: top; } /* All table cells. */ +p { } /* Paragraph: Pp, Lp. */ +blockquote { margin-top: 0ex; margin-bottom: 0ex; } /* D1. */ +div.section { margin-bottom: 2ex; margin-left: 5ex; } /* Sections (Sh, SH). */ +div.subsection { } /* Sub-sections (Ss, SS). */ +table.synopsis { } /* SYNOPSIS section table. */ /* Preamble structure. */ -table.foot { width: 100%; } /* Document footer. */ +table.foot { } /* Document footer. */ td.foot-date { width: 50%; } /* Document footer: date. */ td.foot-os { width: 50%; text-align: right; } /* Document footer: OS/source. */ -table.head { width: 100%; } /* Document header. */ +table.head { } /* Document header. */ td.head-ltitle { width: 10%; } /* Document header: left-title. */ td.head-vol { width: 80%; text-align: center; } /* Document header: volume. */ td.head-rtitle { width: 10%; text-align: right; } /* Document header: right-title. */ -/* Sections. */ - -h1 { margin-bottom: 0px; font-size: medium; margin-left: -4ex; } /* Section header (Sh, SH). */ -h2 { margin-bottom: 0px; font-size: medium; margin-left: -2ex; } /* Sub-section header (Ss, SS). 
*/ -div.section { margin-bottom: 2ex; margin-left: 4ex; } /* Sections (Sh, SH). */ -div.subsection { } /* Sub-sections (Ss, SS). */ -table.synopsis { } /* SYNOPSIS section table. */ - -/* Vertical spacing. */ - -p { } /* Paragraph: Pp, Lp. */ -blockquote { margin-top: 0px; margin-bottom: 0px; } -table { margin-top: 0px; margin-bottom: 0px; } -td { vertical-align: top; } - /* General font modes. */ i { } /* Italic: BI, IB, I, (implicit). */ @@ -116,7 +110,7 @@ dd.list-inset { } dl.list-ohang { } dt.list-ohang { } -dd.list-ohang { margin-left: 0em; } +dd.list-ohang { margin-left: 0ex; } dl.list-tag { } dt.list-tag { } diff --git a/contrib/mdocml/html.c b/contrib/mdocml/html.c index ab57c3743f..45471fe3b0 100644 --- a/contrib/mdocml/html.c +++ b/contrib/mdocml/html.c @@ -1,4 +1,4 @@ -/* $Id: html.c,v 1.131 2011/03/22 14:05:45 kristaps Exp $ */ +/* $Id: html.c,v 1.147 2011/05/24 21:40:14 kristaps Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2011 Ingo Schwarze @@ -31,6 +31,7 @@ #include #include "mandoc.h" +#include "libmandoc.h" #include "out.h" #include "html.h" #include "main.h" @@ -93,19 +94,25 @@ static const char *const htmlattrs[ATTR_MAX] = { "colspan", /* ATTR_COLSPAN */ }; -static void print_num(struct html *, const char *, size_t); -static void print_spec(struct html *, enum roffdeco, - const char *, size_t); -static void print_res(struct html *, const char *, size_t); -static void print_ctag(struct html *, enum htmltag); -static void print_doctype(struct html *); -static void print_xmltype(struct html *); -static int print_encode(struct html *, const char *, int); -static void print_metaf(struct html *, enum roffdeco); -static void print_attr(struct html *, - const char *, const char *); -static void *ml_alloc(char *, enum htmltype); +static const char *const roffscales[SCALE_MAX] = { + "cm", /* SCALE_CM */ + "in", /* SCALE_IN */ + "pc", /* SCALE_PC */ + "pt", /* SCALE_PT */ + "em", /* SCALE_EM */ + "em", /* SCALE_MM */ 
+ "ex", /* SCALE_EN */ + "ex", /* SCALE_BU */ + "em", /* SCALE_VS */ + "ex", /* SCALE_FS */ +}; +static void bufncat(struct html *, const char *, size_t); +static void print_ctag(struct html *, enum htmltag); +static int print_encode(struct html *, const char *, int); +static void print_metaf(struct html *, enum mandoc_esc); +static void print_attr(struct html *, const char *, const char *); +static void *ml_alloc(char *, enum htmltype); static void * ml_alloc(char *outopts, enum htmltype type) @@ -123,7 +130,7 @@ ml_alloc(char *outopts, enum htmltype type) h->type = type; h->tags.head = NULL; - h->symtab = chars_init(CHARS_HTML); + h->symtab = mchars_alloc(); while (outopts && *outopts) switch (getsubopt(&outopts, UNCONST(toks), &v)) { @@ -173,7 +180,7 @@ html_free(void *p) } if (h->symtab) - chars_free(h->symtab); + mchars_free(h->symtab); free(h); } @@ -209,72 +216,24 @@ print_gen_head(struct html *h) } } -/* ARGSUSED */ -static void -print_num(struct html *h, const char *p, size_t len) -{ - const char *rhs; - - rhs = chars_num2char(p, len); - if (rhs) - putchar((int)*rhs); -} - static void -print_spec(struct html *h, enum roffdeco d, const char *p, size_t len) -{ - int cp; - const char *rhs; - size_t sz; - - if ((cp = chars_spec2cp(h->symtab, p, len)) > 0) { - printf("&#%d;", cp); - return; - } else if (-1 == cp && DECO_SSPECIAL == d) { - fwrite(p, 1, len, stdout); - return; - } else if (-1 == cp) - return; - - if (NULL != (rhs = chars_spec2str(h->symtab, p, len, &sz))) - fwrite(rhs, 1, sz, stdout); -} - - -static void -print_res(struct html *h, const char *p, size_t len) -{ - int cp; - const char *rhs; - size_t sz; - - if ((cp = chars_res2cp(h->symtab, p, len)) > 0) { - printf("&#%d;", cp); - return; - } else if (-1 == cp) - return; - - if (NULL != (rhs = chars_res2str(h->symtab, p, len, &sz))) - fwrite(rhs, 1, sz, stdout); -} - - -static void -print_metaf(struct html *h, enum roffdeco deco) +print_metaf(struct html *h, enum mandoc_esc deco) { enum htmlfont 
font; switch (deco) { - case (DECO_PREVIOUS): + case (ESCAPE_FONTPREV): font = h->metal; break; - case (DECO_ITALIC): + case (ESCAPE_FONTITALIC): font = HTMLFONT_ITALIC; break; - case (DECO_BOLD): + case (ESCAPE_FONTBOLD): font = HTMLFONT_BOLD; break; - case (DECO_ROMAN): + case (ESCAPE_FONT): + /* FALLTHROUGH */ + case (ESCAPE_FONTROMAN): font = HTMLFONT_NONE; break; default: @@ -296,80 +255,123 @@ print_metaf(struct html *h, enum roffdeco deco) print_otag(h, TAG_I, 0, NULL); } +int +html_strlen(const char *cp) +{ + int ssz, sz; + const char *seq, *p; + + /* + * Account for escaped sequences within string length + * calculations. This follows the logic in term_strlen() as we + * must calculate the width of produced strings. + * Assume that characters are always width of "1". This is + * hacky, but it gets the job done for approximation of widths. + */ + + sz = 0; + while (NULL != (p = strchr(cp, '\\'))) { + sz += (int)(p - cp); + ++cp; + switch (mandoc_escape(&cp, &seq, &ssz)) { + case (ESCAPE_ERROR): + return(sz); + case (ESCAPE_UNICODE): + /* FALLTHROUGH */ + case (ESCAPE_NUMBERED): + /* FALLTHROUGH */ + case (ESCAPE_SPECIAL): + sz++; + break; + default: + break; + } + } + + assert(sz >= 0); + return(sz + strlen(cp)); +} static int print_encode(struct html *h, const char *p, int norecurse) { size_t sz; - int len, nospace; + int c, len, nospace; const char *seq; - enum roffdeco deco; + enum mandoc_esc esc; static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' }; nospace = 0; - for (; *p; p++) { + while ('\0' != *p) { sz = strcspn(p, rejs); fwrite(p, 1, sz, stdout); - p += /* LINTED */ - sz; + p += (int)sz; + + if ('\0' == *p) + break; - if ('<' == *p) { + switch (*p++) { + case ('<'): printf("<"); continue; - } else if ('>' == *p) { + case ('>'): printf(">"); continue; - } else if ('&' == *p) { + case ('&'): printf("&"); continue; - } else if (ASCII_HYPH == *p) { - /* - * Note: "soft hyphens" aren't graphically - * displayed when not breaking the 
text; we want - * them to be displayed. - */ - /*printf("­");*/ + case (ASCII_HYPH): putchar('-'); continue; - } else if ('\0' == *p) + default: break; + } - seq = ++p; - len = a2roffdeco(&deco, &seq, &sz); + esc = mandoc_escape(&p, &seq, &len); + if (ESCAPE_ERROR == esc) + break; - switch (deco) { - case (DECO_NUMBERED): - print_num(h, seq, sz); + switch (esc) { + case (ESCAPE_UNICODE): + /* Skip passed "u" header. */ + c = mchars_num2uc(seq + 1, len - 1); + if ('\0' != c) + printf("&#x%x;", c); break; - case (DECO_RESERVED): - print_res(h, seq, sz); + case (ESCAPE_NUMBERED): + c = mchars_num2char(seq, len); + if ('\0' != c) + putchar(c); break; - case (DECO_SSPECIAL): - /* FALLTHROUGH */ - case (DECO_SPECIAL): - print_spec(h, deco, seq, sz); + case (ESCAPE_SPECIAL): + c = mchars_spec2cp(h->symtab, seq, len); + if (c > 0) + printf("&#%d;", c); + else if (-1 == c && 1 == len) + putchar((int)*seq); break; - case (DECO_PREVIOUS): + case (ESCAPE_FONT): + /* FALLTHROUGH */ + case (ESCAPE_FONTPREV): /* FALLTHROUGH */ - case (DECO_BOLD): + case (ESCAPE_FONTBOLD): /* FALLTHROUGH */ - case (DECO_ITALIC): + case (ESCAPE_FONTITALIC): /* FALLTHROUGH */ - case (DECO_ROMAN): + case (ESCAPE_FONTROMAN): if (norecurse) break; - print_metaf(h, deco); + print_metaf(h, esc); + break; + case (ESCAPE_NOSPACE): + if ('\0' == *p) + nospace = 1; break; default: break; } - - p += len - 1; - - if (DECO_NOSPACE == deco && '\0' == *(p + 1)) - nospace = 1; } return(nospace); @@ -432,7 +434,7 @@ print_otag(struct html *h, enum htmltag tag, print_attr(h, "lang", "en"); } - /* Accomodate for XML "well-formed" singleton escaping. */ + /* Accommodate for XML "well-formed" singleton escaping. 
*/ if (HTML_AUTOCLOSE & htmltags[tag].flags) switch (h->type) { @@ -465,27 +467,8 @@ print_ctag(struct html *h, enum htmltag tag) } } - void print_gen_decls(struct html *h) -{ - - print_xmltype(h); - print_doctype(h); -} - - -static void -print_xmltype(struct html *h) -{ - - if (HTML_XHTML_1_0_STRICT == h->type) - puts(""); -} - - -static void -print_doctype(struct html *h) { const char *doctype; const char *dtd; @@ -498,6 +481,7 @@ print_doctype(struct html *h) dtd = "http://www.w3.org/TR/html4/strict.dtd"; break; default: + puts(""); name = "html"; doctype = "-//W3C//DTD XHTML 1.0 Strict//EN"; dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"; @@ -587,7 +571,6 @@ print_stagq(struct html *h, const struct tag *suntil) } } - void bufinit(struct html *h) { @@ -596,28 +579,27 @@ bufinit(struct html *h) h->buflen = 0; } - void bufcat_style(struct html *h, const char *key, const char *val) { bufcat(h, key); - bufncat(h, ":", 1); + bufcat(h, ":"); bufcat(h, val); - bufncat(h, ";", 1); + bufcat(h, ";"); } - void bufcat(struct html *h, const char *p) { - bufncat(h, p, strlen(p)); + h->buflen = strlcat(h->buf, p, BUFSIZ); + assert(h->buflen < BUFSIZ); + h->buflen--; } - void -buffmt(struct html *h, const char *fmt, ...) +bufcat_fmt(struct html *h, const char *fmt, ...) { va_list ap; @@ -628,19 +610,15 @@ buffmt(struct html *h, const char *fmt, ...) 
h->buflen = strlen(h->buf); } - -void +static void bufncat(struct html *h, const char *p, size_t sz) { - if (h->buflen + sz > BUFSIZ - 1) - sz = BUFSIZ - 1 - h->buflen; - - (void)strncat(h->buf, p, sz); + assert(h->buflen + sz + 1 < BUFSIZ); + strncat(h->buf, p, sz); h->buflen += sz; } - void buffmt_includes(struct html *h, const char *name) { @@ -648,6 +626,7 @@ buffmt_includes(struct html *h, const char *name) pp = h->base_includes; + bufinit(h); while (NULL != (p = strchr(pp, '%'))) { bufncat(h, pp, (size_t)(p - pp)); switch (*(p + 1)) { @@ -664,7 +643,6 @@ buffmt_includes(struct html *h, const char *name) bufcat(h, pp); } - void buffmt_man(struct html *h, const char *name, const char *sec) @@ -673,7 +651,7 @@ buffmt_man(struct html *h, pp = h->base_man; - /* LINTED */ + bufinit(h); while (NULL != (p = strchr(pp, '%'))) { bufncat(h, pp, (size_t)(p - pp)); switch (*(p + 1)) { @@ -681,7 +659,7 @@ buffmt_man(struct html *h, bufcat(h, sec ? sec : "1"); break; case('N'): - buffmt(h, name); + bufcat_fmt(h, name); break; default: bufncat(h, p, 2); @@ -693,85 +671,24 @@ buffmt_man(struct html *h, bufcat(h, pp); } - void bufcat_su(struct html *h, const char *p, const struct roffsu *su) { double v; - const char *u; v = su->scale; + if (SCALE_MM == su->unit && 0.0 == (v /= 100.0)) + v = 1.0; - switch (su->unit) { - case (SCALE_CM): - u = "cm"; - break; - case (SCALE_IN): - u = "in"; - break; - case (SCALE_PC): - u = "pc"; - break; - case (SCALE_PT): - u = "pt"; - break; - case (SCALE_EM): - u = "em"; - break; - case (SCALE_MM): - if (0 == (v /= 100)) - v = 1; - u = "em"; - break; - case (SCALE_EN): - u = "ex"; - break; - case (SCALE_BU): - u = "ex"; - break; - case (SCALE_VS): - u = "em"; - break; - default: - u = "ex"; - break; - } - - /* - * XXX: the CSS spec isn't clear as to which types accept - * integer or real numbers, so we just make them all decimals. 
- */ - buffmt(h, "%s: %.2f%s;", p, v, u); + bufcat_fmt(h, "%s: %.2f%s;", p, v, roffscales[su->unit]); } - void -html_idcat(char *dst, const char *src, int sz) +bufcat_id(struct html *h, const char *src) { - int ssz; - - assert(sz > 2); /* Cf. . */ - /* We can't start with a number (bah). */ - - if ('#' == *dst) { - dst++; - sz--; - } - if ('\0' == *dst) { - *dst++ = 'x'; - *dst = '\0'; - sz--; - } - - for ( ; *dst != '\0' && sz; dst++, sz--) - /* Jump to end. */ ; - - for ( ; *src != '\0' && sz > 1; src++) { - ssz = snprintf(dst, (size_t)sz, "%.2x", *src); - sz -= ssz; - dst += ssz; - } + while ('\0' != *src) + bufcat_fmt(h, "%.2x", *src++); } diff --git a/contrib/mdocml/html.h b/contrib/mdocml/html.h index 561d06e2de..aba635f144 100644 --- a/contrib/mdocml/html.h +++ b/contrib/mdocml/html.h @@ -1,4 +1,4 @@ -/* $Id: html.h,v 1.40 2011/01/29 14:49:44 kristaps Exp $ */ +/* $Id: html.h,v 1.44 2011/05/17 11:34:31 kristaps Exp $ */ /* * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons * @@ -120,7 +120,7 @@ struct html { struct tagq tags; /* stack of open tags */ struct rofftbl tbl; /* current table */ struct tag *tblt; /* current open table scope */ - void *symtab; /* character-escapes */ + struct mchars *symtab; /* character-escapes */ char *base_man; /* base for manpage href */ char *base_includes; /* base for include href */ char *style; /* style-sheet URI */ @@ -142,19 +142,19 @@ void print_text(struct html *, const char *); void print_tblclose(struct html *); void print_tbl(struct html *, const struct tbl_span *); +void bufcat_fmt(struct html *, const char *, ...); +void bufcat(struct html *, const char *); +void bufcat_id(struct html *, const char *); +void bufcat_style(struct html *, + const char *, const char *); void bufcat_su(struct html *, const char *, const struct roffsu *); +void bufinit(struct html *); void buffmt_man(struct html *, const char *, const char *); void buffmt_includes(struct html *, const char *); -void buffmt(struct html *, const char *, 
...); -void bufcat(struct html *, const char *); -void bufcat_style(struct html *, - const char *, const char *); -void bufncat(struct html *, const char *, size_t); -void bufinit(struct html *); -void html_idcat(char *, const char *, int); +int html_strlen(const char *); __END_DECLS diff --git a/contrib/mdocml/index.css b/contrib/mdocml/index.css index d8d0b2d80f..ce0898d0d3 100644 --- a/contrib/mdocml/index.css +++ b/contrib/mdocml/index.css @@ -1,48 +1,43 @@ -body { color: #333333; - font-size: 0.93em; - font-family: Times, sans-serif; } +html { min-width: 40em; + margin-top: 2em; + margin-left: auto; + margin-right: auto; + width: 80%; } -table.frame { max-width: 800px; - padding-right: 2em; - padding-left: 1em; } +body { text-align: justify; + font-family: Helvetica,Arial,sans-serif; + line-height: 120%; + font-size: small; } -table { padding-left: 40px; } +p,ul,table { margin-left: 3em; } -p { padding-left: 40px; - text-align: justify; } +p.head, p.foot { margin-left: 0.0em; margin-right: 0.0em; } -h1 { font-weight: bold; - font-size: small; - font-family: Verdana, Tahoma, Arial, sans-serif; } +p.news { margin-left: 2.0em; } -h2 { font-weight: bold; - font-size: small; - padding-left: 20px; - margin-bottom: 0px; - font-family: Verdana, Tahoma, Arial, sans-serif; } +li { margin: 0.25em; } -span.nm { font-weight: bold; } +h1 { font-size: 110%; } +h2 { font-size: 105%; margin-left: 1.5em } + +p.head { margin-bottom: 1.75em; + border-bottom: 1px solid #dddddd; + padding-bottom: 0.2em; } + +p.foot { border-top: 1px solid #dddddd; + color: #666666; + padding-top: 0.2em; + margin-top: 1.75em; } + +span.nm { color: green; } span.file { font-style: italic; } -span.attn { color: #000000; font-weight: bold; } +span.attn { font-weight: bold; } span.flag { font-weight: bold; } -div.head { border-bottom: 1px solid #dddddd; - padding-bottom: 5px; - text-align: right; } - -div.foot { border-top: 1px solid #dddddd; - padding-top: 5px; - font-size: smaller; - text-align: 
right; } +a { text-decoration: none; } a.external { background: transparent url(external.png) center right no-repeat; padding-right: 12px; } - -span.date { color: #000000; } - -div.news { margin-bottom: 2em; } - -div.news ul { margin-left: 4em; } diff --git a/contrib/mdocml/index.sgml b/contrib/mdocml/index.sgml index d4fd1edc62..fa7d8b431d 100644 --- a/contrib/mdocml/index.sgml +++ b/contrib/mdocml/index.sgml @@ -2,408 +2,386 @@ - mdocml | mdoc macro compiler - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- mdocml – mdoc macro compiler -
-
-

- DESCRIPTION -

+

+ mdocml – mdoc macro compiler +

+

+ Description +

+

+ mdocml is a suite of tools compiling mdoc, the roff macro + package of choice for BSD manual pages, and man, the predominant historical package for + UNIX manuals. The mission of mdocml is to deprecate groff, the GNU troff implementation, for displaying mdoc + pages whilst providing token support for man. +

+

+ Why? groff amounts to over 5 MB of source code, most of which is C++ and all of which is GPL. It runs slowly, produces + uncertain output, and varies in operation from system to system. mdocml strives to fix this (respectively small, C, ISC-licensed, fast and regular). +

+

+ mdocml consists of the libmandoc validating compiler and mandoc, which interfaces with the compiler library to format output for UNIX terminals (with + support for wide-character locales), XHTML, HTML, PostScript, and PDF. + It also includes preconv, for recoding multibyte manuals; and makewhatis, for indexing manuals. + It is a BSD.lv project. +

+

+ Disambiguation: mdocml is often referred to by its installed binary, mandoc. +

+

+ Sources +

+

+ mdocml is in plain-old ANSI C and should build and run on any UNIX system, although makewhatis requires Berkeley Database (this is + installed by default on all BSD operating systems). + To compile mdocml, run make, then make install to install into + /usr/local. + Be aware: if you have an existing groff installation, + this may overwrite its preconv binary. + The makewhatis utility is not yet linked to the build. You must run make + makewhatis to build it (it does not install). +

+

+ The most current version of mdocml is @VERSION@, dated @VDATE@. +

-

- mdocml is a suite of tools compiling -mdoc, the - roff macro package of choice for BSD manual pages, and -man, the - predominant historical package for UNIX manuals. The mission of mdocml is to - deprecate groff, the GNU troff - implementation, for displaying -mdoc pages whilst providing token support for -man. -

+

+ Current +

-

- Why? groff amounts to over 5 MB of source code, most of which is C++ and all of which is GPL. It runs - slowly, produces uncertain output, and varies in operation from system to system. mdocml strives to fix - this (respectively small, C, ISC-licensed, fast and regular). -

+ + + + + + + + + + + + + +
Source archive + /snapshots/mdocml.tar.gz + (md5) +
Online source + cvsweb +
-

- mdocml consists of the libmandoc validating - compiler and mandoc, which interfaces with the compiler library to format - output for UNIX terminals, XHTML, HTML, PostScript, and PDF. It is a BSD.lv project. -

+

+ Downstream +

-

- Disambiguation: mdocml is often referred to by its installed binary, - mandoc. -

-
-

- SOURCES -

+ + + + + + + + + + + + + + + + + + + + + +
DragonFly BSD + usr.bin/mandoc +
FreeBSD + ports/textproc/mdocml +
NetBSD + src/external/bsd/mdocml +
OpenBSD + src/usr.bin/mandoc +
-

- mdocml is in plain-old ANSI C and should build and run on any UNIX system. - The most current version is @VERSION@, dated @VDATE@. -

+

+ Historical +

-

- Current -

+ + + + + + + + + +
Source archive + /snapshots/ +
- - - - - - - - - - - - - -
Source archive - /snapshots/mdocml.tar.gz - (md5) -
Online source - cvsweb -
+

+ Documentation +

-

- Downstream -

+

+ These manuals are generated automatically and refer to the current snapshot. +

- - - - - - - - - - - - - - - - - - - - - -
DragonFly BSD - usr.bin/mandoc -
FreeBSD - ports/textproc/mdocml -
NetBSD - src/external/bsd/mdocml -
OpenBSD - src/usr.bin/mandoc -
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
mandoc(1) + format and display UNIX manuals + + (text | + xhtml | + pdf | + postscript) + +
makewhatis(1) + index UNIX manuals + + (text | + xhtml | + pdf | + postscript) + +
preconv(1) + recode multibyte UNIX manuals + + (text | + xhtml | + pdf | + postscript) + +
mandoc(3) + mandoc macro compiler library + + (text | + xhtml | + pdf | + postscript) + +
man(7) + man language reference + + (text | + xhtml | + pdf | + postscript) + +
eqn(7) + eqn-mandoc language reference + + (text | + xhtml | + pdf | + postscript) + +
mandoc_char(7) + mandoc special characters + + (text | + xhtml | + pdf | + postscript) + +
mdoc(7) + mdoc language reference + + (text | + xhtml | + pdf | + postscript) + +
roff(7) + roff-mandoc language reference + + (text | + xhtml | + pdf | + postscript) + +
tbl(7) + tbl-mandoc language reference + + (text | + xhtml | + pdf | + postscript) + +
-

- Historical -

+

+ Contact +

- - - - - - - - - -
Source archive - /snapshots/ -
-
-

- DOCUMENTATION -

+

+ Use the mailing lists for bug-reports, patches, questions, etc. (these require subscription). Please check the + TODO for known issues + before posting. Beyond that, contact Kristaps at kris...@bsd.lv. +

-

- These manuals are generated automatically and refer to the current snapshot. -

+ + + + + + + + + + + + + + + + + +
+ disc...@mdocml.bsd.lv + + bug-reports, general questions, and announcements + (archive) +
+ tec...@mdocml.bsd.lv + + patches and system discussions + (archive) +
+ sou...@mdocml.bsd.lv + + source commit messages + (archive) +
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
mandoc(1) - format and display UNIX manuals - - (text | - xhtml | - pdf | - postscript) - -
mandoc(3) - mandoc macro compiler library - - (text | - xhtml | - pdf | - postscript) - -
man(7) - man language reference - - (text | - xhtml | - pdf | - postscript) - -
eqn(7) - eqn-mandoc language reference - - (text | - xhtml | - pdf | - postscript) - -
mandoc_char(7) - mandoc special characters - - (text | - xhtml | - pdf | - postscript) - -
mdoc(7) - mdoc language reference - - (text | - xhtml | - pdf | - postscript) - -
roff(7) - roff-mandoc language reference - - (text | - xhtml | - pdf | - postscript) - -
tbl(7) - tbl-mandoc language reference - - (text | - xhtml | - pdf | - postscript) - -
+

+ News +

-

- See Writing UNIX Manual Pages for a general - introduction to manpages and mdoc. -

-
-

- CONTACT -

+

+ 26-05-2011: version 1.11.3 +

+

+ Introduce locale-encoding of output with the -Tlocale output option and Unicode escaped-character input. + See mandoc and mandoc_char, respectively, for details. + This allows for non-ASCII characters (e.g., \[u5000]) to be rendered in the locale's encoding, if said + environment supports wide-character encoding (if it does not, -Tascii is used instead). + Locale support can be turned off at compile time by removing -DUSE_WCHAR in the Makefile, in which case + -Tlocale is always a synonym for -Tascii. +

+

+ Furthermore, multibyte-encoded documents, such as those in UTF-8, may be on-the-fly recoded into mandoc input by using the newly-added preconv utility. + Note: in the future, this feature may be integrated into mandoc. +

-

- Use the mailing lists for bug-reports, patches, questions, etc. (these require - subscription). - Please check the - TODO for known issues - before posting. - Beyond that, contact Kristaps at kris...@bsd.lv. -

+

+ 12-05-2011: version 1.11.2 +

+

+ Corrected some installation issues in version 1.11.1. + Further migration to libmandoc. + Initial public release (this utility is very much under development) of makewhatis, + initially named mandoc-db. + This utility produces keyword databases of manual content + mandoc-cgi, which features semantic querying of manual content. +

- - - - - - - - - - - - - - - - - -
- disc...@mdocml.bsd.lv - - bug-reports, general questions, and announcements - (archive) -
- tec...@mdocml.bsd.lv - - patches and system discussions - (archive) -
- sou...@mdocml.bsd.lv - - source commit messages - (archive) -
-
-

- NEWS -

-
-

- 04-04-2011: - version 1.11.1 -

-

- The earlier libroff, libmdoc, and libman soup have been merged into - a single library, libmandoc, which manages all aspects of - parsing real manuals (from line-handling to tbl parsing). -

-

- Beyond this structural change, initial eqn functionality is in - place. For the time being, this is limited to the recognition of equation blocks; - future version of mdocml will expand upon this framework. -

-

- As usual, many general fixes and improvements have also occured. In particular, a great - deal of redundancy and superfluous code has been removed with the merging of the backend - libraries. -

-
-
-

- 07-01-2011: - version 1.10.9 -

-

- Many back-end fixes have been implemented: argument handling (quoting), man improvements, error/warning classes, and many more. -

-

- Initial tbl functionality (see the TS, TE, and - T& macros in the roff manual) has been - merged from tbl.bsd.lv. Output is - still minimal, especially for -Thtml and -Txhtml, but manages to at least display data. This means that mandoc now has built-in support for two troff preprocessors via - libroff: soelim and tbl. -

-
-
-

- 24-12-2010: - version 1.10.8 -

-

- Significant improvements merged from OpenBSD downstream, including -

-
    -
  • many new roff components,
  • -
  • in-line implementation of troff's soelim,
  • -
  • broken-block handling,
  • -
  • overhauled error classifications, and
  • -
  • cleaned up handling of error conditions.
  • -
-

- Also overhauled the -Thtml and -Txhtml output modes. They now display readable output in arbitrary - browsers, including text-based ones like lynx. See HTML and XHTML manuals in the DOCUMENTATION section for examples. Attention: available style-sheet classes have been considerably - changed! See the example.style.css file for details. - Lastly, libmdoc and libman have been - cleaned up and reduced in size and complexity. -

-
-

- See cvsweb for - historical notes. -

-
-
- Copyright © 2008–2011 Kristaps Dzonsons, $Date: 2011/04/04 21:07:20 $ -
-
+

+ 04-04-2011: version 1.11.1 +

+

+ The earlier libroff, libmdoc, and libman soup have been merged into + a single library, libmandoc, which manages all aspects of + parsing real manuals (from line-handling to tbl parsing). +

+

+ Beyond this structural change, initial eqn functionality is in + place. For the time being, this is limited to the recognition of equation blocks; + future version of mdocml will expand upon this framework. +

+

+ As usual, many general fixes and improvements have also occurred. In particular, a great + deal of redundancy and superfluous code has been removed with the merging of the backend + libraries. +

+ +

+ See cvsweb for + historical notes. +

+ +

+ + Copyright © 2008–2011 + Kristaps Dzonsons, + $Date: 2011/05/26 21:23:50 $ + +

diff --git a/contrib/mdocml/libmandoc.h b/contrib/mdocml/libmandoc.h index 3157f290f2..5f8379aff2 100644 --- a/contrib/mdocml/libmandoc.h +++ b/contrib/mdocml/libmandoc.h @@ -1,4 +1,4 @@ -/* $Id: libmandoc.h,v 1.17 2011/03/28 23:52:13 kristaps Exp $ */ +/* $Id: libmandoc.h,v 1.21 2011/05/14 16:06:09 kristaps Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons * @@ -73,13 +73,13 @@ void mandoc_msg(enum mandocerr, struct mparse *, int, int, const char *); void mandoc_vmsg(enum mandocerr, struct mparse *, int, int, const char *, ...); -int mandoc_special(char *); char *mandoc_strdup(const char *); char *mandoc_getarg(struct mparse *, char **, int, int *); char *mandoc_normdate(struct mparse *, char *, int, int); int mandoc_eos(const char *, size_t, int); int mandoc_hyph(const char *, const char *); int mandoc_getcontrol(const char *, int *); +int mandoc_strntou(const char *, size_t, int); void mdoc_free(struct mdoc *); struct mdoc *mdoc_alloc(struct regset *, struct mparse *); diff --git a/contrib/mdocml/libmdoc.h b/contrib/mdocml/libmdoc.h index 2a55eb37be..0e4b125f42 100644 --- a/contrib/mdocml/libmdoc.h +++ b/contrib/mdocml/libmdoc.h @@ -1,4 +1,4 @@ -/* $Id: libmdoc.h,v 1.72 2011/03/22 14:33:05 kristaps Exp $ */ +/* $Id: libmdoc.h,v 1.74 2011/04/19 16:38:48 kristaps Exp $ */ /* * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons * @@ -62,20 +62,20 @@ struct mdoc_macro { enum margserr { ARGS_ERROR, - ARGS_EOLN, - ARGS_WORD, - ARGS_PUNCT, - ARGS_QWORD, - ARGS_PHRASE, - ARGS_PPHRASE, - ARGS_PEND + ARGS_EOLN, /* end-of-line */ + ARGS_WORD, /* normal word */ + ARGS_PUNCT, /* series of punctuation */ + ARGS_QWORD, /* quoted word */ + ARGS_PHRASE, /* Ta'd phrase (-column) */ + ARGS_PPHRASE, /* tabbed phrase (-column) */ + ARGS_PEND /* last phrase (-column) */ }; enum margverr { ARGV_ERROR, - ARGV_EOLN, - ARGV_ARG, - ARGV_WORD + ARGV_EOLN, /* end of line */ + ARGV_ARG, /* valid argument */ + ARGV_WORD /* normal word (or bad argument---same thing) */ }; /* @@ 
-133,14 +133,8 @@ void mdoc_argv_free(struct mdoc_arg *); enum margserr mdoc_args(struct mdoc *, int, int *, char *, enum mdoct, char **); enum margserr mdoc_zargs(struct mdoc *, int, - int *, char *, int, char **); -#define ARGS_DELIM (1 << 1) -#define ARGS_TABSEP (1 << 2) -#define ARGS_NOWARN (1 << 3) - + int *, char *, char **); int mdoc_macroend(struct mdoc *); - -#define DELIMSZ 6 /* hint: max possible size of a delimiter */ enum mdelim mdoc_isdelim(const char *); __END_DECLS diff --git a/contrib/mdocml/main.c b/contrib/mdocml/main.c index bbbb88466a..4c2e7e790c 100644 --- a/contrib/mdocml/main.c +++ b/contrib/mdocml/main.c @@ -1,4 +1,4 @@ -/* $Id: main.c,v 1.161 2011/03/31 10:53:43 kristaps Exp $ */ +/* $Id: main.c,v 1.163 2011/05/20 15:51:18 kristaps Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2010, 2011 Ingo Schwarze @@ -43,6 +43,8 @@ typedef void (*out_free)(void *); enum outt { OUTT_ASCII = 0, /* -Tascii */ + OUTT_LOCALE, /* -Tlocale */ + OUTT_UTF8, /* -Tutf8 */ OUTT_TREE, /* -Ttree */ OUTT_HTML, /* -Thtml */ OUTT_XHTML, /* -Txhtml */ @@ -206,9 +208,19 @@ parse(struct curparse *curp, int fd, switch (curp->outtype) { case (OUTT_XHTML): curp->outdata = xhtml_alloc(curp->outopts); + curp->outfree = html_free; break; case (OUTT_HTML): curp->outdata = html_alloc(curp->outopts); + curp->outfree = html_free; + break; + case (OUTT_UTF8): + curp->outdata = utf8_alloc(curp->outopts); + curp->outfree = ascii_free; + break; + case (OUTT_LOCALE): + curp->outdata = locale_alloc(curp->outopts); + curp->outfree = ascii_free; break; case (OUTT_ASCII): curp->outdata = ascii_alloc(curp->outopts); @@ -232,7 +244,6 @@ parse(struct curparse *curp, int fd, case (OUTT_XHTML): curp->outman = html_man; curp->outmdoc = html_mdoc; - curp->outfree = html_free; break; case (OUTT_TREE): curp->outman = tree_man; @@ -242,6 +253,10 @@ parse(struct curparse *curp, int fd, /* FALLTHROUGH */ case (OUTT_ASCII): /* FALLTHROUGH */ + case (OUTT_UTF8): + 
/* FALLTHROUGH */ + case (OUTT_LOCALE): + /* FALLTHROUGH */ case (OUTT_PS): curp->outman = terminal_man; curp->outmdoc = terminal_mdoc; @@ -299,6 +314,10 @@ toptions(struct curparse *curp, char *arg) curp->outtype = OUTT_TREE; else if (0 == strcmp(arg, "html")) curp->outtype = OUTT_HTML; + else if (0 == strcmp(arg, "utf8")) + curp->outtype = OUTT_UTF8; + else if (0 == strcmp(arg, "locale")) + curp->outtype = OUTT_LOCALE; else if (0 == strcmp(arg, "xhtml")) curp->outtype = OUTT_XHTML; else if (0 == strcmp(arg, "ps")) diff --git a/contrib/mdocml/main.h b/contrib/mdocml/main.h index bb503eb79e..07b9e879fa 100644 --- a/contrib/mdocml/main.h +++ b/contrib/mdocml/main.h @@ -1,4 +1,4 @@ -/* $Id: main.h,v 1.10 2010/07/31 23:52:58 schwarze Exp $ */ +/* $Id: main.h,v 1.12 2011/05/20 15:48:22 kristaps Exp $ */ /* * Copyright (c) 2009, 2010 Kristaps Dzonsons * @@ -41,6 +41,8 @@ void html_free(void *); void tree_mdoc(void *, const struct mdoc *); void tree_man(void *, const struct man *); +void *locale_alloc(char *); +void *utf8_alloc(char *); void *ascii_alloc(char *); void ascii_free(void *); diff --git a/contrib/mdocml/makewhatis.1 b/contrib/mdocml/makewhatis.1 new file mode 100644 index 0000000000..2eb385cd00 --- /dev/null +++ b/contrib/mdocml/makewhatis.1 @@ -0,0 +1,152 @@ +.\" $Id: makewhatis.1,v 1.2 2011/05/14 23:43:03 kristaps Exp $ +.\" +.\" Copyright (c) 2011 Kristaps Dzonsons +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: May 14 2011 $ +.Dt MAKEWHATIS 1 +.Os +.Sh NAME +.Nm makewhatis +.Nd index UNIX manuals +.Sh SYNOPSIS +.Nm +.Op Fl d Ar dir +.Ar +.Sh DESCRIPTION +The +.Nm +utility extracts keywords from +.Ux +manuals and indexes them for fast retrieval. +The arguments are as follows: +.Bl -tag -width Ds +.It Fl d Ar dir +The directory into which to write the keyword and index databases. +.It Ar +Read input from zero or more files in +.Xr mdoc 7 +or +.Xr man 7 +.Ux +manual format. +.El +.Pp +By default, +.Nm +constructs the +.Sx Index Database +and +.Sx Keyword Database +in the current working directory. +.Pp +If fatal parse errors are encountered, the offending file is printed to +stderr, omitted from the index, and the parse continues with the next +input file. +.Ss Index Database +The index database, +.Pa mandoc.index , +is a +.Xr recno 3 +database with record values consisting of +.Pp +.Bl -enum -compact +.It +a nil-terminated filename, +.It +a nil-terminated manual section, +.It +a nil-terminated manual title, +.It +a nil-terminated architecture +.Pq this is not often available +.It +and a nil-terminated description. +.El +.Pp +Both the manual section and description may be zero-length. +Entries are sequentially-numbered, but the filenames are unordered. +.Ss Keyword Database +The keyword database, +.Pa mandoc.db , +is a +.Xr btree 3 +database of nil-terminated keywords (record length is non-zero string +length plus one) mapping to a 8-byte binary field consisting of the +keyword type and source +.Sx Index Database +record number. 
+The type, an unsigned 32-bit integer in host order, is one of the +following: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It Li 0x01 +The name of a manual page as given in the NAME section. +.It Li 0x02 +A function prototype name as given in the SYNOPSIS section. +.It Li 0x03 +A utility name as given in the SYNOPSIS section. +.It Li 0x04 +An include file as given in the SYNOPSIS section. +.It Li 0x05 +A variable name as given in the SYNOPSIS section. +.It Li 0x06 +A standard as given in the STANDARDS section. +.It Li 0x07 +An author as given in the AUTHORS section. +.It Li 0x08 +A configuration as given in the SYNOPSIS section. +.El +.Pp +If a value is encountered outside of this range, the database is +corrupt. +.Pp +The latter four bytes are a host-ordered record number within the +.Sx Index Database . +.Pp +The +.Nm +utility is +.Ud +.Sh FILES +.Bl -tag -width Ds +.It Pa mandoc.db +A +.Xr btree 3 +keyword database mapping keywords to a type and file reference in +.Pa mandoc.index . +.It Pa mandoc.db~ +Working copy of +.Pa mandoc.db . +.It Pa mandoc.index +A +.Xr recno 3 +database of indexed file-names. +.It Pa mandoc.index~ +Working copy of +.Pa mandoc.index . +.El +.Sh EXIT STATUS +.Ex -std +.Sh SEE ALSO +.Xr mandoc 1 +.Sh AUTHORS +The +.Nm +utility was written by +.An Kristaps Dzonsons Aq kristaps@bsd.lv . +.Sh CAVEATS +Only +.Xr mdoc 7 +manuals are processed. diff --git a/contrib/mdocml/makewhatis.c b/contrib/mdocml/makewhatis.c new file mode 100644 index 0000000000..01d0f6a0bd --- /dev/null +++ b/contrib/mdocml/makewhatis.c @@ -0,0 +1,920 @@ +/* $Id: makewhatis.c,v 1.2 2011/05/15 02:47:17 kristaps Exp $ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include +#ifdef __linux__ +# include +#else +# include +#endif +#include +#include +#include +#include +#include +#include + +#include "man.h" +#include "mdoc.h" +#include "mandoc.h" + +#define MANDOC_DB "mandoc.db" +#define MANDOC_IDX "mandoc.index" +#define MANDOC_BUFSZ BUFSIZ +#define MANDOC_FLAGS O_CREAT|O_TRUNC|O_RDWR + +enum type { + MANDOC_NONE = 0, + MANDOC_NAME, + MANDOC_FUNCTION, + MANDOC_UTILITY, + MANDOC_INCLUDES, + MANDOC_VARIABLE, + MANDOC_STANDARD, + MANDOC_AUTHOR, + MANDOC_CONFIG +}; + +#define MAN_ARGS DB *db, \ + const char *dbn, \ + DBT *key, size_t *ksz, \ + DBT *val, \ + DBT *rval, size_t *rsz, \ + const struct man_node *n +#define MDOC_ARGS DB *db, \ + const char *dbn, \ + DBT *key, size_t *ksz, \ + DBT *val, \ + DBT *rval, size_t *rsz, \ + const struct mdoc_node *n + +static void dbt_append(DBT *, size_t *, const char *); +static void dbt_appendb(DBT *, size_t *, + const void *, size_t); +static void dbt_init(DBT *, size_t *); +static void dbt_put(DB *, const char *, DBT *, DBT *); +static void usage(void); +static void pman(DB *, const char *, DBT *, size_t *, + DBT *, DBT *, size_t *, struct man *); +static int pman_node(MAN_ARGS); +static void pmdoc(DB *, const char *, DBT *, size_t *, + DBT *, DBT *, size_t *, struct mdoc *); +static void pmdoc_node(MDOC_ARGS); +static void pmdoc_An(MDOC_ARGS); +static void pmdoc_Cd(MDOC_ARGS); +static void 
pmdoc_Fd(MDOC_ARGS); +static void pmdoc_In(MDOC_ARGS); +static void pmdoc_Fn(MDOC_ARGS); +static void pmdoc_Fo(MDOC_ARGS); +static void pmdoc_Nd(MDOC_ARGS); +static void pmdoc_Nm(MDOC_ARGS); +static void pmdoc_St(MDOC_ARGS); +static void pmdoc_Vt(MDOC_ARGS); + +typedef void (*pmdoc_nf)(MDOC_ARGS); + +static const char *progname; + +static const pmdoc_nf mdocs[MDOC_MAX] = { + NULL, /* Ap */ + NULL, /* Dd */ + NULL, /* Dt */ + NULL, /* Os */ + NULL, /* Sh */ + NULL, /* Ss */ + NULL, /* Pp */ + NULL, /* D1 */ + NULL, /* Dl */ + NULL, /* Bd */ + NULL, /* Ed */ + NULL, /* Bl */ + NULL, /* El */ + NULL, /* It */ + NULL, /* Ad */ + pmdoc_An, /* An */ + NULL, /* Ar */ + pmdoc_Cd, /* Cd */ + NULL, /* Cm */ + NULL, /* Dv */ + NULL, /* Er */ + NULL, /* Ev */ + NULL, /* Ex */ + NULL, /* Fa */ + pmdoc_Fd, /* Fd */ + NULL, /* Fl */ + pmdoc_Fn, /* Fn */ + NULL, /* Ft */ + NULL, /* Ic */ + pmdoc_In, /* In */ + NULL, /* Li */ + pmdoc_Nd, /* Nd */ + pmdoc_Nm, /* Nm */ + NULL, /* Op */ + NULL, /* Ot */ + NULL, /* Pa */ + NULL, /* Rv */ + pmdoc_St, /* St */ + pmdoc_Vt, /* Va */ + pmdoc_Vt, /* Vt */ + NULL, /* Xr */ + NULL, /* %A */ + NULL, /* %B */ + NULL, /* %D */ + NULL, /* %I */ + NULL, /* %J */ + NULL, /* %N */ + NULL, /* %O */ + NULL, /* %P */ + NULL, /* %R */ + NULL, /* %T */ + NULL, /* %V */ + NULL, /* Ac */ + NULL, /* Ao */ + NULL, /* Aq */ + NULL, /* At */ + NULL, /* Bc */ + NULL, /* Bf */ + NULL, /* Bo */ + NULL, /* Bq */ + NULL, /* Bsx */ + NULL, /* Bx */ + NULL, /* Db */ + NULL, /* Dc */ + NULL, /* Do */ + NULL, /* Dq */ + NULL, /* Ec */ + NULL, /* Ef */ + NULL, /* Em */ + NULL, /* Eo */ + NULL, /* Fx */ + NULL, /* Ms */ + NULL, /* No */ + NULL, /* Ns */ + NULL, /* Nx */ + NULL, /* Ox */ + NULL, /* Pc */ + NULL, /* Pf */ + NULL, /* Po */ + NULL, /* Pq */ + NULL, /* Qc */ + NULL, /* Ql */ + NULL, /* Qo */ + NULL, /* Qq */ + NULL, /* Re */ + NULL, /* Rs */ + NULL, /* Sc */ + NULL, /* So */ + NULL, /* Sq */ + NULL, /* Sm */ + NULL, /* Sx */ + NULL, /* Sy */ + NULL, /* Tn */ + 
NULL, /* Ux */ + NULL, /* Xc */ + NULL, /* Xo */ + pmdoc_Fo, /* Fo */ + NULL, /* Fc */ + NULL, /* Oo */ + NULL, /* Oc */ + NULL, /* Bk */ + NULL, /* Ek */ + NULL, /* Bt */ + NULL, /* Hf */ + NULL, /* Fr */ + NULL, /* Ud */ + NULL, /* Lb */ + NULL, /* Lp */ + NULL, /* Lk */ + NULL, /* Mt */ + NULL, /* Brq */ + NULL, /* Bro */ + NULL, /* Brc */ + NULL, /* %C */ + NULL, /* Es */ + NULL, /* En */ + NULL, /* Dx */ + NULL, /* %Q */ + NULL, /* br */ + NULL, /* sp */ + NULL, /* %U */ + NULL, /* Ta */ +}; + +int +main(int argc, char *argv[]) +{ + struct mparse *mp; /* parse sequence */ + struct mdoc *mdoc; /* resulting mdoc */ + struct man *man; /* resulting man */ + char *fn; /* current file being parsed */ + const char *msec, /* manual section */ + *mtitle, /* manual title */ + *arch, /* manual architecture */ + *dir; /* result dir (default: cwd) */ + char ibuf[MAXPATHLEN], /* index fname */ + ibbuf[MAXPATHLEN], /* index backup fname */ + fbuf[MAXPATHLEN], /* btree fname */ + fbbuf[MAXPATHLEN]; /* btree backup fname */ + int ch; + DB *idx, /* index database */ + *db; /* keyword database */ + DBT rkey, rval, /* recno entries */ + key, val; /* persistent keyword entries */ + size_t sv, + ksz, rsz; /* entry buffer size */ + char vbuf[8]; /* stringified record number */ + BTREEINFO info; /* btree configuration */ + recno_t rec; /* current record number */ + extern int optind; + extern char *optarg; + + progname = strrchr(argv[0], '/'); + if (progname == NULL) + progname = argv[0]; + else + ++progname; + + dir = ""; + + while (-1 != (ch = getopt(argc, argv, "d:"))) + switch (ch) { + case ('d'): + dir = optarg; + break; + default: + usage(); + return((int)MANDOCLEVEL_BADARG); + } + + argc -= optind; + argv += optind; + + /* + * Set up temporary file-names into which we're going to write + * all of our data (both for the index and database). These + * will be securely renamed to the real file-names after we've + * written all of our data. 
+ */ + + ibuf[0] = ibuf[MAXPATHLEN - 2] = + ibbuf[0] = ibbuf[MAXPATHLEN - 2] = + fbuf[0] = fbuf[MAXPATHLEN - 2] = + fbbuf[0] = fbbuf[MAXPATHLEN - 2] = '\0'; + + strlcat(fbuf, dir, MAXPATHLEN); + strlcat(fbuf, MANDOC_DB, MAXPATHLEN); + + strlcat(fbbuf, fbuf, MAXPATHLEN); + strlcat(fbbuf, "~", MAXPATHLEN); + + strlcat(ibuf, dir, MAXPATHLEN); + strlcat(ibuf, MANDOC_IDX, MAXPATHLEN); + + strlcat(ibbuf, ibuf, MAXPATHLEN); + strlcat(ibbuf, "~", MAXPATHLEN); + + if ('\0' != fbuf[MAXPATHLEN - 2] || + '\0' != fbbuf[MAXPATHLEN - 2] || + '\0' != ibuf[MAXPATHLEN - 2] || + '\0' != ibbuf[MAXPATHLEN - 2]) { + fprintf(stderr, "%s: Path too long\n", progname); + exit((int)MANDOCLEVEL_SYSERR); + } + + /* + * For the keyword database, open a BTREE database that allows + * duplicates. For the index database, use a standard RECNO + * database type. + */ + + memset(&info, 0, sizeof(BTREEINFO)); + info.flags = R_DUP; + db = dbopen(fbbuf, MANDOC_FLAGS, 0644, DB_BTREE, &info); + + if (NULL == db) { + perror(fbbuf); + exit((int)MANDOCLEVEL_SYSERR); + } + + idx = dbopen(ibbuf, MANDOC_FLAGS, 0644, DB_RECNO, NULL); + + if (NULL == db) { + perror(ibbuf); + (*db->close)(db); + exit((int)MANDOCLEVEL_SYSERR); + } + + /* + * Try parsing the manuals given on the command line. If we + * totally fail, then just keep on going. Take resulting trees + * and push them down into the database code. + * Use the auto-parser and don't report any errors. + */ + + mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL); + + memset(&key, 0, sizeof(DBT)); + memset(&val, 0, sizeof(DBT)); + memset(&rkey, 0, sizeof(DBT)); + memset(&rval, 0, sizeof(DBT)); + + val.size = sizeof(vbuf); + val.data = vbuf; + rkey.size = sizeof(recno_t); + + rec = 1; + ksz = rsz = 0; + + while (NULL != (fn = *argv++)) { + mparse_reset(mp); + + /* Parse and get (non-empty) AST. 
*/ + + if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) { + fprintf(stderr, "%s: Parse failure\n", fn); + continue; + } + mparse_result(mp, &mdoc, &man); + if (NULL == mdoc && NULL == man) + continue; + + /* Manual section: can be empty string. */ + + msec = NULL != mdoc ? + mdoc_meta(mdoc)->msec : + man_meta(man)->msec; + mtitle = NULL != mdoc ? + mdoc_meta(mdoc)->title : + man_meta(man)->title; + arch = NULL != mdoc ? mdoc_meta(mdoc)->arch : NULL; + + assert(msec); + assert(mtitle); + + /* + * The index record value consists of a nil-terminated + * filename, a nil-terminated manual section, and a + * nil-terminated description. Since the description + * may not be set, we set a sentinel to see if we're + * going to write a nil byte in its place. + */ + + dbt_init(&rval, &rsz); + dbt_appendb(&rval, &rsz, fn, strlen(fn) + 1); + dbt_appendb(&rval, &rsz, msec, strlen(msec) + 1); + dbt_appendb(&rval, &rsz, mtitle, strlen(mtitle) + 1); + dbt_appendb(&rval, &rsz, arch ? arch : "", + arch ? strlen(arch) + 1 : 1); + + sv = rval.size; + + /* Fix the record number in the btree value. */ + + memset(val.data, 0, sizeof(uint32_t)); + memcpy(val.data + 4, &rec, sizeof(uint32_t)); + + if (mdoc) + pmdoc(db, fbbuf, &key, &ksz, + &val, &rval, &rsz, mdoc); + else + pman(db, fbbuf, &key, &ksz, + &val, &rval, &rsz, man); + + /* + * Apply this to the index. If we haven't had a + * description set, put an empty one in now. + */ + + if (rval.size == sv) + dbt_appendb(&rval, &rsz, "", 1); + + rkey.data = &rec; + dbt_put(idx, ibbuf, &rkey, &rval); + + printf("Indexed: %s\n", fn); + rec++; + } + + (*db->close)(db); + (*idx->close)(idx); + + mparse_free(mp); + + free(key.data); + free(rval.data); + + /* Atomically replace the file with our temporary one. 
*/ + + if (-1 == rename(fbbuf, fbuf)) + perror(fbuf); + if (-1 == rename(ibbuf, ibuf)) + perror(fbuf); + + return((int)MANDOCLEVEL_OK); +} + +/* + * Initialise the stored database key whose data buffer is shared + * between uses (as the key must sometimes be constructed from an array + * of strings). Allocates the buffer while *ksz is zero; always resets the size. + */ +static void +dbt_init(DBT *key, size_t *ksz) +{ + + if (0 == *ksz) { + assert(0 == key->size); + assert(NULL == key->data); + key->data = mandoc_malloc(MANDOC_BUFSZ); + *ksz = MANDOC_BUFSZ; + } + + key->size = 0; +} + +/* + * Append a binary value to a database entry. This can be invoked + * multiple times; the buffer is automatically resized. + */ +static void +dbt_appendb(DBT *key, size_t *ksz, const void *cp, size_t sz) +{ + + assert(key->data); + + /* Overshoot by MANDOC_BUFSZ. */ + + while (key->size + sz >= *ksz) { + *ksz = key->size + sz + MANDOC_BUFSZ; + key->data = mandoc_realloc(key->data, *ksz); + } + + memcpy(&((char *)key->data)[key->size], cp, sz); + key->size += sz; +} + +/* + * Append a nil-terminated string to the database entry. This can be + * invoked multiple times. The database entry will be nil-terminated as + * well; if invoked multiple times, a space is put between strings.
+ */ +static void +dbt_append(DBT *key, size_t *ksz, const char *cp) +{ + size_t sz; + + if (0 == (sz = strlen(cp))) + return; + + assert(key->data); + + if (key->size) + ((char *)key->data)[(int)key->size - 1] = ' '; + + dbt_appendb(key, ksz, cp, sz + 1); +} + +/* ARGSUSED */ +static void +pmdoc_An(MDOC_ARGS) +{ + uint32_t fl; + + if (SEC_AUTHORS != n->sec) + return; + + for (n = n->child; n; n = n->next) + if (MDOC_TEXT == n->type) + dbt_append(key, ksz, n->string); + + fl = (uint32_t)MANDOC_AUTHOR; + memcpy(val->data, &fl, 4); +} + +/* ARGSUSED */ +static void +pmdoc_Fd(MDOC_ARGS) +{ + uint32_t fl; + const char *start, *end; + size_t sz; + + if (SEC_SYNOPSIS != n->sec) + return; + if (NULL == (n = n->child) || MDOC_TEXT != n->type) + return; + + /* + * Only consider those `Fd' macro fields that begin with an + * "inclusion" token (versus, e.g., #define). + */ + if (strcmp("#include", n->string)) + return; + + if (NULL == (n = n->next) || MDOC_TEXT != n->type) + return; + + /* + * Strip away the enclosing angle brackets and make sure we're + * not zero-length. 
+ */ + + start = n->string; + if ('<' == *start || '"' == *start) + start++; + + if (0 == (sz = strlen(start))) + return; + + end = &start[(int)sz]; + if ('>' == end[-1] || '"' == end[-1]) + end--; + if (end == start) /* nothing between the delimiters */ + return; + dbt_appendb(key, ksz, start, (size_t)(end - start)); + dbt_appendb(key, ksz, "", 1); + + fl = (uint32_t)MANDOC_INCLUDES; + memcpy(val->data, &fl, 4); +} + +/* ARGSUSED */ +static void +pmdoc_Cd(MDOC_ARGS) +{ + uint32_t fl; + + if (SEC_SYNOPSIS != n->sec) + return; + + for (n = n->child; n; n = n->next) + if (MDOC_TEXT == n->type) + dbt_append(key, ksz, n->string); + + fl = (uint32_t)MANDOC_CONFIG; + memcpy(val->data, &fl, 4); +} + +/* ARGSUSED */ +static void +pmdoc_In(MDOC_ARGS) +{ + uint32_t fl; + + if (SEC_SYNOPSIS != n->sec) + return; + if (NULL == n->child || MDOC_TEXT != n->child->type) + return; + + dbt_append(key, ksz, n->child->string); + fl = (uint32_t)MANDOC_INCLUDES; + memcpy(val->data, &fl, 4); +} + +/* ARGSUSED */ +static void +pmdoc_Fn(MDOC_ARGS) +{ + uint32_t fl; + const char *cp; + + if (SEC_SYNOPSIS != n->sec) + return; + if (NULL == n->child || MDOC_TEXT != n->child->type) + return; + + /* .Fn "struct type *arg" "foo": keep what follows the last space. */ + + cp = strrchr(n->child->string, ' '); + cp = NULL == cp ? + n->child->string : &cp[1]; + + /* Strip away pointer symbol.
*/ + + while ('*' == *cp) + cp++; + + dbt_append(key, ksz, cp); + fl = (uint32_t)MANDOC_FUNCTION; + memcpy(val->data, &fl, 4); +} + +/* ARGSUSED */ +static void +pmdoc_St(MDOC_ARGS) +{ + uint32_t fl; + + if (SEC_STANDARDS != n->sec) + return; + if (NULL == n->child || MDOC_TEXT != n->child->type) + return; + + dbt_append(key, ksz, n->child->string); + fl = (uint32_t)MANDOC_STANDARD; + memcpy(val->data, &fl, 4); +} + +/* ARGSUSED */ +static void +pmdoc_Vt(MDOC_ARGS) +{ + uint32_t fl; + const char *start; + size_t sz; + + if (SEC_SYNOPSIS != n->sec) + return; + if (MDOC_Vt == n->tok && MDOC_BODY != n->type) + return; + if (NULL == n->last || MDOC_TEXT != n->last->type) + return; + + /* + * Strip away leading pointer symbol '*' and trailing ';'. + */ + + start = n->last->string; + + while ('*' == *start) + start++; + + if (0 == (sz = strlen(start))) + return; + + if (';' == start[(int)sz - 1]) + sz--; + + if (0 == sz) + return; + + dbt_appendb(key, ksz, start, sz); + dbt_appendb(key, ksz, "", 1); + + fl = (uint32_t)MANDOC_VARIABLE; + memcpy(val->data, &fl, 4); +} + +/* ARGSUSED */ +static void +pmdoc_Fo(MDOC_ARGS) +{ + uint32_t fl; + + if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) + return; + if (NULL == n->child || MDOC_TEXT != n->child->type) + return; + + dbt_append(key, ksz, n->child->string); + fl = (uint32_t)MANDOC_FUNCTION; + memcpy(val->data, &fl, 4); +} + + +/* ARGSUSED */ +static void +pmdoc_Nd(MDOC_ARGS) +{ + int first; + + for (first = 1, n = n->child; n; n = n->next) { + if (MDOC_TEXT != n->type) + continue; + if (first) + dbt_appendb(rval, rsz, n->string, strlen(n->string) + 1); + else + dbt_append(rval, rsz, n->string); + first = 0; + } +} + +/* ARGSUSED */ +static void +pmdoc_Nm(MDOC_ARGS) +{ + uint32_t fl; + + if (SEC_NAME == n->sec) { + for (n = n->child; n; n = n->next) { + if (MDOC_TEXT != n->type) + continue; + dbt_append(key, ksz, n->string); + } + fl = (uint32_t)MANDOC_NAME; + memcpy(val->data, &fl, 4); + return; + } else if (SEC_SYNOPSIS 
!= n->sec || MDOC_HEAD != n->type) + return; + + for (n = n->child; n; n = n->next) { + if (MDOC_TEXT != n->type) + continue; + dbt_append(key, ksz, n->string); + } + + fl = (uint32_t)MANDOC_UTILITY; + memcpy(val->data, &fl, 4); +} + +static void +dbt_put(DB *db, const char *dbn, DBT *key, DBT *val) +{ + + if (0 == key->size) + return; + + assert(key->data); + assert(val->size); + assert(val->data); + + if (0 == (*db->put)(db, key, val, 0)) + return; + + perror(dbn); + exit((int)MANDOCLEVEL_SYSERR); + /* NOTREACHED */ +} + +/* + * Call out to per-macro handlers after clearing the persistent database + * key. If the macro sets the database key, flush it to the database. + */ +static void +pmdoc_node(MDOC_ARGS) +{ + + if (NULL == n) + return; + + switch (n->type) { + case (MDOC_HEAD): + /* FALLTHROUGH */ + case (MDOC_BODY): + /* FALLTHROUGH */ + case (MDOC_TAIL): + /* FALLTHROUGH */ + case (MDOC_BLOCK): + /* FALLTHROUGH */ + case (MDOC_ELEM): + if (NULL == mdocs[n->tok]) + break; + + dbt_init(key, ksz); + + (*mdocs[n->tok])(db, dbn, key, ksz, val, rval, rsz, n); + dbt_put(db, dbn, key, val); + break; + default: + break; + } + + pmdoc_node(db, dbn, key, ksz, val, rval, rsz, n->child); + pmdoc_node(db, dbn, key, ksz, val, rval, rsz, n->next); +} + +static int +pman_node(MAN_ARGS) +{ + const struct man_node *head, *body; + const char *start, *sv; + size_t sz; + uint32_t fl; + + if (NULL == n) + return(0); + + /* + * We're only searching for one thing: the first text child in + * the BODY of a NAME section. Since we don't keep track of + * sections in -man, run some hoops to find out whether we're in + * the correct section or not. 
+ */ + + if (MAN_BODY == n->type && MAN_SH == n->tok) { + body = n; + assert(body->parent); + if (NULL != (head = body->parent->head) && + 1 == head->nchild && + NULL != (head = (head->child)) && + MAN_TEXT == head->type && + 0 == strcmp(head->string, "NAME") && + NULL != (body = body->child) && + MAN_TEXT == body->type) { + + fl = (uint32_t)MANDOC_NAME; + memcpy(val->data, &fl, 4); + + assert(body->string); + start = sv = body->string; + + /* + * Go through a special heuristic dance here. + * This is why -man manuals are great! + * (I'm being sarcastic: my eyes are bleeding.) + * Conventionally, one or more manual names are + * comma-specified prior to a whitespace, then a + * dash, then a description. Try to puzzle out + * the name parts here. + */ + + for ( ;; ) { + sz = strcspn(start, " ,"); + if ('\0' == start[(int)sz]) + break; + + dbt_init(key, ksz); + dbt_appendb(key, ksz, start, sz); + dbt_appendb(key, ksz, "", 1); + + dbt_put(db, dbn, key, val); + + if (' ' == start[(int)sz]) { + start += (int)sz + 1; + break; + } + + assert(',' == start[(int)sz]); + start += (int)sz + 1; + while (' ' == *start) + start++; + } + + if (sv == start) { + dbt_init(key, ksz); + dbt_append(key, ksz, start); + return(1); + } + + while (' ' == *start) + start++; + + if (0 == strncmp(start, "-", 1)) + start += 1; + else if (0 == strncmp(start, "\\-", 2)) + start += 2; + else if (0 == strncmp(start, "\\(en", 4)) + start += 4; + else if (0 == strncmp(start, "\\(em", 4)) + start += 4; + + while (' ' == *start) + start++; + + dbt_appendb(rval, rsz, start, strlen(start) + 1); + } + } + + if (pman_node(db, dbn, key, ksz, val, rval, rsz, n->child)) + return(1); + if (pman_node(db, dbn, key, ksz, val, rval, rsz, n->next)) + return(1); + + return(0); +} + +static void +pman(DB *db, const char *dbn, DBT *key, size_t *ksz, + DBT *val, DBT *rval, size_t *rsz, struct man *m) +{ + + pman_node(db, dbn, key, ksz, val, rval, rsz, man_node(m)); +} + + +static void +pmdoc(DB *db, const char *dbn, 
DBT *key, size_t *ksz, + DBT *val, DBT *rval, size_t *rsz, struct mdoc *m) +{ + + pmdoc_node(db, dbn, key, ksz, val, rval, rsz, mdoc_node(m)); +} + +static void +usage(void) +{ + + fprintf(stderr, "usage: %s " + "[-d path] " + "[file...]\n", + progname); +} diff --git a/contrib/mdocml/man.7 b/contrib/mdocml/man.7 index 876f32a312..8aa1f2c0ec 100644 --- a/contrib/mdocml/man.7 +++ b/contrib/mdocml/man.7 @@ -1,4 +1,4 @@ -.\" $Id: man.7,v 1.99 2011/03/07 01:35:51 schwarze Exp $ +.\" $Id: man.7,v 1.100 2011/05/26 09:26:16 kristaps Exp $ .\" .\" Copyright (c) 2009, 2010 Kristaps Dzonsons .\" @@ -14,7 +14,7 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd $Mdocdate: March 7 2011 $ +.Dd $Mdocdate: May 26 2011 $ .Dt MAN 7 .Os .Sh NAME @@ -205,16 +205,20 @@ appears as the first macro. Beyond .Sx \&TH , at least one macro or text node must appear in the document. -Documents are generally structured as follows: +.Pp +The following is a well-formed skeleton +.Nm +file for a utility +.Qq progname : .Bd -literal -offset indent -\&.TH FOO 1 2009-10-10 +\&.TH PROGNAME 1 2009-10-10 \&.SH NAME -\efBfoo\efR \e(en a description goes here +\efBprogname\efR \e(en a description goes here \&.\e\*q .SH LIBRARY \&.\e\*q For sections 2 & 3 only. \&.\e\*q Not used in OpenBSD. \&.SH SYNOPSIS -\efBfoo\efR [\efB\e-options\efR] arguments... +\efBprogname\efR [\efB\e-options\efR] arguments... \&.SH DESCRIPTION The \efBfoo\efR utility processes files... 
\&.\e\*q .SH IMPLEMENTATION NOTES diff --git a/contrib/mdocml/man_html.c b/contrib/mdocml/man_html.c index 610e58fdbd..73953ecdd2 100644 --- a/contrib/mdocml/man_html.c +++ b/contrib/mdocml/man_html.c @@ -1,4 +1,4 @@ -/* $Id: man_html.c,v 1.70 2011/03/07 01:35:51 schwarze Exp $ */ +/* $Id: man_html.c,v 1.72 2011/05/17 11:34:31 kristaps Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * @@ -157,9 +157,7 @@ print_man_head(MAN_ARGS) { print_gen_head(h); - bufinit(h); - buffmt(h, "%s(%s)", m->title, m->msec); - + bufcat_fmt(h, "%s(%s)", m->title, m->msec); print_otag(h, TAG_TITLE, 0, NULL); print_text(h, h->buf); } @@ -184,7 +182,6 @@ print_man_node(MAN_ARGS) child = 1; t = h->tags.head; - bufinit(h); switch (n->type) { case (MAN_ROOT): @@ -259,8 +256,6 @@ print_man_node(MAN_ARGS) /* This will automatically close out any font scope. */ print_stagq(h, t); - bufinit(h); - switch (n->type) { case (MAN_ROOT): man_root_post(m, n, mh, h); @@ -401,6 +396,7 @@ man_br_pre(MAN_ARGS) } else su.scale = 0; + bufinit(h); bufcat_su(h, "height", &su); PAIR_STYLE_INIT(&tag, h); print_otag(h, TAG_DIV, 1, &tag); @@ -569,6 +565,7 @@ man_IP_pre(MAN_ARGS) if (MAN_BLOCK == n->type) { print_otag(h, TAG_P, 0, NULL); print_otag(h, TAG_TABLE, 0, NULL); + bufinit(h); bufcat_su(h, "width", &su); PAIR_STYLE_INIT(&tag, h); print_otag(h, TAG_COL, 1, &tag); @@ -604,6 +601,8 @@ man_HP_pre(MAN_ARGS) struct roffsu su; const struct man_node *np; + bufinit(h); + np = MAN_BLOCK == n->type ? 
n->head->child : n->parent->head->child; @@ -704,6 +703,7 @@ man_RS_pre(MAN_ARGS) if (n->head->child) a2width(n->head->child, &su); + bufinit(h); bufcat_su(h, "margin-left", &su); PAIR_STYLE_INIT(&tag, h); print_otag(h, TAG_DIV, 1, &tag); diff --git a/contrib/mdocml/man_macro.c b/contrib/mdocml/man_macro.c index b3212e6806..915648b430 100644 --- a/contrib/mdocml/man_macro.c +++ b/contrib/mdocml/man_macro.c @@ -1,4 +1,4 @@ -/* $Id: man_macro.c,v 1.60 2011/03/23 15:33:57 kristaps Exp $ */ +/* $Id: man_macro.c,v 1.62 2011/04/19 16:38:48 kristaps Exp $ */ /* * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons * diff --git a/contrib/mdocml/man_term.c b/contrib/mdocml/man_term.c index cb0b08d7e6..38ceeabdbd 100644 --- a/contrib/mdocml/man_term.c +++ b/contrib/mdocml/man_term.c @@ -1,4 +1,4 @@ -/* $Id: man_term.c,v 1.105 2011/03/22 10:13:01 kristaps Exp $ */ +/* $Id: man_term.c,v 1.109 2011/05/17 14:38:34 kristaps Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2010, 2011 Ingo Schwarze @@ -156,14 +156,7 @@ terminal_man(void *arg, const struct man *man) p->tabwidth = term_len(p, 5); if (NULL == p->symtab) - switch (p->enc) { - case (TERMENC_ASCII): - p->symtab = chars_init(CHARS_ASCII); - break; - default: - abort(); - /* NOTREACHED */ - } + p->symtab = mchars_alloc(); n = man_node(man); m = man_meta(man); diff --git a/contrib/mdocml/man_validate.c b/contrib/mdocml/man_validate.c index 03bb120f56..e0c882d49b 100644 --- a/contrib/mdocml/man_validate.c +++ b/contrib/mdocml/man_validate.c @@ -1,4 +1,4 @@ -/* $Id: man_validate.c,v 1.67 2011/03/22 15:30:30 kristaps Exp $ */ +/* $Id: man_validate.c,v 1.69 2011/04/13 09:57:08 kristaps Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2010 Ingo Schwarze @@ -54,7 +54,7 @@ static int check_par(CHKARGS); static int check_part(CHKARGS); static int check_root(CHKARGS); static int check_sec(CHKARGS); -static int check_text(CHKARGS); +static void check_text(CHKARGS); 
static int post_AT(CHKARGS); static int post_fi(CHKARGS); @@ -151,7 +151,8 @@ man_valid_post(struct man *m) switch (m->last->type) { case (MAN_TEXT): - return(check_text(m, m->last)); + check_text(m, m->last); + return(1); case (MAN_ROOT): return(check_root(m, m->last)); case (MAN_EQN): @@ -204,43 +205,48 @@ check_root(CHKARGS) return(1); } - -static int +static void check_text(CHKARGS) { - char *p; - int pos, c; + char *p, *pp, *cpp; + int pos; size_t sz; - for (p = n->string, pos = n->pos + 1; *p; p++, pos++) { - sz = strcspn(p, "\t\\"); - p += (int)sz; + p = n->string; + pos = n->pos + 1; - if ('\0' == *p) - break; + while ('\0' != *p) { + sz = strcspn(p, "\t\\"); + p += (int)sz; pos += (int)sz; if ('\t' == *p) { - if (MAN_LITERAL & m->flags) - continue; - man_pmsg(m, n->line, pos, MANDOCERR_BADTAB); + if ( ! (MAN_LITERAL & m->flags)) + man_pmsg(m, n->line, pos, MANDOCERR_BADTAB); + p++; + pos++; continue; - } + } else if ('\0' == *p) + break; - /* Check the special character. */ + pos++; + pp = ++p; - c = mandoc_special(p); - if (c) { - p += c - 1; - pos += c - 1; - } else + if (ESCAPE_ERROR == mandoc_escape + ((const char **)&pp, NULL, NULL)) { man_pmsg(m, n->line, pos, MANDOCERR_BADESCAPE); - } + break; + } - return(1); -} + cpp = p; + while (NULL != (cpp = memchr(cpp, ASCII_HYPH, pp - cpp))) + *cpp = '-'; + pos += pp - p; + p = pp; + } +} #define INEQ_DEFINE(x, ineq, name) \ static int \ @@ -319,14 +325,11 @@ static int check_sec(CHKARGS) { - if (MAN_HEAD == n->type && 0 == n->nchild) { - man_nmsg(m, n, MANDOCERR_SYNTARGCOUNT); - return(0); - } else if (MAN_BODY == n->type && 0 == n->nchild) - mandoc_msg(MANDOCERR_ARGCWARN, m->parse, n->line, - n->pos, "want children (have none)"); + if ( ! 
(MAN_HEAD == n->type && 0 == n->nchild)) + return(1); - return(1); + man_nmsg(m, n, MANDOCERR_SYNTARGCOUNT); + return(0); } diff --git a/contrib/mdocml/mandoc.1 b/contrib/mdocml/mandoc.1 index 91cb8fe890..7cf9ca5c9b 100644 --- a/contrib/mdocml/mandoc.1 +++ b/contrib/mdocml/mandoc.1 @@ -1,4 +1,4 @@ -.\" $Id: mandoc.1,v 1.85 2011/02/09 10:03:02 kristaps Exp $ +.\" $Id: mandoc.1,v 1.88 2011/05/20 15:51:18 kristaps Exp $ .\" .\" Copyright (c) 2009, 2010 Kristaps Dzonsons .\" @@ -14,7 +14,7 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd $Mdocdate: February 9 2011 $ +.Dd $Mdocdate: May 20 2011 $ .Dt MANDOC 1 .Os .Sh NAME @@ -158,6 +158,15 @@ utility accepts the following .Fl T arguments, which correspond to output modes: .Bl -tag -width Ds +.It Fl T Ns Cm utf8 +Encode output in the UTF-8 multi-byte format. +See +.Sx UTF-8 Output . +.It Fl T Ns Cm locale +Encode output using the current +.Xr locale 1 . +See +.Sx Locale Output . .It Fl T Ns Cm ascii Produce 7-bit ASCII output. This is the default. @@ -189,6 +198,23 @@ See .Pp If multiple input files are specified, these will be processed by the corresponding filter in-order. +.Ss UTF-8 Output +Use +.Fl T Ns Cm utf8 +to force a UTF-8 locale. +See +.Sx Locale Output +for details and options. +.Ss Locale Output +Locale-dependent output encoding is triggered with +.Fl T Ns Cm locale . +This option is not available on all systems: systems without locale +support, or those whose internal representation is not natively UCS-4, +will fall back to +.Fl T Ns Cm ascii . +See +.Sx ASCII Output +for font style specification and available command-line arguments. .Ss ASCII Output Output produced by .Fl T Ns Cm ascii , @@ -209,6 +235,9 @@ Emboldened characters are rendered as The special characters documented in .Xr mandoc_char 7 are rendered best-effort in an ASCII equivalent. +If no equivalent is found, +.Sq \&? +is used instead. 
.Pp Output width is limited to 78 visible columns unless literal input lines exceed this limit. @@ -460,6 +489,13 @@ Each input and output format is separately noted. .Ss ASCII Compatibility .Bl -bullet -compact .It +Unrenderable Unicode codepoints specified with +.Sq \e[uNNNN] +escapes are printed as +.Sq \&? +in mandoc. +In GNU troff, these raise an error. +.It The .Sq \&Bd \-literal and @@ -470,7 +506,7 @@ in .Fl T Ns Cm ascii are synonyms, as are \-filled and \-ragged. .It -In GNU troff, the +In historic GNU troff, the .Sq \&Pa .Xr mdoc 7 macro does not underline when scoped under an @@ -495,8 +531,6 @@ macro in has no effect. .It Words aren't hyphenated. -.It -Sentences are unilaterally monospaced. .El .Ss HTML/XHTML Compatibility .Bl -bullet -compact diff --git a/contrib/mdocml/mandoc.3 b/contrib/mdocml/mandoc.3 index 2fd887b8f0..300f2981bb 100644 --- a/contrib/mdocml/mandoc.3 +++ b/contrib/mdocml/mandoc.3 @@ -1,4 +1,4 @@ -.\" $Id: mandoc.3,v 1.2 2011/03/28 21:49:42 kristaps Exp $ +.\" $Id: mandoc.3,v 1.10 2011/05/24 21:41:11 kristaps Exp $ .\" .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons .\" Copyright (c) 2010 Ingo Schwarze @@ -15,13 +15,20 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
.\" -.Dd $Mdocdate: March 28 2011 $ +.Dd $Mdocdate: May 24 2011 $ .Dt MANDOC 3 .Os .Sh NAME .Nm mandoc , +.Nm mandoc_escape , .Nm man_meta , .Nm man_node , +.Nm mchars_alloc , +.Nm mchars_free , +.Nm mchars_num2char , +.Nm mchars_num2uc , +.Nm mchars_spec2cp , +.Nm mchars_spec2str , .Nm mdoc_meta , .Nm mdoc_node , .Nm mparse_alloc , @@ -32,10 +39,18 @@ .Nm mparse_strerror , .Nm mparse_strlevel .Nd mandoc macro compiler library +.Sh LIBRARY +.Lb mandoc .Sh SYNOPSIS .In man.h .In mdoc.h .In mandoc.h +.Ft "enum mandoc_esc" +.Fo mandoc_escape +.Fa "const char **end" +.Fa "const char **start" +.Fa "int *sz" +.Fc .Ft "const struct man_meta *" .Fo man_meta .Fa "const struct man *man" @@ -44,6 +59,28 @@ .Fo man_node .Fa "const struct man *man" .Fc +.Ft "struct mchars *" +.Fn mchars_alloc +.Ft void +.Fn mchars_free "struct mchars *p" +.Ft char +.Fn mchars_num2char "const char *cp" "size_t sz" +.Ft int +.Fn mchars_num2uc "const char *cp" "size_t sz" +.Ft "const char *" +.Fo mchars_spec2str +.Fa "struct mchars *p" +.Fa "const char *cp" +.Fa "size_t sz" +.Fa "size_t *rsz" +.Fc +.Ft int +.Fo mchars_spec2cp +.Fa "struct mchars *p" +.Fa "const char *cp" +.Fa "size_t sz" +.Ft "const char *" +.Fc .Ft "const struct mdoc_meta *" .Fo mdoc_meta .Fa "const struct mdoc *mdoc" @@ -90,6 +127,8 @@ .Vt extern const char * const * man_macronames; .Vt extern const char * const * mdoc_argnames; .Vt extern const char * const * mdoc_macronames; +.Fd "#define ASCII_NBRSP" +.Fd "#define ASCII_HYPH" .Sh DESCRIPTION The .Nm mandoc @@ -131,6 +170,151 @@ or invoke .Fn mparse_reset and parse new files. .El +.Pp +The +.Nm +library also contains routines for translating character strings into glyphs +.Pq see Fn mchars_alloc +and parsing escape sequences from strings +.Pq see Fn mandoc_escape . +.Pp +This library is +.Ud +.Sh REFERENCE +This section documents the functions, types, and variables available +via +.In mandoc.h . 
+.Ss Types +.Bl -ohang +.It Vt "enum mandoc_esc" +.It Vt "enum mandocerr" +.It Vt "enum mandoclevel" +.It Vt "struct mchars" +An opaque pointer to an object allowing for translation between +character strings and glyphs. +See +.Fn mchars_alloc . +.It Vt "enum mparset" +.It Vt "struct mparse" +.It Vt "mandocmsg" +.El +.Ss Functions +.Bl -ohang +.It Fn mandoc_escape +Scan an escape sequence, i.e., a character string beginning with +.Sq \e . +Pass a pointer to this string as +.Va end ; +it will be set to the supremum of the parsed escape sequence unless +returning ESCAPE_ERROR, in which case the string is bogus and should be +thrown away. +If not ESCAPE_ERROR or ESCAPE_IGNORE, +.Va start +is set to the first relevant character of the substring (font, glyph, +whatever) of length +.Va sz . +Both +.Va start +and +.Va sz +may be NULL. +.It Fn man_meta +Obtain the meta-data of a successful parse. +This may only be used on a pointer returned by +.Fn mparse_result . +.It Fn man_node +Obtain the root node of a successful parse. +This may only be used on a pointer returned by +.Fn mparse_result . +.It Fn mchars_alloc +Allocate an +.Vt "struct mchars *" +object for translating special characters into glyphs. +See +.Xr mandoc_char 7 +for an overview of special characters. +The object must be freed with +.Fn mchars_free . +.It Fn mchars_free +Free an object created with +.Fn mchars_alloc . +.It Fn mchars_num2char +Convert a character index (e.g., the \eN\(aq\(aq escape) into a +printable ASCII character. +Returns \e0 (the nil character) if the input sequence is malformed. +.It Fn mchars_num2uc +Convert a hexadecimal character index (e.g., the \e[uNNNN] escape) into +a Unicode codepoint. +Returns \e0 (the nil character) if the input sequence is malformed. +.It Fn mchars_spec2cp +Convert a special character into a valid Unicode codepoint. +Returns \-1 on failure or a non-zero Unicode codepoint on success. +.It Fn mchars_spec2str +Convert a special character into an ASCII string. 
+Returns NULL on failure. +.It Fn mdoc_meta +Obtain the meta-data of a successful parse. +This may only be used on a pointer returned by +.Fn mparse_result . +.It Fn mdoc_node +Obtain the root node of a successful parse. +This may only be used on a pointer returned by +.Fn mparse_result . +.It Fn mparse_alloc +Allocate a parser. +The same parser may be used for multiple files so long as +.Fn mparse_reset +is called between parses. +.Fn mparse_free +must be called to free the memory allocated by this function. +.It Fn mparse_free +Free all memory allocated by +.Fn mparse_alloc . +.It Fn mparse_readfd +Parse a file or file descriptor. +If +.Va fd +is -1, +.Va fname +is opened for reading. +Otherwise, +.Va fname +is assumed to be the name associated with +.Va fd . +This may be called multiple times with different parameters; however, +.Fn mparse_reset +should be invoked between parses. +.It Fn mparse_reset +Reset a parser so that +.Fn mparse_readfd +may be used again. +.It Fn mparse_result +Obtain the result of a parse. +Only successful parses +.Po +i.e., those where +.Fn mparse_readfd +returned less than MANDOCLEVEL_FATAL +.Pc +should invoke this function, in which case one of the two pointers will +be filled in. +.It Fn mparse_strerror +Return a statically-allocated string representation of an error code. +.It Fn mparse_strlevel +Return a statically-allocated string representation of a level code. +.El +.Ss Variables +.Bl -ohang +.It Va man_macronames +The string representation of a man macro as indexed by +.Vt "enum mant" . +.It Va mdoc_argnames +The string representation of a mdoc macro argument as indexed by +.Vt "enum mdocargt" . +.It Va mdoc_macronames +The string representation of a mdoc macro as indexed by +.Vt "enum mdoct" . +.El .Sh IMPLEMENTATION NOTES This section consists of structural documentation for .Xr mdoc 7 @@ -251,7 +435,7 @@ where a new body introduces a new phrase. .Pp The .Xr mdoc 7 -syntax tree accomodates for broken block structures as well. 
+syntax tree accommodates for broken block structures as well. The ENDBODY node is available to end the formatting associated with a given block before the physical end of that block. It has a non-null @@ -323,6 +507,7 @@ levels of badly-nested blocks. .Xr mandoc 1 , .Xr eqn 7 , .Xr man 7 , +.Xr mandoc_char 7 , .Xr mdoc 7 , .Xr roff 7 , .Xr tbl 7 diff --git a/contrib/mdocml/mandoc.c b/contrib/mdocml/mandoc.c index da4a16067c..465965a469 100644 --- a/contrib/mdocml/mandoc.c +++ b/contrib/mdocml/mandoc.c @@ -1,4 +1,4 @@ -/* $Id: mandoc.c,v 1.44 2011/03/28 23:52:13 kristaps Exp $ */ +/* $Id: mandoc.c,v 1.53 2011/05/24 21:31:23 kristaps Exp $ */ /* * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons * Copyright (c) 2011 Ingo Schwarze @@ -23,6 +23,8 @@ #include #include +#include +#include #include #include #include @@ -35,199 +37,358 @@ static int a2time(time_t *, const char *, const char *); static char *time2a(time_t); +static int numescape(const char *); -int -mandoc_special(char *p) +/* + * Pass over recursive numerical expressions. This context of this + * function is important: it's only called within character-terminating + * escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial + * recursion: we don't care about what's in these blocks. + * This returns the number of characters skipped or -1 if an error + * occurs (the caller should bail). + */ +static int +numescape(const char *start) { - int len, i; - char term; - char *sv; - - len = 0; - term = '\0'; - sv = p; - - assert('\\' == *p); - p++; - - switch (*p++) { -#if 0 - case ('Z'): + int i; + size_t sz; + const char *cp; + + i = 0; + + /* The expression consists of a subexpression. */ + + if ('\\' == start[i]) { + cp = &start[++i]; + /* + * Read past the end of the subexpression. + * Bail immediately on errors. + */ + if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL)) + return(-1); + return(i + cp - &start[i]); + } + + if ('(' != start[i++]) + return(0); + + /* + * A parenthesised subexpression. 
Read until the closing + * parenthesis, making sure to handle any nested subexpressions + * that might ruin our parse. + */ + + while (')' != start[i]) { + sz = strcspn(&start[i], ")\\"); + i += (int)sz; + + if ('\0' == start[i]) + return(-1); + else if ('\\' != start[i]) + continue; + + cp = &start[++i]; + if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL)) + return(-1); + i += cp - &start[i]; + } + + /* Read past the terminating ')'. */ + return(++i); +} + +enum mandoc_esc +mandoc_escape(const char **end, const char **start, int *sz) +{ + char c, term, numeric; + int i, lim, ssz, rlim; + const char *cp, *rstart; + enum mandoc_esc gly; + + cp = *end; + rstart = cp; + if (start) + *start = rstart; + i = lim = 0; + gly = ESCAPE_ERROR; + term = numeric = '\0'; + + switch ((c = cp[i++])) { + /* + * First the glyphs. There are several different forms of + * these, but each eventually returns a substring of the glyph + * name. + */ + case ('('): + gly = ESCAPE_SPECIAL; + lim = 2; + break; + case ('['): + gly = ESCAPE_SPECIAL; + /* + * Unicode escapes are defined in groff as \[uXXXX] to + * \[u10FFFF], where the contained value must be a valid + * Unicode codepoint. Here, however, only check whether + * it's not a zero-width escape. + */ + if ('u' == cp[i] && ']' != cp[i + 1]) + gly = ESCAPE_UNICODE; + term = ']'; + break; + case ('C'): + if ('\'' != cp[i]) + return(ESCAPE_ERROR); + gly = ESCAPE_SPECIAL; + term = '\''; + break; + + /* + * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where + * 'X' is the trigger. These have opaque sub-strings. 
+ */ + case ('F'): /* FALLTHROUGH */ - case ('X'): + case ('g'): /* FALLTHROUGH */ - case ('x'): + case ('k'): /* FALLTHROUGH */ - case ('S'): + case ('M'): /* FALLTHROUGH */ - case ('R'): + case ('m'): /* FALLTHROUGH */ - case ('N'): + case ('n'): /* FALLTHROUGH */ - case ('l'): + case ('V'): /* FALLTHROUGH */ - case ('L'): + case ('Y'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_IGNORE; /* FALLTHROUGH */ - case ('H'): + case ('f'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_FONT; + + rstart= &cp[i]; + if (start) + *start = rstart; + + switch (cp[i++]) { + case ('('): + lim = 2; + break; + case ('['): + term = ']'; + break; + default: + lim = 1; + i--; + break; + } + break; + + /* + * These escapes are of the form \X'Y', where 'X' is the trigger + * and 'Y' is any string. These have opaque sub-strings. + */ + case ('A'): /* FALLTHROUGH */ - case ('h'): + case ('b'): /* FALLTHROUGH */ case ('D'): /* FALLTHROUGH */ - case ('C'): - /* FALLTHROUGH */ - case ('b'): + case ('o'): /* FALLTHROUGH */ - case ('B'): + case ('R'): /* FALLTHROUGH */ - case ('a'): + case ('X'): /* FALLTHROUGH */ - case ('A'): - if (*p++ != '\'') - return(0); + case ('Z'): + if ('\'' != cp[i++]) + return(ESCAPE_ERROR); + gly = ESCAPE_IGNORE; term = '\''; break; -#endif + + /* + * These escapes are of the form \X'N', where 'X' is the trigger + * and 'N' resolves to a numerical expression. + */ + case ('B'): + /* FALLTHROUGH */ case ('h'): /* FALLTHROUGH */ + case ('H'): + /* FALLTHROUGH */ + case ('L'): + /* FALLTHROUGH */ + case ('l'): + /* FALLTHROUGH */ + case ('N'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_NUMBERED; + /* FALLTHROUGH */ + case ('S'): + /* FALLTHROUGH */ case ('v'): /* FALLTHROUGH */ + case ('w'): + /* FALLTHROUGH */ + case ('x'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_IGNORE; + if ('\'' != cp[i++]) + return(ESCAPE_ERROR); + term = numeric = '\''; + break; + + /* + * Sizes get a special category of their own. 
+ */ case ('s'): - if (ASCII_HYPH == *p) - *p = '-'; + gly = ESCAPE_IGNORE; - i = 0; - if ('+' == *p || '-' == *p) { - p++; - i = 1; - } + rstart = &cp[i]; + if (start) + *start = rstart; - switch (*p++) { + /* See +/- counts as a sign. */ + c = cp[i]; + if ('+' == c || '-' == c || ASCII_HYPH == c) + ++i; + + switch (cp[i++]) { case ('('): - len = 2; + lim = 2; break; case ('['): - term = ']'; + term = numeric = ']'; break; case ('\''): - term = '\''; + term = numeric = '\''; break; - case ('0'): - i = 1; - /* FALLTHROUGH */ default: - len = 1; - p--; + lim = 1; + i--; break; } - if (ASCII_HYPH == *p) - *p = '-'; - if ('+' == *p || '-' == *p) { - if (i) - return(0); - p++; - } - - /* Handle embedded numerical subexp or escape. */ - - if ('(' == *p) { - while (*p && ')' != *p) - if ('\\' == *p++) { - i = mandoc_special(--p); - if (0 == i) - return(0); - p += i; - } - - if (')' == *p++) - break; + /* See +/- counts as a sign. */ + c = cp[i]; + if ('+' == c || '-' == c || ASCII_HYPH == c) + ++i; - return(0); - } else if ('\\' == *p) { - if (0 == (i = mandoc_special(p))) - return(0); - p += i; - } + break; + /* + * Anything else is assumed to be a glyph. + */ + default: + gly = ESCAPE_SPECIAL; + lim = 1; + i--; break; -#if 0 - case ('Y'): - /* FALLTHROUGH */ - case ('V'): - /* FALLTHROUGH */ - case ('$'): - /* FALLTHROUGH */ - case ('n'): - /* FALLTHROUGH */ -#endif - case ('k'): - /* FALLTHROUGH */ - case ('M'): - /* FALLTHROUGH */ - case ('m'): - /* FALLTHROUGH */ - case ('f'): - /* FALLTHROUGH */ - case ('F'): - /* FALLTHROUGH */ - case ('*'): - switch (*p++) { - case ('('): - len = 2; + } + + assert(ESCAPE_ERROR != gly); + + rstart = &cp[i]; + if (start) + *start = rstart; + + /* + * If a terminating block has been specified, we need to + * handle the case of recursion, which could have their + * own terminating blocks that mess up our parse. This, by the + * way, means that the "start" and "size" values will be + * effectively meaningless. 
+ */ + + ssz = 0; + if (numeric && -1 == (ssz = numescape(&cp[i]))) + return(ESCAPE_ERROR); + + i += ssz; + rlim = -1; + + /* + * We have a character terminator. Try to read up to that + * character. If we can't (i.e., we hit the nil), then return + * an error; if we can, calculate our length, read past the + * terminating character, and exit. + */ + + if ('\0' != term) { + *end = strchr(&cp[i], term); + if ('\0' == *end) + return(ESCAPE_ERROR); + + rlim = *end - &cp[i]; + if (sz) + *sz = rlim; + (*end)++; + goto out; + } + + assert(lim > 0); + + /* + * We have a numeric limit. If the string is shorter than that, + * stop and return an error. Else adjust our endpoint, length, + * and return the current glyph. + */ + + if ((size_t)lim > strlen(&cp[i])) + return(ESCAPE_ERROR); + + rlim = lim; + if (sz) + *sz = rlim; + + *end = &cp[i] + lim; + +out: + assert(rlim >= 0 && rstart); + + /* Run post-processors. */ + + switch (gly) { + case (ESCAPE_FONT): + if (1 != rlim) break; - case ('['): - term = ']'; + switch (*rstart) { + case ('3'): + /* FALLTHROUGH */ + case ('B'): + gly = ESCAPE_FONTBOLD; break; - default: - len = 1; - p--; + case ('2'): + /* FALLTHROUGH */ + case ('I'): + gly = ESCAPE_FONTITALIC; + break; + case ('P'): + gly = ESCAPE_FONTPREV; + break; + case ('1'): + /* FALLTHROUGH */ + case ('R'): + gly = ESCAPE_FONTROMAN; break; } break; - case ('('): - len = 2; - break; - case ('['): - term = ']'; - break; - case ('z'): - len = 1; - if ('\\' == *p) { - if (0 == (i = mandoc_special(p))) - return(0); - p += i; - return(*p ? (int)(p - sv) : 0); - } - break; - case ('o'): - /* FALLTHROUGH */ - case ('w'): - if ('\'' == *p++) { - term = '\''; + case (ESCAPE_SPECIAL): + if (1 != rlim) break; - } - /* FALLTHROUGH */ + if ('c' == *rstart) + gly = ESCAPE_NOSPACE; + break; default: - len = 1; - p--; break; } - if (term) { - for ( ; *p && term != *p; p++) - if (ASCII_HYPH == *p) - *p = '-'; - return(*p ? 
(int)(p - sv) : 0); - } - - for (i = 0; *p && i < len; i++, p++) - if (ASCII_HYPH == *p) - *p = '-'; - return(i == len ? (int)(p - sv) : 0); + return(gly); } - void * mandoc_calloc(size_t num, size_t size) { @@ -303,11 +464,11 @@ mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos) /* Quoting can only start with a new word. */ start = *cpp; + quoted = 0; if ('"' == *start) { quoted = 1; start++; - } else - quoted = 0; + } pairs = 0; white = 0; @@ -448,7 +609,7 @@ mandoc_eos(const char *p, size_t sz, int enclosed) /* * End-of-sentence recognition must include situations where * some symbols, such as `)', allow prior EOS punctuation to - * propogate outward. + * propagate outward. */ found = 0; @@ -531,3 +692,35 @@ mandoc_getcontrol(const char *cp, int *ppos) *ppos = pos; return(1); } + +/* + * Convert a string to a long that may not be <0. + * If the string is invalid, or is less than 0, return -1. + */ +int +mandoc_strntou(const char *p, size_t sz, int base) +{ + char buf[32]; + char *ep; + long v; + + if (sz > 31) + return(-1); + + memcpy(buf, p, sz); + buf[(int)sz] = '\0'; + + errno = 0; + v = strtol(buf, &ep, base); + + if (buf[0] == '\0' || *ep != '\0') + return(-1); + + if ((errno == ERANGE && + (v == LONG_MAX || v == LONG_MIN)) || + (v > INT_MAX || v < 0)) + return(-1); + + return((int)v); +} + diff --git a/contrib/mdocml/mandoc.h b/contrib/mdocml/mandoc.h index 185c10bf47..20ab87a7b9 100644 --- a/contrib/mdocml/mandoc.h +++ b/contrib/mdocml/mandoc.h @@ -1,4 +1,4 @@ -/* $Id: mandoc.h,v 1.69 2011/03/28 21:49:42 kristaps Exp $ */ +/* $Id: mandoc.h,v 1.77 2011/05/24 21:31:23 kristaps Exp $ */ /* * Copyright (c) 2010, 2011 Kristaps Dzonsons * @@ -288,10 +288,25 @@ enum mparset { MPARSE_MAN /* assume -man */ }; +enum mandoc_esc { + ESCAPE_ERROR = 0, /* bail! 
unparsable escape */ + ESCAPE_IGNORE, /* escape to be ignored */ + ESCAPE_SPECIAL, /* a regular special character */ + ESCAPE_FONT, /* a generic font mode */ + ESCAPE_FONTBOLD, /* bold font mode */ + ESCAPE_FONTITALIC, /* italic font mode */ + ESCAPE_FONTROMAN, /* roman font mode */ + ESCAPE_FONTPREV, /* previous font mode */ + ESCAPE_NUMBERED, /* a numbered glyph */ + ESCAPE_UNICODE, /* a unicode codepoint */ + ESCAPE_NOSPACE /* suppress space if the last on a line */ +}; + typedef void (*mandocmsg)(enum mandocerr, enum mandoclevel, const char *, int, int, const char *); struct mparse; +struct mchars; struct mdoc; struct man; @@ -310,6 +325,16 @@ void *mandoc_calloc(size_t, size_t); void *mandoc_malloc(size_t); void *mandoc_realloc(void *, size_t); +enum mandoc_esc mandoc_escape(const char **, const char **, int *); + +struct mchars *mchars_alloc(void); +char mchars_num2char(const char *, size_t); +int mchars_num2uc(const char *, size_t); +const char *mchars_spec2str(struct mchars *, const char *, size_t, size_t *); +int mchars_spec2cp(struct mchars *, const char *, size_t); +void mchars_free(struct mchars *); + + __END_DECLS #endif /*!MANDOC_H*/ diff --git a/contrib/mdocml/mandoc_char.7 b/contrib/mdocml/mandoc_char.7 index ec478e09d5..d0c5dd7f80 100644 --- a/contrib/mdocml/mandoc_char.7 +++ b/contrib/mdocml/mandoc_char.7 @@ -1,4 +1,4 @@ -.\" $Id: mandoc_char.7,v 1.42 2011/02/09 22:53:20 schwarze Exp $ +.\" $Id: mandoc_char.7,v 1.45 2011/05/15 15:30:33 kristaps Exp $ .\" .\" Copyright (c) 2009 Kristaps Dzonsons .\" @@ -14,7 +14,7 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
.\" -.Dd $Mdocdate: February 9 2011 $ +.Dd $Mdocdate: May 15 2011 $ .Dt MANDOC_CHAR 7 .Os .Sh NAME @@ -481,8 +481,13 @@ Greek letters: .It \e(ts Ta \(ts Ta sigma terminal .El .Sh PREDEFINED STRINGS -These are not recommended for use, as they differ across -implementations: +Predefined strings are inherited from the macro packages of historical +troff implementations. +They are +.Em not recommended +for use, as they differ across implementations. +Manuals using these predefined strings are almost certainly not +portable. .Pp .Bl -column -compact -offset indent "Input" "Rendered" "Description" .It Em Input Ta Em Rendered Ta Em Description @@ -512,7 +517,23 @@ implementations: .It \e*(>= Ta \*(>= Ta greater-than-equal .It \e*(aa Ta \*(aa Ta acute .It \e*(ga Ta \*(ga Ta grave +.It \e*(Px Ta \*(Px Ta POSIX standard name +.It \e*(Ai Ta \*(Ai Ta ANSI standard name .El +.Sh UNICODE CHARACTERS +The escape sequence +.Pp +.Dl \e[uXXXX] +.Pp +is interpreted as a Unicode codepoint. +The codepoint must be in the range above U+0080 and less than U+10FFFF. +For compatibility, points must be zero-padded to four characters; if +greater than four characters, no zero padding is allowed. +Unicode surrogates are not allowed. +.\" .Pp +.\" Unicode glyphs attenuate to the +.\" .Sq \&? +.\" character if invalid or not rendered by current output media. .Sh NUMBERED CHARACTERS For backward compatibility with existing manuals, .Xr mandoc 1 @@ -535,6 +556,9 @@ troff implementations, at this time limited to GNU troff .Pp .Bl -dash -compact .It +The \eN\(aq\(aq escape sequence is limited to printable characters; in +groff, it accepts arbitrary character numbers. 
+.It In .Fl T Ns Cm ascii , the diff --git a/contrib/mdocml/mdoc.7 b/contrib/mdocml/mdoc.7 index 3a68ca5498..5bd8aa109f 100644 --- a/contrib/mdocml/mdoc.7 +++ b/contrib/mdocml/mdoc.7 @@ -1,4 +1,4 @@ -.\" $Id: mdoc.7,v 1.184 2011/04/01 19:50:49 kristaps Exp $ +.\" $Id: mdoc.7,v 1.188 2011/05/26 09:26:16 kristaps Exp $ .\" .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons .\" Copyright (c) 2010 Ingo Schwarze @@ -15,7 +15,7 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd $Mdocdate: April 1 2011 $ +.Dd $Mdocdate: May 26 2011 $ .Dt MDOC 7 .Os .Sh NAME @@ -40,25 +40,25 @@ An .Nm document follows simple rules: lines beginning with the control character -.Sq \. +.Sq \&. are parsed for macros. -Other lines are interpreted within the scope of -prior macros: +Text lines, those not beginning with the control character, are +interpreted within the scope of prior macros: .Bd -literal -offset indent \&.Sh Macro lines change control state. -Other lines are interpreted within the current state. +Text lines are interpreted within the current state. .Ed .Sh LANGUAGE SYNTAX .Nm documents may contain only graphable 7-bit ASCII characters, the space character, and, in certain circumstances, the tab character. .Pp -If the first character of a line is a space, that line is printed +If the first character of a text line is a space, that line is printed with a leading newline. .Ss Comments Text following a .Sq \e\*q , -whether in a macro or free-form text line, is ignored to the end of +whether in a macro or text line, is ignored to the end of line. A macro line with only a control character and comment escape, .Sq \&.\e\*q , @@ -97,13 +97,12 @@ Within a macro line, the following terms are reserved: .Pq reserved-word vertical bar .El .Pp -Use of reserved terms is described in -.Sx MACRO SYNTAX . 
For general use in macro lines, these can be escaped with a non-breaking space .Pq Sq \e& . +In text lines, these may be used as normal punctuation. .Ss Special Characters -Special characters may occur in both macro and free-form lines. +Special characters may occur in both macro and text lines. Sequences begin with the escape character .Sq \e followed by either an open-parenthesis @@ -132,18 +131,15 @@ escape followed by an indicator: B (bold), I (italic), R (Roman), or P .Pp A numerical representation 3, 2, or 1 (bold, italic, and Roman, respectively) may be used instead. -A text decoration is valid within -the current font scope only: if a macro opens a font scope alongside -its own scope, such as -.Sx \&Bf -.Cm \&Sy , -in-scope invocations of -.Sq \ef -are only valid within the font scope of the macro. -If +If a macro opens a font scope after calling +.Sq \ef , +such as with +.Sx \&Bf , +the .Sq \ef -is specified outside of any font scope, such as in unenclosed, free-form -text, it will affect the remainder of the document. +mode will be restored upon exiting the +.Sx \&Bf +scope. .Pp Note this form is .Em not @@ -177,9 +173,9 @@ and .Pq vertical bar . .Ss Whitespace Whitespace consists of the space character. -In free-form lines, whitespace is preserved within a line; unescaped +In text lines, whitespace is preserved within a line; unescaped trailing spaces are stripped from input (unless in a literal context). -Blank free-form lines, which may include whitespace, are only permitted +Blank text lines, which may include whitespace, are only permitted within literal contexts. .Pp In macro lines, whitespace delimits arguments and is discarded. @@ -199,7 +195,7 @@ Thus, the following produces \&.Op "Fl a" .Ed .Pp -In free-form mode, quotes are regarded as opaque text. +In text lines, quotes are regarded as opaque text. 
.Ss Scaling Widths Many macros support scaled widths for their arguments, such as stipulating a two-inch list indentation with the following: @@ -270,8 +266,8 @@ The proper spacing is also intelligently preserved if a sentence ends at the boundary of a macro line. For example: .Pp -.Dl \&Xr mandoc 1 \. -.Dl \&Fl T \&Ns \&Cm ascii \. +.Dl \&.Xr mandoc 1 \&. +.Dl \&.Fl T \&Ns \&Cm ascii \&. .Sh MANUAL STRUCTURE A well-formed .Nm @@ -300,19 +296,20 @@ sections, although this varies between manual sections. .Pp The following is a well-formed skeleton .Nm -file: +file for a utility +.Qq progname : .Bd -literal -offset indent \&.Dd $\&Mdocdate$ -\&.Dt mdoc 7 +\&.Dt PROGNAME section \&.Os \&.Sh NAME -\&.Nm foo +\&.Nm progname \&.Nd a description goes here \&.\e\*q .Sh LIBRARY \&.\e\*q For sections 2, 3, & 9 only. \&.\e\*q Not used in OpenBSD. \&.Sh SYNOPSIS -\&.Nm foo +\&.Nm progname \&.Op Fl options \&.Ar \&.Sh DESCRIPTION @@ -359,6 +356,10 @@ The syntax for this as follows: \&.Nd a one line description .Ed .Pp +Multiple +.Sq \&Nm +names should be separated by commas. +.Pp The .Sx \&Nm macro(s) must precede the @@ -386,16 +387,18 @@ configuration. For the first, utilities (sections 1, 6, and 8), this is generally structured as follows: .Bd -literal -offset indent -\&.Nm foo +\&.Nm bar \&.Op Fl v \&.Op Fl o Ar file \&.Op Ar -\&.Nm bar +\&.Nm foo \&.Op Fl v \&.Op Fl o Ar file \&.Op Ar .Ed .Pp +Commands should be ordered alphabetically. +.Pp For the second, function calls (sections 2, 3, 9): .Bd -literal -offset indent \&.In header.h @@ -406,6 +409,14 @@ For the second, function calls (sections 2, 3, 9): \&.Fn bar "const char *src" .Ed .Pp +Ordering of +.Sx \&In , +.Sx \&Vt , +.Sx \&Fn , +and +.Sx \&Fo +macros should follow C header-file conventions. +.Pp And for the third, configurations (section 4): .Bd -literal -offset indent \&.Cd \*qit* at isa? port 0x2e\*q @@ -454,9 +465,15 @@ or .Sx \&Ss macro or the end of an enclosing block, whichever comes first. 
.It Em DESCRIPTION -This expands upon the brief, one line description in -.Em NAME . -It usually contains a breakdown of the options (if documenting a +This begins with an expansion of the brief, one line description in +.Em NAME : +.Bd -literal -offset indent +The +\&.Nm +utility does this, that, and the other. +.Ed +.Pp +It usually follows with a breakdown of the options (if documenting a command), such as: .Bd -literal -offset indent The arguments are as follows: @@ -604,7 +621,10 @@ column, if applicable, describes closure rules. Multi-line scope closed by an explicit closing macro. All macros contains bodies; only .Sx \&Bf -contains a head. +and +.Pq optionally +.Sx \&Bl +contain a head. .Bd -literal -offset indent \&.Yo \(lB\-arg \(lBparm...\(rB\(rB \(lBhead...\(rB \(lBbody...\(rB @@ -1040,7 +1060,7 @@ Its syntax is as follows: .Pp Display blocks are used to select a different indentation and justification than the one used by the surrounding text. -They may contain both macro lines and free-form text lines. +They may contain both macro lines and text lines. By default, a display block is preceded by a vertical space. .Pp The @@ -1155,9 +1175,10 @@ See also and .Sx \&Sy . .Ss \&Bk -Keep the output generated from each macro input line together -on one single output line. -Line breaks in free-form text lines are unaffected. +For each macro, keep its output together on the same output line, +until the end of the macro or the end of the input line is reached, +whichever comes first. +Line breaks in text lines are unaffected. The syntax is as follows: .Pp .D1 Pf \. Sx \&Bk Fl words @@ -1851,9 +1872,9 @@ A function name. Its syntax is as follows: .Bd -ragged -offset indent .Pf \. Ns Sx \&Fn -.Op Cm functype -.Cm funcname -.Op Oo Cm argtype Oc Cm argname +.Op Ar functype +.Ar funcname +.Op Oo Ar argtype Oc Ar argname .Ed .Pp Function arguments are surrounded in parenthesis and @@ -1882,15 +1903,15 @@ This is a multi-line version of .Sx \&Fn . 
Its syntax is as follows: .Pp -.D1 Pf \. Sx \&Fo Cm funcname +.D1 Pf \. Sx \&Fo Ar funcname .Pp Invocations usually occur in the following context: .Bd -ragged -offset indent -.Pf \. Sx \&Ft Cm functype +.Pf \. Sx \&Ft Ar functype .br -.Pf \. Sx \&Fo Cm funcname +.Pf \. Sx \&Fo Ar funcname .br -.Pf \. Sx \&Fa Oo Cm argtype Oc Cm argname +.Pf \. Sx \&Fa Oo Ar argtype Oc Ar argname .br \&.\.\. .br @@ -1911,7 +1932,7 @@ and A function type. Its syntax is as follows: .Pp -.D1 Pf \. Sx \&Ft Cm functype +.D1 Pf \. Sx \&Ft Ar functype .Pp Examples: .Dl \&.Ft int @@ -1992,7 +2013,7 @@ and .Fl diag have the following syntax: .Pp -.D1 Pf \. Sx \&It Cm args +.D1 Pf \. Sx \&It Ar args .Pp Lists of type .Fl bullet , @@ -2065,14 +2086,14 @@ See also Specify a library. The syntax is as follows: .Pp -.D1 Pf \. Sx \&Lb Cm library +.D1 Pf \. Sx \&Lb Ar library .Pp The -.Cm library +.Ar library parameter may be a system library, such as -.Cm libz +.Ar libz or -.Cm libpam , +.Ar libpam , in which case a small library description is printed next to the linker invocation; or a custom library, in which case the library name is printed in quotes. @@ -2098,7 +2119,7 @@ and Format a hyperlink. Its syntax is as follows: .Pp -.D1 Pf \. Sx \&Lk Cm uri Op Cm name +.D1 Pf \. Sx \&Lk Ar uri Op Ar name .Pp Examples: .Dl \&.Lk http://bsd.lv \*qThe BSD.lv Project\*q @@ -2113,7 +2134,7 @@ Synonym for Display a mathematical symbol. Its syntax is as follows: .Pp -.D1 Pf \. Sx \&Ms Cm symbol +.D1 Pf \. Sx \&Ms Ar symbol .Pp Examples: .Dl \&.Ms sigma @@ -2124,7 +2145,7 @@ Format a hyperlink. Its syntax is as follows: .Pp -.D1 Pf \. Sx \&Mt Cm address +.D1 Pf \. Sx \&Mt Ar address .Pp Examples: .Dl \&.Mt discuss@manpages.bsd.lv @@ -2262,10 +2283,10 @@ any file. Its syntax is as follows: .Pp -.D1 Pf \. Sx \&Os Op Cm system Op Cm version +.D1 Pf \. Sx \&Os Op Ar system Op Ar version .Pp The optional -.Cm system +.Ar system parameter specifies the relevant operating system or environment. 
Left unspecified, it defaults to the local operating system version. This is the suggested form. @@ -2324,14 +2345,14 @@ Removes the space between its arguments. Its syntax is as follows: .Pp -.D1 Pf \. \&Pf Cm prefix suffix +.D1 Pf \. \&Pf Ar prefix suffix .Pp The -.Cm suffix +.Ar suffix argument may be a macro. .Pp Examples: -.Dl \&.Pf \e. \&Sx \&Pf \&Cm prefix suffix +.Dl \&.Pf \e. \&Sx \&Pf \&Ar prefix suffix .Ss \&Po Multi-line version of .Sx \&Pq . @@ -2452,11 +2473,11 @@ Its syntax is as follows: .D1 Pf \. Sx \&Sm Cm on | off .Pp By default, spacing is -.Cm on . +.Ar on . When switched -.Cm off , +.Ar off , no white space is inserted between macro arguments and between the -output generated from adjacent macros, but free-form text lines +output generated from adjacent macros, but text lines still get normal spacing between words and sentences. .Ss \&So Multi-line version of @@ -2679,15 +2700,15 @@ Link to another manual .Pq Qq cross-reference . Its syntax is as follows: .Pp -.D1 Pf \. Sx \&Xr Cm name section +.D1 Pf \. Sx \&Xr Ar name section .Pp The -.Cm name +.Ar name and -.Cm section +.Ar section are the name and section of the linked manual. If -.Cm section +.Ar section is followed by non-punctuation, an .Sx \&Ns is inserted into the token stream. @@ -2712,10 +2733,10 @@ This macro should not be used; it is implemented for compatibility with historical manuals. Its syntax is as follows: .Pp -.D1 Pf \. Sx \&sp Op Cm height +.D1 Pf \. Sx \&sp Op Ar height .Pp The -.Cm height +.Ar height argument must be formatted as described in .Sx Scaling Widths . 
If unspecified, diff --git a/contrib/mdocml/mdoc_argv.c b/contrib/mdocml/mdoc_argv.c index c3fd74b0f3..38909f94b9 100644 --- a/contrib/mdocml/mdoc_argv.c +++ b/contrib/mdocml/mdoc_argv.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_argv.c,v 1.73 2011/03/23 15:46:02 kristaps Exp $ */ +/* $Id: mdoc_argv.c,v 1.77 2011/05/12 23:44:01 kristaps Exp $ */ /* * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons * @@ -32,12 +32,25 @@ #include "libmandoc.h" #define MULTI_STEP 5 /* pre-allocate argument values */ +#define DELIMSZ 6 /* max possible size of a delimiter */ + +enum argsflag { + ARGSFL_NONE = 0, + ARGSFL_DELIM, /* handle delimiters of [[::delim::][ ]+]+ */ + ARGSFL_TABSEP /* handle tab/`Ta' separated phrases */ +}; + +enum argvflag { + ARGV_NONE, /* no args to flag (e.g., -split) */ + ARGV_SINGLE, /* one arg to flag (e.g., -file xxx) */ + ARGV_MULTI, /* multiple args (e.g., -column xxx yyy) */ + ARGV_OPT_SINGLE /* optional arg (e.g., -offset [xxx]) */ +}; static enum mdocargt argv_a2arg(enum mdoct, const char *); static enum margserr args(struct mdoc *, int, int *, - char *, int, char **); -static int args_checkpunct(struct mdoc *, - const char *, int, int, int); + char *, enum argsflag, char **); +static int args_checkpunct(const char *, int); static int argv(struct mdoc *, int, struct mdoc_argv *, int *, char *); static int argv_single(struct mdoc *, int, @@ -48,13 +61,6 @@ static int argv_multi(struct mdoc *, int, struct mdoc_argv *, int *, char *); static void argn_free(struct mdoc_arg *, int); -enum argvflag { - ARGV_NONE, /* no args to flag (e.g., -split) */ - ARGV_SINGLE, /* one arg to flag (e.g., -file xxx) */ - ARGV_MULTI, /* multiple args (e.g., -column xxx yyy) */ - ARGV_OPT_SINGLE /* optional arg (e.g., -offset [xxx]) */ -}; - static const enum argvflag argvflags[MDOC_ARG_MAX] = { ARGV_NONE, /* MDOC_Split */ ARGV_NONE, /* MDOC_Nosplit */ @@ -84,129 +90,129 @@ static const enum argvflag argvflags[MDOC_ARG_MAX] = { ARGV_NONE /* MDOC_Symbolic */ }; -static const int 
argflags[MDOC_MAX] = { - 0, /* Ap */ - 0, /* Dd */ - 0, /* Dt */ - 0, /* Os */ - 0, /* Sh */ - 0, /* Ss */ - 0, /* Pp */ - ARGS_DELIM, /* D1 */ - ARGS_DELIM, /* Dl */ - 0, /* Bd */ - 0, /* Ed */ - 0, /* Bl */ - 0, /* El */ - 0, /* It */ - ARGS_DELIM, /* Ad */ - ARGS_DELIM, /* An */ - ARGS_DELIM, /* Ar */ - 0, /* Cd */ - ARGS_DELIM, /* Cm */ - ARGS_DELIM, /* Dv */ - ARGS_DELIM, /* Er */ - ARGS_DELIM, /* Ev */ - 0, /* Ex */ - ARGS_DELIM, /* Fa */ - 0, /* Fd */ - ARGS_DELIM, /* Fl */ - ARGS_DELIM, /* Fn */ - ARGS_DELIM, /* Ft */ - ARGS_DELIM, /* Ic */ - 0, /* In */ - ARGS_DELIM, /* Li */ - 0, /* Nd */ - ARGS_DELIM, /* Nm */ - ARGS_DELIM, /* Op */ - 0, /* Ot */ - ARGS_DELIM, /* Pa */ - 0, /* Rv */ - ARGS_DELIM, /* St */ - ARGS_DELIM, /* Va */ - ARGS_DELIM, /* Vt */ - ARGS_DELIM, /* Xr */ - 0, /* %A */ - 0, /* %B */ - 0, /* %D */ - 0, /* %I */ - 0, /* %J */ - 0, /* %N */ - 0, /* %O */ - 0, /* %P */ - 0, /* %R */ - 0, /* %T */ - 0, /* %V */ - ARGS_DELIM, /* Ac */ - 0, /* Ao */ - ARGS_DELIM, /* Aq */ - ARGS_DELIM, /* At */ - ARGS_DELIM, /* Bc */ - 0, /* Bf */ - 0, /* Bo */ - ARGS_DELIM, /* Bq */ - ARGS_DELIM, /* Bsx */ - ARGS_DELIM, /* Bx */ - 0, /* Db */ - ARGS_DELIM, /* Dc */ - 0, /* Do */ - ARGS_DELIM, /* Dq */ - ARGS_DELIM, /* Ec */ - 0, /* Ef */ - ARGS_DELIM, /* Em */ - 0, /* Eo */ - ARGS_DELIM, /* Fx */ - ARGS_DELIM, /* Ms */ - ARGS_DELIM, /* No */ - ARGS_DELIM, /* Ns */ - ARGS_DELIM, /* Nx */ - ARGS_DELIM, /* Ox */ - ARGS_DELIM, /* Pc */ - ARGS_DELIM, /* Pf */ - 0, /* Po */ - ARGS_DELIM, /* Pq */ - ARGS_DELIM, /* Qc */ - ARGS_DELIM, /* Ql */ - 0, /* Qo */ - ARGS_DELIM, /* Qq */ - 0, /* Re */ - 0, /* Rs */ - ARGS_DELIM, /* Sc */ - 0, /* So */ - ARGS_DELIM, /* Sq */ - 0, /* Sm */ - ARGS_DELIM, /* Sx */ - ARGS_DELIM, /* Sy */ - ARGS_DELIM, /* Tn */ - ARGS_DELIM, /* Ux */ - ARGS_DELIM, /* Xc */ - 0, /* Xo */ - 0, /* Fo */ - 0, /* Fc */ - 0, /* Oo */ - ARGS_DELIM, /* Oc */ - 0, /* Bk */ - 0, /* Ek */ - 0, /* Bt */ - 0, /* Hf */ - 0, /* Fr */ - 0, /* Ud */ - 0, /* Lb */ 
- 0, /* Lp */ - ARGS_DELIM, /* Lk */ - ARGS_DELIM, /* Mt */ - ARGS_DELIM, /* Brq */ - 0, /* Bro */ - ARGS_DELIM, /* Brc */ - 0, /* %C */ - 0, /* Es */ - 0, /* En */ - 0, /* Dx */ - 0, /* %Q */ - 0, /* br */ - 0, /* sp */ - 0, /* %U */ - 0, /* Ta */ +static const enum argsflag argflags[MDOC_MAX] = { + ARGSFL_NONE, /* Ap */ + ARGSFL_NONE, /* Dd */ + ARGSFL_NONE, /* Dt */ + ARGSFL_NONE, /* Os */ + ARGSFL_NONE, /* Sh */ + ARGSFL_NONE, /* Ss */ + ARGSFL_NONE, /* Pp */ + ARGSFL_DELIM, /* D1 */ + ARGSFL_DELIM, /* Dl */ + ARGSFL_NONE, /* Bd */ + ARGSFL_NONE, /* Ed */ + ARGSFL_NONE, /* Bl */ + ARGSFL_NONE, /* El */ + ARGSFL_NONE, /* It */ + ARGSFL_DELIM, /* Ad */ + ARGSFL_DELIM, /* An */ + ARGSFL_DELIM, /* Ar */ + ARGSFL_NONE, /* Cd */ + ARGSFL_DELIM, /* Cm */ + ARGSFL_DELIM, /* Dv */ + ARGSFL_DELIM, /* Er */ + ARGSFL_DELIM, /* Ev */ + ARGSFL_NONE, /* Ex */ + ARGSFL_DELIM, /* Fa */ + ARGSFL_NONE, /* Fd */ + ARGSFL_DELIM, /* Fl */ + ARGSFL_DELIM, /* Fn */ + ARGSFL_DELIM, /* Ft */ + ARGSFL_DELIM, /* Ic */ + ARGSFL_NONE, /* In */ + ARGSFL_DELIM, /* Li */ + ARGSFL_NONE, /* Nd */ + ARGSFL_DELIM, /* Nm */ + ARGSFL_DELIM, /* Op */ + ARGSFL_NONE, /* Ot */ + ARGSFL_DELIM, /* Pa */ + ARGSFL_NONE, /* Rv */ + ARGSFL_DELIM, /* St */ + ARGSFL_DELIM, /* Va */ + ARGSFL_DELIM, /* Vt */ + ARGSFL_DELIM, /* Xr */ + ARGSFL_NONE, /* %A */ + ARGSFL_NONE, /* %B */ + ARGSFL_NONE, /* %D */ + ARGSFL_NONE, /* %I */ + ARGSFL_NONE, /* %J */ + ARGSFL_NONE, /* %N */ + ARGSFL_NONE, /* %O */ + ARGSFL_NONE, /* %P */ + ARGSFL_NONE, /* %R */ + ARGSFL_NONE, /* %T */ + ARGSFL_NONE, /* %V */ + ARGSFL_DELIM, /* Ac */ + ARGSFL_NONE, /* Ao */ + ARGSFL_DELIM, /* Aq */ + ARGSFL_DELIM, /* At */ + ARGSFL_DELIM, /* Bc */ + ARGSFL_NONE, /* Bf */ + ARGSFL_NONE, /* Bo */ + ARGSFL_DELIM, /* Bq */ + ARGSFL_DELIM, /* Bsx */ + ARGSFL_DELIM, /* Bx */ + ARGSFL_NONE, /* Db */ + ARGSFL_DELIM, /* Dc */ + ARGSFL_NONE, /* Do */ + ARGSFL_DELIM, /* Dq */ + ARGSFL_DELIM, /* Ec */ + ARGSFL_NONE, /* Ef */ + ARGSFL_DELIM, /* Em */ + 
ARGSFL_NONE, /* Eo */ + ARGSFL_DELIM, /* Fx */ + ARGSFL_DELIM, /* Ms */ + ARGSFL_DELIM, /* No */ + ARGSFL_DELIM, /* Ns */ + ARGSFL_DELIM, /* Nx */ + ARGSFL_DELIM, /* Ox */ + ARGSFL_DELIM, /* Pc */ + ARGSFL_DELIM, /* Pf */ + ARGSFL_NONE, /* Po */ + ARGSFL_DELIM, /* Pq */ + ARGSFL_DELIM, /* Qc */ + ARGSFL_DELIM, /* Ql */ + ARGSFL_NONE, /* Qo */ + ARGSFL_DELIM, /* Qq */ + ARGSFL_NONE, /* Re */ + ARGSFL_NONE, /* Rs */ + ARGSFL_DELIM, /* Sc */ + ARGSFL_NONE, /* So */ + ARGSFL_DELIM, /* Sq */ + ARGSFL_NONE, /* Sm */ + ARGSFL_DELIM, /* Sx */ + ARGSFL_DELIM, /* Sy */ + ARGSFL_DELIM, /* Tn */ + ARGSFL_DELIM, /* Ux */ + ARGSFL_DELIM, /* Xc */ + ARGSFL_NONE, /* Xo */ + ARGSFL_NONE, /* Fo */ + ARGSFL_NONE, /* Fc */ + ARGSFL_NONE, /* Oo */ + ARGSFL_DELIM, /* Oc */ + ARGSFL_NONE, /* Bk */ + ARGSFL_NONE, /* Ek */ + ARGSFL_NONE, /* Bt */ + ARGSFL_NONE, /* Hf */ + ARGSFL_NONE, /* Fr */ + ARGSFL_NONE, /* Ud */ + ARGSFL_NONE, /* Lb */ + ARGSFL_NONE, /* Lp */ + ARGSFL_DELIM, /* Lk */ + ARGSFL_DELIM, /* Mt */ + ARGSFL_DELIM, /* Brq */ + ARGSFL_NONE, /* Bro */ + ARGSFL_DELIM, /* Brc */ + ARGSFL_NONE, /* %C */ + ARGSFL_NONE, /* Es */ + ARGSFL_NONE, /* En */ + ARGSFL_NONE, /* Dx */ + ARGSFL_NONE, /* %Q */ + ARGSFL_NONE, /* br */ + ARGSFL_NONE, /* sp */ + ARGSFL_NONE, /* %U */ + ARGSFL_NONE, /* Ta */ }; static const enum mdocargt args_Ex[] = { @@ -376,18 +382,17 @@ argn_free(struct mdoc_arg *p, int iarg) } enum margserr -mdoc_zargs(struct mdoc *m, int line, int *pos, - char *buf, int flags, char **v) +mdoc_zargs(struct mdoc *m, int line, int *pos, char *buf, char **v) { - return(args(m, line, pos, buf, flags, v)); + return(args(m, line, pos, buf, ARGSFL_NONE, v)); } enum margserr mdoc_args(struct mdoc *m, int line, int *pos, char *buf, enum mdoct tok, char **v) { - int fl; + enum argsflag fl; struct mdoc_node *n; fl = argflags[tok]; @@ -404,38 +409,21 @@ mdoc_args(struct mdoc *m, int line, int *pos, for (n = m->last; n; n = n->parent) if (MDOC_Bl == n->tok) - break; - - if (n && 
LIST_column == n->norm->Bl.type) { - fl |= ARGS_TABSEP; - fl &= ~ARGS_DELIM; - } + if (LIST_column == n->norm->Bl.type) { + fl = ARGSFL_TABSEP; + break; + } return(args(m, line, pos, buf, fl, v)); } static enum margserr args(struct mdoc *m, int line, int *pos, - char *buf, int fl, char **v) + char *buf, enum argsflag fl, char **v) { char *p, *pp; enum margserr rc; - /* - * Parse out the terms (like `val' in `.Xx -arg val' or simply - * `.Xx val'), which can have all sorts of properties: - * - * ARGS_DELIM: use special handling if encountering trailing - * delimiters in the form of [[::delim::][ ]+]+. - * - * ARGS_NOWARN: don't post warnings. This is only used when - * re-parsing delimiters, as the warnings have already been - * posted. - * - * ARGS_TABSEP: use special handling for tab/`Ta' separated - * phrases like in `Bl -column'. - */ - assert(' ' != buf[*pos]); if ('\0' == buf[*pos]) { @@ -455,8 +443,9 @@ args(struct mdoc *m, int line, int *pos, *v = &buf[*pos]; - if (ARGS_DELIM & fl && args_checkpunct(m, buf, *pos, line, fl)) - return(ARGS_PUNCT); + if (ARGSFL_DELIM == fl) + if (args_checkpunct(buf, *pos)) + return(ARGS_PUNCT); /* * First handle TABSEP items, restricted to `Bl -column'. This @@ -465,7 +454,7 @@ args(struct mdoc *m, int line, int *pos, * for arguments at a later phase. */ - if (ARGS_TABSEP & fl) { + if (ARGSFL_TABSEP == fl) { /* Scan ahead to tab (can't be escaped). */ p = strchr(*v, '\t'); pp = NULL; @@ -504,7 +493,7 @@ args(struct mdoc *m, int line, int *pos, } /* Whitespace check for eoln case... */ - if ('\0' == *p && ' ' == *(p - 1) && ! 
(ARGS_NOWARN & fl)) + if ('\0' == *p && ' ' == *(p - 1)) mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE); *pos += (int)(p - *v); @@ -547,7 +536,7 @@ args(struct mdoc *m, int line, int *pos, } if ('\0' == buf[*pos]) { - if (ARGS_NOWARN & fl || MDOC_PPHRASE & m->flags) + if (MDOC_PPHRASE & m->flags) return(ARGS_QWORD); mdoc_pmsg(m, line, *pos, MANDOCERR_BADQUOTE); return(ARGS_QWORD); @@ -562,31 +551,14 @@ args(struct mdoc *m, int line, int *pos, while (' ' == buf[*pos]) (*pos)++; - if (0 == buf[*pos] && ! (ARGS_NOWARN & fl)) + if ('\0' == buf[*pos]) mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE); return(ARGS_QWORD); } - /* - * A non-quoted term progresses until either the end of line or - * a non-escaped whitespace. - */ - - for ( ; buf[*pos]; (*pos)++) - if (*pos && ' ' == buf[*pos] && '\\' != buf[*pos - 1]) - break; - - if ('\0' == buf[*pos]) - return(ARGS_WORD); - - buf[(*pos)++] = '\0'; - - while (' ' == buf[*pos]) - (*pos)++; - - if ('\0' == buf[*pos] && ! (ARGS_NOWARN & fl)) - mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE); + p = &buf[*pos]; + *v = mandoc_getarg(m->parse, &p, line, pos); return(ARGS_WORD); } @@ -598,7 +570,7 @@ args(struct mdoc *m, int line, int *pos, * whitespace may separate these tokens. */ static int -args_checkpunct(struct mdoc *m, const char *buf, int i, int ln, int fl) +args_checkpunct(const char *buf, int i) { int j; char dbuf[DELIMSZ]; @@ -638,9 +610,6 @@ args_checkpunct(struct mdoc *m, const char *buf, int i, int ln, int fl) i++; } - if ( ! 
(ARGS_NOWARN & fl) && i && ' ' == buf[i - 1]) - mdoc_pmsg(m, ln, i - 1, MANDOCERR_EOLNSPACE); - return('\0' == buf[i]); } @@ -652,40 +621,40 @@ args_checkpunct(struct mdoc *m, const char *buf, int i, int ln, int fl) static enum mdocargt argv_a2arg(enum mdoct tok, const char *p) { - const enum mdocargt *args; + const enum mdocargt *argsp; - args = NULL; + argsp = NULL; switch (tok) { case (MDOC_An): - args = args_An; + argsp = args_An; break; case (MDOC_Bd): - args = args_Bd; + argsp = args_Bd; break; case (MDOC_Bf): - args = args_Bf; + argsp = args_Bf; break; case (MDOC_Bk): - args = args_Bk; + argsp = args_Bk; break; case (MDOC_Bl): - args = args_Bl; + argsp = args_Bl; break; case (MDOC_Rv): /* FALLTHROUGH */ case (MDOC_Ex): - args = args_Ex; + argsp = args_Ex; break; default: return(MDOC_ARG_MAX); } - assert(args); + assert(argsp); - for ( ; MDOC_ARG_MAX != *args ; args++) - if (0 == strcmp(p, mdoc_argnames[*args])) - return(*args); + for ( ; MDOC_ARG_MAX != *argsp ; argsp++) + if (0 == strcmp(p, mdoc_argnames[*argsp])) + return(*argsp); return(MDOC_ARG_MAX); } @@ -700,7 +669,7 @@ argv_multi(struct mdoc *m, int line, for (v->sz = 0; ; v->sz++) { if ('-' == buf[*pos]) break; - ac = args(m, line, pos, buf, 0, &p); + ac = args(m, line, pos, buf, ARGSFL_NONE, &p); if (ARGS_ERROR == ac) return(0); else if (ARGS_EOLN == ac) @@ -726,7 +695,7 @@ argv_opt_single(struct mdoc *m, int line, if ('-' == buf[*pos]) return(1); - ac = args(m, line, pos, buf, 0, &p); + ac = args(m, line, pos, buf, ARGSFL_NONE, &p); if (ARGS_ERROR == ac) return(0); if (ARGS_EOLN == ac) @@ -752,7 +721,7 @@ argv_single(struct mdoc *m, int line, ppos = *pos; - ac = args(m, line, pos, buf, 0, &p); + ac = args(m, line, pos, buf, ARGSFL_NONE, &p); if (ARGS_EOLN == ac) { mdoc_pmsg(m, line, ppos, MANDOCERR_SYNTARGVCOUNT); return(0); diff --git a/contrib/mdocml/mdoc_html.c b/contrib/mdocml/mdoc_html.c index 49782a39f9..57ebc34804 100644 --- a/contrib/mdocml/mdoc_html.c +++ b/contrib/mdocml/mdoc_html.c @@ 
-1,4 +1,4 @@ -/* $Id: mdoc_html.c,v 1.162 2011/04/04 16:48:18 kristaps Exp $ */ +/* $Id: mdoc_html.c,v 1.169 2011/05/17 11:38:18 kristaps Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * @@ -288,7 +288,7 @@ a2width(const char *p, struct roffsu *su) if ( ! a2roffsu(p, su, SCALE_MAX)) { su->unit = SCALE_BU; - su->scale = (int)strlen(p); + su->scale = html_strlen(p); } } @@ -355,7 +355,7 @@ a2offs(const char *p, struct roffsu *su) SCALE_HS_INIT(su, INDENT * 2); else if ( ! a2roffsu(p, su, SCALE_MAX)) { su->unit = SCALE_BU; - su->scale = (int)strlen(p); + su->scale = html_strlen(p); } } @@ -382,13 +382,10 @@ print_mdoc_head(MDOC_ARGS) print_gen_head(h); bufinit(h); - buffmt(h, "%s(%s)", m->title, m->msec); + bufcat_fmt(h, "%s(%s)", m->title, m->msec); - if (m->arch) { - bufcat(h, " ("); - bufcat(h, m->arch); - bufcat(h, ")"); - } + if (m->arch) + bufcat_fmt(h, " (%s)", m->arch); print_otag(h, TAG_TITLE, 0, NULL); print_text(h, h->buf); @@ -415,7 +412,6 @@ print_mdoc_node(MDOC_ARGS) child = 1; t = h->tags.head; - bufinit(h); switch (n->type) { case (MDOC_ROOT): child = mdoc_root_pre(m, n, h); @@ -484,7 +480,6 @@ print_mdoc_node(MDOC_ARGS) print_stagq(h, t); - bufinit(h); switch (n->type) { case (MDOC_ROOT): mdoc_root_post(m, n, h); @@ -606,7 +601,6 @@ static int mdoc_sh_pre(MDOC_ARGS) { struct htmlpair tag; - char buf[BUFSIZ]; if (MDOC_BLOCK == n->type) { PAIR_CLASS_INIT(&tag, "section"); @@ -615,14 +609,14 @@ mdoc_sh_pre(MDOC_ARGS) } else if (MDOC_BODY == n->type) return(1); - buf[0] = '\0'; + bufinit(h); for (n = n->child; n; n = n->next) { - html_idcat(buf, n->string, BUFSIZ); + bufcat_id(h, n->string); if (n->next) - html_idcat(buf, " ", BUFSIZ); + bufcat_id(h, " "); } - PAIR_ID_INIT(&tag, buf); + PAIR_ID_INIT(&tag, h->buf); print_otag(h, TAG_H1, 1, &tag); return(1); } @@ -633,7 +627,6 @@ static int mdoc_ss_pre(MDOC_ARGS) { struct htmlpair tag; - char buf[BUFSIZ]; if (MDOC_BLOCK == n->type) { PAIR_CLASS_INIT(&tag, "subsection"); @@ -642,14 
+635,14 @@ mdoc_ss_pre(MDOC_ARGS) } else if (MDOC_BODY == n->type) return(1); - buf[0] = '\0'; + bufinit(h); for (n = n->child; n; n = n->next) { - html_idcat(buf, n->string, BUFSIZ); + bufcat_id(h, n->string); if (n->next) - html_idcat(buf, " ", BUFSIZ); + bufcat_id(h, " "); } - PAIR_ID_INIT(&tag, buf); + PAIR_ID_INIT(&tag, h->buf); print_otag(h, TAG_H2, 1, &tag); return(1); } @@ -703,7 +696,7 @@ mdoc_nm_pre(MDOC_ARGS) { struct htmlpair tag; struct roffsu su; - size_t len; + int len; switch (n->type) { case (MDOC_ELEM): @@ -731,12 +724,13 @@ mdoc_nm_pre(MDOC_ARGS) for (len = 0, n = n->child; n; n = n->next) if (MDOC_TEXT == n->type) - len += strlen(n->string); + len += html_strlen(n->string); if (0 == len && m->name) - len = strlen(m->name); + len = html_strlen(m->name); SCALE_HS_INIT(&su, (double)len); + bufinit(h); bufcat_su(h, "width", &su); PAIR_STYLE_INIT(&tag, h); print_otag(h, TAG_COL, 1, &tag); @@ -899,6 +893,8 @@ mdoc_it_pre(MDOC_ARGS) assert(lists[type]); PAIR_CLASS_INIT(&tag[0], lists[type]); + bufinit(h); + if (MDOC_HEAD == n->type) { switch (type) { case(LIST_bullet): @@ -999,6 +995,8 @@ mdoc_bl_pre(MDOC_ARGS) struct roffsu su; char buf[BUFSIZ]; + bufinit(h); + if (MDOC_BODY == n->type) { if (LIST_column == n->norm->Bl.type) print_otag(h, TAG_TBODY, 0, NULL); @@ -1018,7 +1016,6 @@ mdoc_bl_pre(MDOC_ARGS) for (i = 0; i < (int)n->norm->Bl.ncols; i++) { a2width(n->norm->Bl.cols[i], &su); - bufinit(h); if (i < (int)n->norm->Bl.ncols - 1) bufcat_su(h, "width", &su); else @@ -1147,6 +1144,7 @@ mdoc_d1_pre(MDOC_ARGS) return(1); SCALE_VS_INIT(&su, 0); + bufinit(h); bufcat_su(h, "margin-top", &su); bufcat_su(h, "margin-bottom", &su); PAIR_STYLE_INIT(&tag[0], h); @@ -1171,17 +1169,17 @@ static int mdoc_sx_pre(MDOC_ARGS) { struct htmlpair tag[2]; - char buf[BUFSIZ]; - strlcpy(buf, "#", BUFSIZ); + bufinit(h); + bufcat(h, "#x"); for (n = n->child; n; n = n->next) { - html_idcat(buf, n->string, BUFSIZ); + bufcat_id(h, n->string); if (n->next) - html_idcat(buf, " ", 
BUFSIZ); + bufcat_id(h, " "); } PAIR_CLASS_INIT(&tag[0], "link-sec"); - PAIR_HREF_INIT(&tag[1], buf); + PAIR_HREF_INIT(&tag[1], h->buf); print_otag(h, TAG_I, 1, tag); print_otag(h, TAG_A, 2, tag); @@ -1219,7 +1217,8 @@ mdoc_bd_pre(MDOC_ARGS) SCALE_HS_INIT(&su, 0); if (n->norm->Bd.offs) a2offs(n->norm->Bd.offs, &su); - + + bufinit(h); bufcat_su(h, "margin-left", &su); PAIR_STYLE_INIT(&tag[0], h); @@ -1438,7 +1437,6 @@ mdoc_fd_pre(MDOC_ARGS) buf[sz - 1] = '\0'; PAIR_CLASS_INIT(&tag[0], "link-includes"); - bufinit(h); i = 1; if (h->base_includes) { @@ -1558,9 +1556,10 @@ mdoc_fn_pre(MDOC_ARGS) h->flags |= HTML_NOSPACE; print_text(h, "("); + h->flags |= HTML_NOSPACE; - bufinit(h); PAIR_CLASS_INIT(&tag[0], "farg"); + bufinit(h); bufcat_style(h, "white-space", "nowrap"); PAIR_STYLE_INIT(&tag[1], h); @@ -1639,6 +1638,7 @@ mdoc_sp_pre(MDOC_ARGS) } else su.scale = 0; + bufinit(h); bufcat_su(h, "height", &su); PAIR_STYLE_INIT(&tag, h); print_otag(h, TAG_DIV, 1, &tag); @@ -1775,10 +1775,8 @@ mdoc_in_pre(MDOC_ARGS) assert(MDOC_TEXT == n->type); PAIR_CLASS_INIT(&tag[0], "link-includes"); - bufinit(h); i = 1; - if (h->base_includes) { buffmt_includes(h, n->string); PAIR_HREF_INIT(&tag[i], h->buf); @@ -1917,6 +1915,7 @@ mdoc_bf_pre(MDOC_ARGS) * We want this to be inline-formatted, but needs to be div to * accept block children. */ + bufinit(h); bufcat_style(h, "display", "inline"); SCALE_HS_INIT(&su, 1); /* Needs a left-margin for spacing. 
*/ diff --git a/contrib/mdocml/mdoc_macro.c b/contrib/mdocml/mdoc_macro.c index b334b4e402..03d1b91cb7 100644 --- a/contrib/mdocml/mdoc_macro.c +++ b/contrib/mdocml/mdoc_macro.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_macro.c,v 1.106 2011/03/22 14:33:05 kristaps Exp $ */ +/* $Id: mdoc_macro.c,v 1.109 2011/04/30 10:18:24 kristaps Exp $ */ /* * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons * Copyright (c) 2010 Ingo Schwarze @@ -600,7 +600,19 @@ dword(struct mdoc *m, int line, if (DELIM_OPEN == d) m->last->flags |= MDOC_DELIMO; - else if (DELIM_CLOSE == d) + + /* + * Closing delimiters only suppress the preceding space + * when they follow something, not when they start a new + * block or element, and not when they follow `No'. + * + * XXX Explicitly special-casing MDOC_No here feels + * like a layering violation. Find a better way + * and solve this in the code related to `No'! + */ + + else if (DELIM_CLOSE == d && m->last->prev && + m->last->prev->tok != MDOC_No) m->last->flags |= MDOC_DELIMC; return(1); @@ -618,7 +630,7 @@ append_delims(struct mdoc *m, int line, int *pos, char *buf) for (;;) { la = *pos; - ac = mdoc_zargs(m, line, pos, buf, ARGS_NOWARN, &p); + ac = mdoc_zargs(m, line, pos, buf, &p); if (ARGS_ERROR == ac) return(0); @@ -631,12 +643,12 @@ append_delims(struct mdoc *m, int line, int *pos, char *buf) * If we encounter end-of-sentence symbols, then trigger * the double-space. * - * XXX: it's easy to allow this to propogate outward to + * XXX: it's easy to allow this to propagate outward to * the last symbol, such that `. )' will cause the * correct double-spacing. However, (1) groff isn't * smart enough to do this and (2) it would require * knowing which symbols break this behaviour, for - * example, `. ;' shouldn't propogate the double-space. + * example, `. ;' shouldn't propagate the double-space. 
*/ if (mandoc_eos(p, strlen(p), 0)) m->last->flags |= MDOC_EOS; @@ -995,7 +1007,7 @@ blk_full(MACRO_PROT_ARGS) } /* - * This routine accomodates implicitly- and explicitly-scoped + * This routine accommodates implicitly- and explicitly-scoped * macro openings. Implicit ones first close out prior scope * (seen above). Delay opening the head until necessary to * allow leading punctuation to print. Special consideration @@ -1292,7 +1304,7 @@ blk_part_imp(MACRO_PROT_ARGS) if (mandoc_eos(n->string, strlen(n->string), 1)) n->flags |= MDOC_EOS; - /* Up-propogate the end-of-space flag. */ + /* Up-propagate the end-of-space flag. */ if (n && (MDOC_EOS & n->flags)) { body->flags |= MDOC_EOS; @@ -1717,7 +1729,7 @@ phrase(struct mdoc *m, int line, int ppos, char *buf) for (pos = ppos; ; ) { la = pos; - ac = mdoc_zargs(m, line, &pos, buf, 0, &p); + ac = mdoc_zargs(m, line, &pos, buf, &p); if (ARGS_ERROR == ac) return(0); @@ -1762,7 +1774,7 @@ phrase_ta(MACRO_PROT_ARGS) for (;;) { la = *pos; - ac = mdoc_zargs(m, line, pos, buf, 0, &p); + ac = mdoc_zargs(m, line, pos, buf, &p); if (ARGS_ERROR == ac) return(0); diff --git a/contrib/mdocml/mdoc_term.c b/contrib/mdocml/mdoc_term.c index 47c212489d..1a5ce4c214 100644 --- a/contrib/mdocml/mdoc_term.c +++ b/contrib/mdocml/mdoc_term.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_term.c,v 1.226 2011/04/04 16:27:03 kristaps Exp $ */ +/* $Id: mdoc_term.c,v 1.230 2011/05/17 14:38:34 kristaps Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2010 Ingo Schwarze @@ -264,14 +264,7 @@ terminal_mdoc(void *arg, const struct mdoc *mdoc) p->tabwidth = term_len(p, 5); if (NULL == p->symtab) - switch (p->enc) { - case (TERMENC_ASCII): - p->symtab = chars_init(CHARS_ASCII); - break; - default: - abort(); - /* NOTREACHED */ - } + p->symtab = mchars_alloc(); n = mdoc_node(mdoc); m = mdoc_meta(mdoc); diff --git a/contrib/mdocml/mdoc_validate.c b/contrib/mdocml/mdoc_validate.c index 707864441c..a34a221d69 100644 --- 
a/contrib/mdocml/mdoc_validate.c +++ b/contrib/mdocml/mdoc_validate.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_validate.c,v 1.166 2011/04/03 09:53:50 kristaps Exp $ */ +/* $Id: mdoc_validate.c,v 1.169 2011/04/30 10:18:24 kristaps Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2010, 2011 Ingo Schwarze @@ -155,9 +155,9 @@ static v_post posts_notext[] = { ewarn_eq0, NULL }; static v_post posts_ns[] = { post_ns, NULL }; static v_post posts_os[] = { post_os, post_prol, NULL }; static v_post posts_rs[] = { post_rs, NULL }; -static v_post posts_sh[] = { post_ignpar, hwarn_ge1, bwarn_ge1, post_sh, NULL }; +static v_post posts_sh[] = { post_ignpar, hwarn_ge1, post_sh, NULL }; static v_post posts_sp[] = { ewarn_le1, NULL }; -static v_post posts_ss[] = { post_ignpar, hwarn_ge1, bwarn_ge1, NULL }; +static v_post posts_ss[] = { post_ignpar, hwarn_ge1, NULL }; static v_post posts_st[] = { post_st, NULL }; static v_post posts_std[] = { post_std, NULL }; static v_post posts_text[] = { ewarn_ge1, NULL }; @@ -545,31 +545,39 @@ check_argv(struct mdoc *m, struct mdoc_node *n, struct mdoc_argv *v) static void check_text(struct mdoc *m, int ln, int pos, char *p) { - int c; + char *cpp, *pp; size_t sz; - for ( ; *p; p++, pos++) { + while ('\0' != *p) { sz = strcspn(p, "\t\\"); - p += (int)sz; - - if ('\0' == *p) - break; + p += (int)sz; pos += (int)sz; if ('\t' == *p) { if ( ! 
(MDOC_LITERAL & m->flags)) mdoc_pmsg(m, ln, pos, MANDOCERR_BADTAB); + p++; + pos++; continue; - } + } else if ('\0' == *p) + break; + + pos++; + pp = ++p; - if (0 == (c = mandoc_special(p))) { + if (ESCAPE_ERROR == mandoc_escape + ((const char **)&pp, NULL, NULL)) { mdoc_pmsg(m, ln, pos, MANDOCERR_BADESCAPE); - continue; + break; } - p += c - 1; - pos += c - 1; + cpp = p; + while (NULL != (cpp = memchr(cpp, ASCII_HYPH, pp - cpp))) + *cpp = '-'; + + pos += pp - p; + p = pp; } } @@ -1527,7 +1535,7 @@ post_bl_head(POST_ARGS) assert(0 == np->args->argv[j].sz); /* - * Accomodate for new-style groff column syntax. Shuffle the + * Accommodate for new-style groff column syntax. Shuffle the * child nodes, all of which must be TEXT, as arguments for the * column field. Then, delete the head children. */ diff --git a/contrib/mdocml/out.c b/contrib/mdocml/out.c index eb303d5194..225d4639d8 100644 --- a/contrib/mdocml/out.c +++ b/contrib/mdocml/out.c @@ -1,4 +1,4 @@ -/* $Id: out.c,v 1.39 2011/03/17 08:49:34 kristaps Exp $ */ +/* $Id: out.c,v 1.40 2011/04/09 15:29:40 kristaps Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2011 Ingo Schwarze @@ -174,243 +174,6 @@ time2a(time_t t, char *dst, size_t sz) (void)strftime(p, sz, "%Y", &tm); } - -int -a2roffdeco(enum roffdeco *d, const char **word, size_t *sz) -{ - int i, j, lim; - char term, c; - const char *wp; - enum roffdeco dd; - - *d = DECO_NONE; - lim = i = 0; - term = '\0'; - wp = *word; - - switch ((c = wp[i++])) { - case ('('): - *d = DECO_SPECIAL; - lim = 2; - break; - case ('F'): - /* FALLTHROUGH */ - case ('f'): - *d = 'F' == c ? 
DECO_FFONT : DECO_FONT; - - switch (wp[i++]) { - case ('('): - lim = 2; - break; - case ('['): - term = ']'; - break; - case ('3'): - /* FALLTHROUGH */ - case ('B'): - *d = DECO_BOLD; - return(i); - case ('2'): - /* FALLTHROUGH */ - case ('I'): - *d = DECO_ITALIC; - return(i); - case ('P'): - *d = DECO_PREVIOUS; - return(i); - case ('1'): - /* FALLTHROUGH */ - case ('R'): - *d = DECO_ROMAN; - return(i); - default: - i--; - lim = 1; - break; - } - break; - case ('k'): - /* FALLTHROUGH */ - case ('M'): - /* FALLTHROUGH */ - case ('m'): - /* FALLTHROUGH */ - case ('*'): - if ('*' == c) - *d = DECO_RESERVED; - - switch (wp[i++]) { - case ('('): - lim = 2; - break; - case ('['): - term = ']'; - break; - default: - i--; - lim = 1; - break; - } - break; - - case ('N'): - - /* - * Sequence of characters: backslash, 'N' (i = 0), - * starting delimiter (i = 1), character number (i = 2). - */ - - *word = wp + 2; - *sz = 0; - - /* - * Cannot use a digit as a starting delimiter; - * but skip the digit anyway. - */ - - if (isdigit((int)wp[1])) - return(2); - - /* - * Any non-digit terminates the character number. - * That is, the terminating delimiter need not - * match the starting delimiter. - */ - - for (i = 2; isdigit((int)wp[i]); i++) - (*sz)++; - - /* - * This is only a numbered character - * if the character number has at least one digit. - */ - - if (*sz) - *d = DECO_NUMBERED; - - /* - * Skip the terminating delimiter, even if it does not - * match, and even if there is no character number. 
- */ - - return(++i); - - case ('h'): - /* FALLTHROUGH */ - case ('v'): - /* FALLTHROUGH */ - case ('s'): - j = 0; - if ('+' == wp[i] || '-' == wp[i]) { - i++; - j = 1; - } - - switch (wp[i++]) { - case ('('): - lim = 2; - break; - case ('['): - term = ']'; - break; - case ('\''): - term = '\''; - break; - case ('0'): - j = 1; - /* FALLTHROUGH */ - default: - i--; - lim = 1; - break; - } - - if ('+' == wp[i] || '-' == wp[i]) { - if (j) - return(i); - i++; - } - - /* Handle embedded numerical subexp or escape. */ - - if ('(' == wp[i]) { - while (wp[i] && ')' != wp[i]) - if ('\\' == wp[i++]) { - /* Handle embedded escape. */ - *word = &wp[i]; - i += a2roffdeco(&dd, word, sz); - } - - if (')' == wp[i++]) - break; - - *d = DECO_NONE; - return(i - 1); - } else if ('\\' == wp[i]) { - *word = &wp[++i]; - i += a2roffdeco(&dd, word, sz); - } - - break; - case ('['): - *d = DECO_SPECIAL; - term = ']'; - break; - case ('c'): - *d = DECO_NOSPACE; - return(i); - case ('z'): - *d = DECO_NONE; - if ('\\' == wp[i]) { - *word = &wp[++i]; - return(i + a2roffdeco(&dd, word, sz)); - } else - lim = 1; - break; - case ('o'): - /* FALLTHROUGH */ - case ('w'): - if ('\'' == wp[i++]) { - term = '\''; - break; - } - /* FALLTHROUGH */ - default: - *d = DECO_SSPECIAL; - i--; - lim = 1; - break; - } - - assert(term || lim); - *word = &wp[i]; - - if (term) { - j = i; - while (wp[i] && wp[i] != term) - i++; - if ('\0' == wp[i]) { - *d = DECO_NONE; - return(i); - } - - assert(i >= j); - *sz = (size_t)(i - j); - - return(i + 1); - } - - assert(lim > 0); - *sz = (size_t)lim; - - for (j = 0; wp[i] && j < lim; j++) - i++; - if (j < lim) - *d = DECO_NONE; - - return(i); -} - /* * Calculate the abstract widths and decimal positions of columns in a * table. 
This routine allocates the columns structures then runs over diff --git a/contrib/mdocml/out.h b/contrib/mdocml/out.h index 9bf5357b30..63f10c28ff 100644 --- a/contrib/mdocml/out.h +++ b/contrib/mdocml/out.h @@ -1,4 +1,4 @@ -/* $Id: out.h,v 1.18 2011/03/22 10:13:01 kristaps Exp $ */ +/* $Id: out.h,v 1.20 2011/04/29 22:18:12 kristaps Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons * @@ -31,27 +31,6 @@ enum roffscale { SCALE_MAX }; -enum roffdeco { - DECO_NONE, - DECO_NUMBERED, /* numbered character */ - DECO_SPECIAL, /* special character */ - DECO_SSPECIAL, /* single-char special */ - DECO_RESERVED, /* reserved word */ - DECO_BOLD, /* bold font */ - DECO_ITALIC, /* italic font */ - DECO_ROMAN, /* "normal" undecorated font */ - DECO_PREVIOUS, /* revert to previous font */ - DECO_NOSPACE, /* suppress spacing */ - DECO_FONT, /* font */ - DECO_FFONT, /* font family */ - DECO_MAX -}; - -enum chars { - CHARS_ASCII, /* 7-bit ascii representation */ - CHARS_HTML /* unicode values */ -}; - struct roffcol { size_t width; /* width of cell */ size_t decimal; /* decimal position in cell */ @@ -85,18 +64,9 @@ __BEGIN_DECLS while (/* CONSTCOND */ 0) int a2roffsu(const char *, struct roffsu *, enum roffscale); -int a2roffdeco(enum roffdeco *, const char **, size_t *); void time2a(time_t, char *, size_t); void tblcalc(struct rofftbl *tbl, const struct tbl_span *); -void *chars_init(enum chars); -const char *chars_num2char(const char *, size_t); -const char *chars_spec2str(void *, const char *, size_t, size_t *); -int chars_spec2cp(void *, const char *, size_t); -const char *chars_res2str(void *, const char *, size_t, size_t *); -int chars_res2cp(void *, const char *, size_t); -void chars_free(void *); - __END_DECLS #endif /*!OUT_H*/ diff --git a/contrib/mdocml/preconv.1 b/contrib/mdocml/preconv.1 new file mode 100644 index 0000000000..96fcaeb12e --- /dev/null +++ b/contrib/mdocml/preconv.1 @@ -0,0 +1,161 @@ +.\" $Id: preconv.1,v 1.4 2011/05/26 14:45:04 kristaps Exp $ 
+.\" +.\" Copyright (c) 2011 Kristaps Dzonsons +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: May 26 2011 $ +.Dt PRECONV 1 +.Os +.Sh NAME +.Nm preconv +.Nd recode multibyte UNIX manuals +.Sh SYNOPSIS +.Nm preconv +.Op Fl D Ar enc +.Op Fl e Ar enc +.Op Ar file +.Sh DESCRIPTION +The +.Nm +utility recodes multibyte +.Ux +manual files into +.Xr mandoc 1 +.Po +or other troff system supporting the +.Sq \e[uNNNN] +escape sequence +.Pc +input. +Its arguments are as follows: +.Bl -tag -width Ds +.It Fl D Ar enc +The default encoding. +.It Fl e Ar enc +The document's encoding. +.It Ar file +The input file. +.El +.Pp +If +.Ar file +is not provided, +.Nm +accepts standard input. +See +.Sx Algorithm +for encoding choice. +.Pp +The recoded input is written to standard output: Unicode characters in +the ASCII range are printed as regular ASCII characters, while those +above this range are printed using the +.Sq \e[uNNNN] +format documented in +.Xr mandoc_char 7 . +.Pp +If input bytes are improperly formed in the current encoding, they're +passed unmodified to standard output. +For some encodings, such as UTF-8, unrecoverable input sequences will +cause +.Nm +to stop processing and exit. 
+.Ss Algorithm +An encoding is chosen according to the following steps: +.Bl -enum +.It +From the argument passed to +.Fl e Ar enc . +.It +If a BOM exists, UTF\-8 encoding is selected. +.It +From the coding tags parsed from +.Qq File Variables +on the first two lines of input. +A file variable is an input line of the form +.Pp +.Dl \%.\e\(dq -*- key: val [; key: val ]* -*- +.Pp +A coding tag is a file variable in which +.Cm key +is +.Qq coding +and +.Cm val +is the name of the encoding. +A typical file variable with a coding tag is +.Pp +.Dl \%.\e\(dq -*- mode: troff; coding: utf-8 -*- +.It +From the argument passed to +.Fl D Ar enc . +.It +If all else fails, Latin\-1 is used. +.El +.Pp +The +.Nm +utility recognises the UTF\-8, us\-ascii, and latin\-1 encodings as +passed to the +.Fl e +and +.Fl D +arguments, or as coding tags. +Encodings are matched case-insensitively. +.\" .Sh IMPLEMENTATION NOTES +.\" Not used in OpenBSD. +.\" .Sh RETURN VALUES +.\" For sections 2, 3, & 9 only. +.\" .Sh ENVIRONMENT +.\" For sections 1, 6, 7, & 8 only. +.\" .Sh FILES +.Sh EXIT STATUS +.Ex -std +.Sh EXAMPLES +Explicitly page a UTF\-8 manual +.Pa foo.1 +in the current locale: +.Pp +.Dl $ preconv \-e utf\-8 foo.1 | mandoc -Tlocale | less +.\" .Sh DIAGNOSTICS +.\" For sections 1, 4, 6, 7, & 8 only. +.\" .Sh ERRORS +.\" For sections 2, 3, & 9 only. +.Sh SEE ALSO +.Xr mandoc 1 , +.Xr mandoc_char 7 +.Sh STANDARDS +The +.Nm +utility references the US-ASCII character set standard, ANSI_X3.4\-1968; +the Latin\-1 character set standard, ISO/IEC 8859\-1:1998; the UTF\-8 +character set standard; and UCS (Unicode), ISO/IEC 10646. +.Sh HISTORY +The +.Nm +utility first appeared in the GNU troff +.Pq Dq groff +system in December 2005, authored by Tomohiro Kubota and Werner +Lemberg. +The implementation that is part of the +.Xr mandoc 1 +utility appeared in May 2011. +.Sh AUTHORS +The +.Nm +utility was written by +.An Kristaps Dzonsons Aq kristaps@bsd.lv . 
+.\" .Sh CAVEATS +.\" .Sh BUGS +.\" .Sh SECURITY CONSIDERATIONS +.\" Not used in OpenBSD. diff --git a/contrib/mdocml/preconv.c b/contrib/mdocml/preconv.c new file mode 100644 index 0000000000..a53504ece9 --- /dev/null +++ b/contrib/mdocml/preconv.c @@ -0,0 +1,522 @@ +/* $Id: preconv.c,v 1.4 2011/05/26 21:13:07 kristaps Exp $ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include + +#include +#include +#include +#include +#include +#include + +/* + * The read_whole_file() and resize_buf() functions are copied from + * read.c, including all dependency code (MAP_FILE, etc.). 
+ */ + +#ifndef MAP_FILE +#define MAP_FILE 0 +#endif + +enum enc { + ENC_UTF_8, /* UTF-8 */ + ENC_US_ASCII, /* US-ASCII */ + ENC_LATIN_1, /* Latin-1 */ + ENC__MAX +}; + +struct buf { + char *buf; /* binary input buffer */ + size_t sz; /* size of binary buffer */ + size_t offs; /* starting buffer offset */ +}; + +struct encode { + const char *name; + int (*conv)(const struct buf *); +}; + +static int cue_enc(const struct buf *, size_t *, enum enc *); +static int conv_latin_1(const struct buf *); +static int conv_us_ascii(const struct buf *); +static int conv_utf_8(const struct buf *); +static int read_whole_file(const char *, int, + struct buf *, int *); +static void resize_buf(struct buf *, size_t); +static void usage(void); + +static const struct encode encs[ENC__MAX] = { + { "utf-8", conv_utf_8 }, /* ENC_UTF_8 */ + { "us-ascii", conv_us_ascii }, /* ENC_US_ASCII */ + { "latin-1", conv_latin_1 }, /* ENC_LATIN_1 */ +}; + +static const char *progname; + +static void +usage(void) +{ + + fprintf(stderr, "usage: %s " + "[-D enc] " + "[-e ENC] " + "[file]\n", progname); +} + +static int +conv_latin_1(const struct buf *b) +{ + size_t i; + unsigned char cu; + const char *cp; + + cp = b->buf + (int)b->offs; + + /* + * Latin-1 falls into the first 256 code-points of Unicode, so + * there's no need for any sort of translation. Just make the + * 8-bit characters use the Unicode escape. + * Note that binary values 128 <= v < 160 are passed through + * unmodified to mandoc. + */ + + for (i = b->offs; i < b->sz; i++) { + cu = (unsigned char)*cp++; + cu < 160U ? putchar(cu) : printf("\\[u%.4X]", cu); + } + + return(1); +} + +static int +conv_us_ascii(const struct buf *b) +{ + + /* + * US-ASCII has no conversion since it falls into the first 128 + * bytes of Unicode. 
+ */ + + fwrite(b->buf, 1, b->sz, stdout); + return(1); +} + +static int +conv_utf_8(const struct buf *b) +{ + int state, be; + unsigned int accum; + size_t i; + unsigned char cu; + const char *cp; + const long one = 1L; + + cp = b->buf + (int)b->offs; + state = 0; + accum = 0U; + be = 0; + + /* Quick test for big-endian value. */ + + if ( ! (*((const char *)(&one)))) + be = 1; + + for (i = b->offs; i < b->sz; i++) { + cu = (unsigned char)*cp++; + if (state) { + if ( ! (cu & 128) || (cu & 64)) { + /* Bad sequence header. */ + return(0); + } + + /* Accept only legitimate bit patterns. */ + + if (cu > 191 || cu < 128) { + /* Bad in-sequence bits. */ + return(0); + } + + accum |= (cu & 63) << --state * 6; + + /* + * Accum is held in little-endian order as + * stipulated by the UTF-8 sequence coding. We + * need to convert to a native big-endian if our + * architecture requires it. + */ + + if (0 == state && be) + accum = (accum >> 24) | + ((accum << 8) & 0x00FF0000) | + ((accum >> 8) & 0x0000FF00) | + (accum << 24); + + if (0 == state) { + accum < 128U ? putchar(accum) : + printf("\\[u%.4X]", accum); + accum = 0U; + } + } else if (cu & (1 << 7)) { + /* + * Entering a UTF-8 state: if we encounter a + * UTF-8 bitmask, calculate the expected UTF-8 + * state from it. + */ + for (state = 0; state < 7; state++) + if ( ! (cu & (1 << (7 - state)))) + break; + + /* Accept only legitimate bit patterns. */ + + switch (state) { + case (4): + if (cu <= 244 && cu >= 240) { + accum = (cu & 7) << 18; + break; + } + /* Bad 4-sequence start bits. */ + return(0); + case (3): + if (cu <= 239 && cu >= 224) { + accum = (cu & 15) << 12; + break; + } + /* Bad 3-sequence start bits. */ + return(0); + case (2): + if (cu <= 223 && cu >= 194) { + accum = (cu & 31) << 6; + break; + } + /* Bad 2-sequence start bits. */ + return(0); + default: + /* Bad sequence bit mask. */ + return(0); + } + state--; + } else + putchar(cu); + } + + if (0 != state) { + /* Bad trailing bits. 
*/ + return(0); + } + + return(1); +} + +static void +resize_buf(struct buf *buf, size_t initial) +{ + + buf->sz = buf->sz > initial / 2 ? + 2 * buf->sz : initial; + + buf->buf = realloc(buf->buf, buf->sz); + if (NULL == buf->buf) { + perror(NULL); + exit(EXIT_FAILURE); + } +} + +static int +read_whole_file(const char *f, int fd, + struct buf *fb, int *with_mmap) +{ + struct stat st; + size_t off; + ssize_t ssz; + + if (-1 == fstat(fd, &st)) { + perror(f); + return(0); + } + + /* + * If we're a regular file, try just reading in the whole entry + * via mmap(). This is faster than reading it into blocks, and + * since each file is only a few bytes to begin with, I'm not + * concerned that this is going to tank any machines. + */ + + if (S_ISREG(st.st_mode) && st.st_size >= (1U << 31)) { + fprintf(stderr, "%s: input too large\n", f); + return(0); + } + + if (S_ISREG(st.st_mode)) { + *with_mmap = 1; + fb->sz = (size_t)st.st_size; + fb->buf = mmap(NULL, fb->sz, PROT_READ, + MAP_FILE|MAP_SHARED, fd, 0); + if (fb->buf != MAP_FAILED) + return(1); + } + + /* + * If this isn't a regular file (like, say, stdin), then we must + * go the old way and just read things in bit by bit. + */ + + *with_mmap = 0; + off = 0; + fb->sz = 0; + fb->buf = NULL; + for (;;) { + if (off == fb->sz && fb->sz == (1U << 31)) { + fprintf(stderr, "%s: input too large\n", f); + break; + } + + if (off == fb->sz) + resize_buf(fb, 65536); + + ssz = read(fd, fb->buf + (int)off, fb->sz - off); + if (ssz == 0) { + fb->sz = off; + return(1); + } + if (ssz == -1) { + perror(f); + break; + } + off += (size_t)ssz; + } + + free(fb->buf); + fb->buf = NULL; + return(0); +} + +static int +cue_enc(const struct buf *b, size_t *offs, enum enc *enc) +{ + const char *ln, *eoln, *eoph; + size_t sz, phsz, nsz; + int i; + + ln = b->buf + (int)*offs; + sz = b->sz - *offs; + + /* Look for the end-of-line. */ + + if (NULL == (eoln = memchr(ln, '\n', sz))) + return(-1); + + /* Set next-line marker. 
*/ + + *offs = (size_t)((eoln + 1) - b->buf); + + /* Check if we have the correct header/trailer. */ + + if ((sz = (size_t)(eoln - ln)) < 10 || + memcmp(ln, ".\\\" -*-", 7) || + memcmp(eoln - 3, "-*-", 3)) + return(0); + + /* Move after the header and adjust for the trailer. */ + + ln += 7; + sz -= 10; + + while (sz > 0) { + while (sz > 0 && ' ' == *ln) { + ln++; + sz--; + } + if (0 == sz) + break; + + /* Find the end-of-phrase marker (or eoln). */ + + if (NULL == (eoph = memchr(ln, ';', sz))) + eoph = eoln - 3; + else + eoph++; + + /* Only account for the "coding" phrase. */ + + if ((phsz = (size_t)(eoph - ln)) < 7 || + strncasecmp(ln, "coding:", 7)) { + sz -= phsz; + ln += phsz; + continue; + } + + sz -= 7; + ln += 7; + + while (sz > 0 && ' ' == *ln) { + ln++; + sz--; + } + if (0 == sz) + break; + + /* Check us against known encodings. */ + + for (i = 0; i < (int)ENC__MAX; i++) { + nsz = strlen(encs[i].name); + if (phsz < nsz) + continue; + if (strncasecmp(ln, encs[i].name, nsz)) + continue; + + *enc = (enum enc)i; + return(1); + } + + /* Unknown encoding. 
*/ + + *enc = ENC__MAX; + return(1); + } + + return(0); +} + +int +main(int argc, char *argv[]) +{ + int i, ch, map, fd, rc; + struct buf b; + const char *fn; + enum enc enc, def; + unsigned char bom[3] = { 0xEF, 0xBB, 0xBF }; + size_t offs; + extern int optind; + extern char *optarg; + + progname = strrchr(argv[0], '/'); + if (progname == NULL) + progname = argv[0]; + else + ++progname; + + fn = ""; + fd = STDIN_FILENO; + rc = EXIT_FAILURE; + enc = def = ENC__MAX; + map = 0; + + memset(&b, 0, sizeof(struct buf)); + + while (-1 != (ch = getopt(argc, argv, "D:e:rdvh"))) + switch (ch) { + case ('D'): + /* FALLTHROUGH */ + case ('e'): + for (i = 0; i < (int)ENC__MAX; i++) { + if (strcasecmp(optarg, encs[i].name)) + continue; + break; + } + if (i < (int)ENC__MAX) { + if ('D' == ch) + def = (enum enc)i; + else + enc = (enum enc)i; + break; + } + + fprintf(stderr, "%s: Bad encoding\n", optarg); + return(EXIT_FAILURE); + case ('r'): + /* FALLTHROUGH */ + case ('d'): + /* FALLTHROUGH */ + case ('v'): + /* Compatibility with GNU preconv. */ + break; + case ('h'): + /* Compatibility with GNU preconv. */ + /* FALLTHROUGH */ + default: + usage(); + return(EXIT_FAILURE); + } + + argc -= optind; + argv += optind; + + /* + * Open and read the first argument on the command-line. + * If we don't have one, we default to stdin. + */ + + if (argc > 0) { + fn = *argv; + fd = open(fn, O_RDONLY, 0); + if (-1 == fd) { + perror(fn); + return(EXIT_FAILURE); + } + } + + if ( ! read_whole_file(fn, fd, &b, &map)) + goto out; + + /* Try to read the UTF-8 BOM. */ + + if (ENC__MAX == enc) + if (b.sz > 3 && 0 == memcmp(b.buf, bom, 3)) { + b.offs = 3; + enc = ENC_UTF_8; + } + + /* Try reading from the "-*-" cue. */ + + if (ENC__MAX == enc) { + offs = b.offs; + ch = cue_enc(&b, &offs, &enc); + if (0 == ch) + ch = cue_enc(&b, &offs, &enc); + } + + /* + * No encoding has been detected. + * Thus, we either fall into our default encoder, if specified, + * or use Latin-1 if all else fails. 
+ */ + + if (ENC__MAX == enc) + enc = ENC__MAX == def ? ENC_LATIN_1 : def; + + if ( ! (*encs[(int)enc].conv)(&b)) { + fprintf(stderr, "%s: Bad encoding\n", fn); + goto out; + } + + rc = EXIT_SUCCESS; +out: + if (map) + munmap(b.buf, b.sz); + else + free(b.buf); + + if (fd > STDIN_FILENO) + close(fd); + + return(rc); +} diff --git a/contrib/mdocml/predefs.in b/contrib/mdocml/predefs.in new file mode 100644 index 0000000000..9c56112715 --- /dev/null +++ b/contrib/mdocml/predefs.in @@ -0,0 +1,65 @@ +/* $Id: predefs.in,v 1.2 2011/05/26 14:30:28 kristaps Exp $ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * The predefined-string translation tables. Each corresponds to a + * predefined string from (e.g.) tmac/mdoc/doc-nroff. The left-hand + * side corresponds to the input sequence (\*x, \*(xx and so on). The + * right-hand side is what's produced by libroff. + * + * XXX - C-escape strings! + * XXX - update PREDEFS_MAX in roff.c if adding more! 
+ */ + +PREDEF("Am", "&") +PREDEF("Ba", "|") +PREDEF("Ge", "\\(>=") +PREDEF("Gt", ">") +PREDEF("If", "\\(if") +PREDEF("Le", "\\(<=") +PREDEF("Lq", "\\(lq") +PREDEF("Lt", "<") +PREDEF("Na", "NaN") +PREDEF("Ne", "\\(!=") +PREDEF("Pi", "\\(*p") +PREDEF("Pm", "\\(+-") +PREDEF("Rq", "\\(rq") +PREDEF("left-bracket", "[") +PREDEF("left-parenthesis", "(") +PREDEF("lp", "(") +PREDEF("left-singlequote", "\\(oq") +PREDEF("q", "\\(dq") +PREDEF("quote-left", "\\(oq") +PREDEF("quote-right", "\\(cq") +PREDEF("R", "\\(rg") +PREDEF("right-bracket", "]") +PREDEF("right-parenthesis", ")") +PREDEF("rp", ")") +PREDEF("right-singlequote", "\\(cq") +PREDEF("Tm", "\\(tm") +PREDEF("Px", "POSIX") +PREDEF("Ai", "ANSI") +PREDEF("\'", "\\\'") +PREDEF("aa", "\\(aa") +PREDEF("ga", "\\(ga") +PREDEF("`", "\\`") +PREDEF("lq", "\\(lq") +PREDEF("rq", "\\(rq") +PREDEF("ua", "\\(ua") +PREDEF("va", "\\(va") +PREDEF("<=", "\\(<=") +PREDEF(">=", "\\(>=") diff --git a/contrib/mdocml/read.c b/contrib/mdocml/read.c index 6c240c2bd2..da273ae183 100644 --- a/contrib/mdocml/read.c +++ b/contrib/mdocml/read.c @@ -1,4 +1,4 @@ -/* $Id: read.c,v 1.10 2011/04/03 10:11:25 kristaps Exp $ */ +/* $Id: read.c,v 1.15 2011/05/26 20:36:21 kristaps Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2010, 2011 Ingo Schwarze @@ -15,8 +15,14 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ -#include -#include +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_MMAP +# include +# include +#endif #include #include @@ -138,7 +144,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = { "tab in non-literal context", "end of line whitespace", "bad comment style", - "unknown escape sequence", + "bad escape sequence", "unterminated quoted string", "generic error", @@ -350,7 +356,7 @@ mparse_buf_r(struct mparse *curp, struct buf blk, int start) continue; } - if ('"' == blk.buf[i + 1]) { + if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) { i += 2; /* Comment, skip to end of line */ for (; i < (int)blk.sz; ++i) { @@ -441,7 +447,7 @@ rerun: /* * If input parsers have not been allocated, do so now. - * We keep these instanced betwen parsers, but set them + * We keep these instanced between parsers, but set them * locally per parse routine since we can use different * parsers with each one. */ @@ -525,19 +531,22 @@ pdesc(struct mparse *curp, const char *file, int fd) mparse_buf_r(curp, blk, 1); +#ifdef HAVE_MMAP if (with_mmap) munmap(blk.buf, blk.sz); else +#endif free(blk.buf); } static int read_whole_file(const char *file, int fd, struct buf *fb, int *with_mmap) { - struct stat st; size_t off; ssize_t ssz; +#ifdef HAVE_MMAP + struct stat st; if (-1 == fstat(fd, &st)) { perror(file); return(0); @@ -562,6 +571,7 @@ read_whole_file(const char *file, int fd, struct buf *fb, int *with_mmap) if (fb->buf != MAP_FAILED) return(1); } +#endif /* * If this isn't a regular file (like, say, stdin), then we must diff --git a/contrib/mdocml/roff.7 b/contrib/mdocml/roff.7 index 8f40d96cbe..41837a1d3c 100644 --- a/contrib/mdocml/roff.7 +++ b/contrib/mdocml/roff.7 @@ -1,4 +1,4 @@ -.\" $Id: roff.7,v 1.27 2011/02/09 10:03:02 kristaps Exp $ +.\" $Id: roff.7,v 1.29 2011/05/24 15:22:14 kristaps Exp $ .\" .\" Copyright (c) 2010 Kristaps Dzonsons .\" Copyright (c) 2010 Ingo Schwarze @@ -15,7 +15,7 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 
ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd $Mdocdate: February 9 2011 $ +.Dd $Mdocdate: May 24 2011 $ .Dt ROFF 7 .Os .Sh NAME @@ -448,15 +448,20 @@ than having the request or macro follow as The scope of a conditional is always parsed, but only executed if the conditional evaluates to true. .Pp -Note that text following an -.Sq \&.\e} -escape sequence is discarded. -Furthermore, if an explicit closing sequence +Note that the .Sq \e} -is specified in a free-form line, the entire line is accepted within the -scope of the prior request, not only the text preceding the close, with the +is converted into a zero-width escape sequence if not passed as a +standalone macro +.Sq \&.\e} . +For example, +.Pp +.D1 \&.Fl a \e} b +.Pp +will result in .Sq \e} -collapsing into a zero-width space. +being considered an argument of the +.Sq \&Fl +macro. .Ss \&ig Ignore input. Its syntax can be either @@ -567,7 +572,7 @@ The will be read and its contents processed as input in place of the .Sq \&.so request line. -To avoid inadvertant inclusion of unrelated files, +To avoid inadvertent inclusion of unrelated files, .Xr mandoc 1 only accepts relative paths not containing the strings .Qq ../ diff --git a/contrib/mdocml/roff.c b/contrib/mdocml/roff.c index 3aa3972b6d..92a4a9b372 100644 --- a/contrib/mdocml/roff.c +++ b/contrib/mdocml/roff.c @@ -1,4 +1,4 @@ -/* $Id: roff.c,v 1.130 2011/03/29 09:00:48 kristaps Exp $ */ +/* $Id: roff.c,v 1.142 2011/05/26 11:58:25 kristaps Exp $ */ /* * Copyright (c) 2010, 2011 Kristaps Dzonsons * Copyright (c) 2010, 2011 Ingo Schwarze @@ -20,17 +20,15 @@ #endif #include -#include #include -#include #include #include -#include #include "mandoc.h" #include "libroff.h" #include "libmandoc.h" +/* Maximum number of nested if-else conditionals. */ #define RSTACK_MAX 128 enum rofft { @@ -63,7 +61,7 @@ enum rofft { ROFF_EQ, ROFF_EN, ROFF_cblock, - ROFF_ccond, /* FIXME: remove this. 
*/ + ROFF_ccond, ROFF_USERDEF, ROFF_MAX }; @@ -127,6 +125,14 @@ struct roffmac { struct roffmac *next; }; +struct predef { + const char *name; /* predefined input name */ + const char *str; /* replacement symbol */ +}; + +#define PREDEF(__name, __str) \ + { (__name), (__str) }, + static enum rofferr roff_block(ROFF_ARGS); static enum rofferr roff_block_text(ROFF_ARGS); static enum rofferr roff_block_sub(ROFF_ARGS); @@ -144,7 +150,7 @@ static const char *roff_getstrn(const struct roff *, static enum rofferr roff_line_ignore(ROFF_ARGS); static enum rofferr roff_nr(ROFF_ARGS); static int roff_res(struct roff *, - char **, size_t *, int); + char **, size_t *, int, int); static enum rofferr roff_rm(ROFF_ARGS); static void roff_setstr(struct roff *, const char *, const char *, int); @@ -198,6 +204,12 @@ static struct roffmac roffs[ROFF_MAX] = { { NULL, roff_userdef, NULL, NULL, 0, NULL }, }; +/* Array of injected predefined strings. */ +#define PREDEFS_MAX 38 +static const struct predef predefs[PREDEFS_MAX] = { +#include "predefs.in" +}; + static void roff_free1(struct roff *); static enum rofft roff_hash_find(const char *, size_t); static void roff_hash_init(void); @@ -206,7 +218,6 @@ static void roffnode_push(struct roff *, enum rofft, const char *, int, int); static void roffnode_pop(struct roff *); static enum rofft roff_parse(struct roff *, const char *, int *); -static int roff_parse_nat(const char *, unsigned int *); /* See roff_hash_find() */ #define ROFF_HASH(p) (p[0] - ASCII_LO) @@ -232,7 +243,6 @@ roff_hash_init(void) } } - /* * Look up a roff token by its name. Returns ROFF_MAX if no macro by * the nil-terminated string name could be found. 
@@ -277,10 +287,6 @@ roffnode_pop(struct roff *r) assert(r->last); p = r->last; - if (ROFF_el == p->tok) - if (r->rstackpos > -1) - r->rstackpos--; - r->last = r->last->parent; free(p->name); free(p->end); @@ -359,6 +365,7 @@ struct roff * roff_alloc(struct regset *regs, struct mparse *parse) { struct roff *r; + int i; r = mandoc_calloc(1, sizeof(struct roff)); r->regs = regs; @@ -366,6 +373,10 @@ roff_alloc(struct regset *regs, struct mparse *parse) r->rstackpos = -1; roff_hash_init(); + + for (i = 0; i < PREDEFS_MAX; i++) + roff_setstr(r, predefs[i].name, predefs[i].str, 0); + return(r); } @@ -376,7 +387,7 @@ roff_alloc(struct regset *regs, struct mparse *parse) * is processed. */ static int -roff_res(struct roff *r, char **bufp, size_t *szp, int pos) +roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos) { const char *stesc; /* start of an escape sequence ('\\') */ const char *stnam; /* start of the name, after "[(*" */ @@ -443,8 +454,9 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int pos) res = roff_getstrn(r, stnam, (size_t)i); if (NULL == res) { - cp -= maxl ? 1 : 0; - continue; + /* TODO: keep track of the correct position. */ + mandoc_msg(MANDOCERR_BADESCAPE, r->parse, ln, pos, NULL); + res = ""; } /* Replace the escape sequence by the string. */ @@ -480,7 +492,7 @@ roff_parseln(struct roff *r, int ln, char **bufp, * words to fill in. */ - if (r->first_string && ! roff_res(r, bufp, szp, pos)) + if (r->first_string && ! 
roff_res(r, bufp, szp, ln, pos)) return(ROFF_REPARSE); ppos = pos; @@ -597,27 +609,6 @@ roff_parse(struct roff *r, const char *buf, int *pos) return(t); } - -static int -roff_parse_nat(const char *buf, unsigned int *res) -{ - char *ep; - long lval; - - errno = 0; - lval = strtol(buf, &ep, 10); - if (buf[0] == '\0' || *ep != '\0') - return(0); - if ((errno == ERANGE && - (lval == LONG_MAX || lval == LONG_MIN)) || - (lval > INT_MAX || lval < 0)) - return(0); - - *res = (unsigned int)lval; - return(1); -} - - /* ARGSUSED */ static enum rofferr roff_cblock(ROFF_ARGS) @@ -739,10 +730,10 @@ roff_block(ROFF_ARGS) mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos, roffs[tok].name); - while ((*bufp)[pos] && ' ' != (*bufp)[pos]) + while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos])) pos++; - while (' ' == (*bufp)[pos]) + while (isspace((unsigned char)(*bufp)[pos])) (*bufp)[pos++] = '\0'; } @@ -763,9 +754,7 @@ roff_block(ROFF_ARGS) /* If present, process the custom end-of-line marker. */ sv = pos; - while ((*bufp)[pos] && - ' ' != (*bufp)[pos] && - '\t' != (*bufp)[pos]) + while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos])) pos++; /* @@ -835,8 +824,7 @@ roff_block_sub(ROFF_ARGS) * pulling it out of the hashtable. */ - if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) - return(ROFF_IGN); + t = roff_parse(r, *bufp, &pos); /* * Macros other than block-end are only significant @@ -872,21 +860,29 @@ roff_cond_sub(ROFF_ARGS) { enum rofft t; enum roffrule rr; + char *ep; rr = r->last->rule; + roffnode_cleanscope(r); - /* - * Clean out scope. If we've closed ourselves, then don't - * continue. + /* + * If the macro is unknown, first check if it contains a closing + * delimiter `\}'. If it does, close out our scope and return + * the currently-scoped rule (ignore or continue). Else, drop + * into the currently-scoped rule. 
*/ - roffnode_cleanscope(r); - if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) { - if ('\\' == (*bufp)[pos] && '}' == (*bufp)[pos + 1]) - return(roff_ccond - (r, ROFF_ccond, bufp, szp, - ln, pos, pos + 2, offs)); + ep = &(*bufp)[pos]; + for ( ; NULL != (ep = strchr(ep, '\\')); ep++) { + ep++; + if ('}' != *ep) + continue; + *ep = '&'; + roff_ccond(r, ROFF_ccond, bufp, szp, + ln, pos, pos + 2, offs); + break; + } return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); } @@ -895,6 +891,7 @@ roff_cond_sub(ROFF_ARGS) * if they're either structurally required (such as loops and * conditionals) or a closing macro. */ + if (ROFFRULE_DENY == rr) if ( ! (ROFFMAC_STRUCT & roffs[t].flags)) if (ROFF_ccond != t) @@ -905,37 +902,28 @@ roff_cond_sub(ROFF_ARGS) ln, ppos, pos, offs)); } - /* ARGSUSED */ static enum rofferr roff_cond_text(ROFF_ARGS) { - char *ep, *st; + char *ep; enum roffrule rr; rr = r->last->rule; + roffnode_cleanscope(r); - /* - * We display the value of the text if out current evaluation - * scope permits us to do so. - */ - - /* FIXME: use roff_ccond? */ - - st = &(*bufp)[pos]; - if (NULL == (ep = strstr(st, "\\}"))) { - roffnode_cleanscope(r); - return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); + ep = &(*bufp)[pos]; + for ( ; NULL != (ep = strchr(ep, '\\')); ep++) { + ep++; + if ('}' != *ep) + continue; + *ep = '&'; + roff_ccond(r, ROFF_ccond, bufp, szp, + ln, pos, pos + 2, offs); } - - if (ep == st || (ep > st && '\\' != *(ep - 1))) - roffnode_pop(r); - - roffnode_cleanscope(r); return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); } - static enum roffrule roff_evalcond(const char *v, int *pos) { @@ -978,29 +966,20 @@ roff_cond(ROFF_ARGS) int sv; enum roffrule rule; - /* Stack overflow! */ - - if (ROFF_ie == tok && r->rstackpos == RSTACK_MAX - 1) { - mandoc_msg(MANDOCERR_MEM, r->parse, ln, ppos, NULL); - return(ROFF_ERR); - } - - /* First, evaluate the conditional. 
*/ + /* + * An `.el' has no conditional body: it will consume the value + * of the current rstack entry set in prior `ie' calls or + * defaults to DENY. + * + * If we're not an `el', however, then evaluate the conditional. + */ - if (ROFF_el == tok) { - /* - * An `.el' will get the value of the current rstack - * entry set in prior `ie' calls or defaults to DENY. - */ - if (r->rstackpos < 0) - rule = ROFFRULE_DENY; - else - rule = r->rstack[r->rstackpos]; - } else - rule = roff_evalcond(*bufp, &pos); + rule = ROFF_el == tok ? + (r->rstackpos < 0 ? + ROFFRULE_DENY : r->rstack[r->rstackpos--]) : + roff_evalcond(*bufp, &pos); sv = pos; - while (' ' == (*bufp)[pos]) pos++; @@ -1020,16 +999,20 @@ roff_cond(ROFF_ARGS) r->last->rule = rule; + /* + * An if-else will put the NEGATION of the current evaluated + * conditional into the stack of rules. + */ + if (ROFF_ie == tok) { - /* - * An if-else will put the NEGATION of the current - * evaluated conditional into the stack. - */ - r->rstackpos++; - if (ROFFRULE_DENY == r->last->rule) - r->rstack[r->rstackpos] = ROFFRULE_ALLOW; - else - r->rstack[r->rstackpos] = ROFFRULE_DENY; + if (r->rstackpos == RSTACK_MAX - 1) { + mandoc_msg(MANDOCERR_MEM, + r->parse, ln, ppos, NULL); + return(ROFF_ERR); + } + r->rstack[++r->rstackpos] = + ROFFRULE_DENY == r->last->rule ? + ROFFRULE_ALLOW : ROFFRULE_DENY; } /* If the parent has false as its rule, then so do we. */ @@ -1102,6 +1085,7 @@ roff_nr(ROFF_ARGS) { const char *key; char *val; + int iv; struct reg *rg; val = *bufp + pos; @@ -1110,8 +1094,10 @@ roff_nr(ROFF_ARGS) if (0 == strcmp(key, "nS")) { rg[(int)REG_nS].set = 1; - if ( ! 
roff_parse_nat(val, &rg[(int)REG_nS].v.u)) - rg[(int)REG_nS].v.u = 0; + if ((iv = mandoc_strntou(val, strlen(val), 10)) >= 0) + rg[REG_nS].v.u = (unsigned)iv; + else + rg[(int)REG_nS].v.u = 0u; } return(ROFF_IGN); diff --git a/contrib/mdocml/st.in b/contrib/mdocml/st.in index 2d7d005e61..888e5e44fb 100644 --- a/contrib/mdocml/st.in +++ b/contrib/mdocml/st.in @@ -1,4 +1,4 @@ -/* $Id: st.in,v 1.15 2010/07/31 23:52:58 schwarze Exp $ */ +/* $Id: st.in,v 1.16 2011/04/24 17:56:44 schwarze Exp $ */ /* * Copyright (c) 2009, 2010 Kristaps Dzonsons * @@ -22,6 +22,8 @@ * the formatted output string. * * Be sure to escape strings. + * The non-breaking blanks prevent ending an output line right before + * a number. Groff prevent line breaks at the same places. * * REMEMBER TO ADD NEW STANDARDS TO MDOC.7! */ @@ -43,32 +45,32 @@ LINE("-p1003.2a-92", "IEEE Std 1003.2a-1992 (\\(lqPOSIX.2\\(rq)") LINE("-p1387.2-95", "IEEE Std 1387.2-1995 (\\(lqPOSIX.7.2\\(rq)") LINE("-p1003.2", "IEEE Std 1003.2 (\\(lqPOSIX.2\\(rq)") LINE("-p1387.2", "IEEE Std 1387.2 (\\(lqPOSIX.7.2\\(rq)") -LINE("-isoC", "ISO/IEC 9899:1990 (\\(lqISO C90\\(rq)") -LINE("-isoC-90", "ISO/IEC 9899:1990 (\\(lqISO C90\\(rq)") -LINE("-isoC-amd1", "ISO/IEC 9899/AMD1:1995 (\\(lqISO C90\\(rq)") -LINE("-isoC-tcor1", "ISO/IEC 9899/TCOR1:1994 (\\(lqISO C90\\(rq)") -LINE("-isoC-tcor2", "ISO/IEC 9899/TCOR2:1995 (\\(lqISO C90\\(rq)") -LINE("-isoC-99", "ISO/IEC 9899:1999 (\\(lqISO C99\\(rq)") +LINE("-isoC", "ISO/IEC 9899:1990 (\\(lqISO\\~C90\\(rq)") +LINE("-isoC-90", "ISO/IEC 9899:1990 (\\(lqISO\\~C90\\(rq)") +LINE("-isoC-amd1", "ISO/IEC 9899/AMD1:1995 (\\(lqISO\\~C90, Amendment 1\\(rq)") +LINE("-isoC-tcor1", "ISO/IEC 9899/TCOR1:1994 (\\(lqISO\\~C90, Technical Corrigendum 1\\(rq)") +LINE("-isoC-tcor2", "ISO/IEC 9899/TCOR2:1995 (\\(lqISO\\~C90, Technical Corrigendum 2\\(rq)") +LINE("-isoC-99", "ISO/IEC 9899:1999 (\\(lqISO\\~C99\\(rq)") LINE("-iso9945-1-90", "ISO/IEC 9945-1:1990 (\\(lqPOSIX.1\\(rq)") LINE("-iso9945-1-96", "ISO/IEC 
9945-1:1996 (\\(lqPOSIX.1\\(rq)") LINE("-iso9945-2-93", "ISO/IEC 9945-2:1993 (\\(lqPOSIX.2\\(rq)") -LINE("-ansiC", "ANSI X3.159-1989 (\\(lqANSI C\\(rq)") -LINE("-ansiC-89", "ANSI X3.159-1989 (\\(lqANSI C\\(rq)") -LINE("-ansiC-99", "ANSI/ISO/IEC 9899-1999 (\\(lqANSI C99\\(rq)") +LINE("-ansiC", "ANSI X3.159-1989 (\\(lqANSI\\~C89\\(rq)") +LINE("-ansiC-89", "ANSI X3.159-1989 (\\(lqANSI\\~C89\\(rq)") +LINE("-ansiC-99", "ANSI/ISO/IEC 9899-1999 (\\(lqANSI\\~C99\\(rq)") LINE("-ieee754", "IEEE Std 754-1985") LINE("-iso8802-3", "ISO 8802-3: 1989") LINE("-ieee1275-94", "IEEE Std 1275-1994 (\\(lqOpen Firmware\\(rq)") -LINE("-xpg3", "X/Open Portability Guide Issue 3 (\\(lqXPG3\\(rq)") -LINE("-xpg4", "X/Open Portability Guide Issue 4 (\\(lqXPG4\\(rq)") -LINE("-xpg4.2", "X/Open Portability Guide Issue 4.2 (\\(lqXPG4.2\\(rq)") -LINE("-xpg4.3", "X/Open Portability Guide Issue 4.3 (\\(lqXPG4.3\\(rq)") -LINE("-xbd5", "X/Open System Interface Definitions Issue 5 (\\(lqXBD5\\(rq)") -LINE("-xcu5", "X/Open Commands and Utilities Issue 5 (\\(lqXCU5\\(rq)") -LINE("-xsh5", "X/Open System Interfaces and Headers Issue 5 (\\(lqXSH5\\(rq)") -LINE("-xns5", "X/Open Networking Services Issue 5 (\\(lqXNS5\\(rq)") -LINE("-xns5.2", "X/Open Networking Services Issue 5.2 (\\(lqXNS5.2\\(rq)") -LINE("-xns5.2d2.0", "X/Open Networking Services Issue 5.2 Draft 2.0 (\\(lqXNS5.2D2.0\\(rq)") -LINE("-xcurses4.2", "X/Open Curses Issue 4 Version 2 (\\(lqXCURSES4.2\\(rq)") -LINE("-susv2", "Version 2 of the Single UNIX Specification") -LINE("-susv3", "Version 3 of the Single UNIX Specification") -LINE("-svid4", "System V Interface Definition, Fourth Edition (\\(lqSVID4\\(rq)") +LINE("-xpg3", "X/Open Portability Guide Issue\\~3 (\\(lqXPG3\\(rq)") +LINE("-xpg4", "X/Open Portability Guide Issue\\~4 (\\(lqXPG4\\(rq)") +LINE("-xpg4.2", "X/Open Portability Guide Issue\\~4, Version\\~2 (\\(lqXPG4.2\\(rq)") +LINE("-xpg4.3", "X/Open Portability Guide Issue\\~4, Version\\~3 (\\(lqXPG4.3\\(rq)") +LINE("-xbd5", "X/Open System 
Interface Definitions Issue\\~5 (\\(lqXBD5\\(rq)") +LINE("-xcu5", "X/Open Commands and Utilities Issue\\~5 (\\(lqXCU5\\(rq)") +LINE("-xsh5", "X/Open System Interfaces and Headers Issue\\~5 (\\(lqXSH5\\(rq)") +LINE("-xns5", "X/Open Networking Services Issue\\~5 (\\(lqXNS5\\(rq)") +LINE("-xns5.2", "X/Open Networking Services Issue\\~5.2 (\\(lqXNS5.2\\(rq)") +LINE("-xns5.2d2.0", "X/Open Networking Services Issue\\~5.2 Draft\\~2.0 (\\(lqXNS5.2D2.0\\(rq)") +LINE("-xcurses4.2", "X/Open Curses Issue\\~4, Version\\~2 (\\(lqXCURSES4.2\\(rq)") +LINE("-susv2", "Version\\~2 of the Single UNIX Specification") +LINE("-susv3", "Version\\~3 of the Single UNIX Specification") +LINE("-svid4", "System\\~V Interface Definition, Fourth Edition (\\(lqSVID4\\(rq)") diff --git a/contrib/mdocml/style.css b/contrib/mdocml/style.css index b51c72191c..22abf5115b 100644 --- a/contrib/mdocml/style.css +++ b/contrib/mdocml/style.css @@ -1,54 +1,47 @@ -/* $Id: style.css,v 1.21 2011/02/09 09:52:47 kristaps Exp $ */ - -html { max-width: 800px; } -body { color: #333333; - font-size: 0.93em; - font-family: Times, serif; } +/* $Id: style.css,v 1.22 2011/05/14 23:40:49 kristaps Exp $ */ + +/* + * This is an example style-sheet provided for mandoc(1) and the -Thtml + * or -Txhtml output mode. + * + * It mimics the appearance of the traditional cvsweb output. + * + * See mdoc(7) and man(7) for macro explanations. + */ + +html { max-width: 880px; } +body { font-size: smaller; font-family: Helvetica,Arial,sans-serif; } +h1 { margin-bottom: 1ex; font-size: 110%; margin-left: -4ex; } /* Section header (Sh, SH). */ +h2 { margin-bottom: 1ex; font-size: 105%; margin-left: -2ex; } /* Sub-section header (Ss, SS). */ +table { width: 100%; margin-top: 0ex; margin-bottom: 0ex; } /* All tables. */ +td { vertical-align: top; } /* All table cells. */ +p { } /* Paragraph: Pp, Lp. */ +blockquote { margin-top: 0ex; margin-bottom: 0ex; } /* D1. */ +div.section { margin-bottom: 2ex; margin-left: 5ex; } /* Sections (Sh, SH). 
*/ +div.subsection { } /* Sub-sections (Ss, SS). */ +table.synopsis { } /* SYNOPSIS section table. */ /* Preamble structure. */ -table.foot { width: 100%; - font-size: 0.8em; - margin-top: 1em; - border-top: 1px dotted #dddddd; - color: #999999; } /* Document footer. */ +table.foot { font-size: smaller; margin-top: 1em; border-top: 1px dotted #dddddd; } /* Document footer. */ td.foot-date { width: 50%; } /* Document footer: date. */ td.foot-os { width: 50%; text-align: right; } /* Document footer: OS/source. */ -table.head { width: 100%; - font-size: 0.8em; - margin-bottom: 1em; - border-bottom: 1px dotted #dddddd; - color: #999999; } /* Document header. */ +table.head { font-size: smaller; margin-bottom: 1em; border-bottom: 1px dotted #dddddd; } /* Document header. */ td.head-ltitle { width: 10%; } /* Document header: left-title. */ td.head-vol { width: 80%; text-align: center; } /* Document header: volume. */ td.head-rtitle { width: 10%; text-align: right; } /* Document header: right-title. */ -/* Sections. */ - -h1 { margin-bottom: 0px; color: #000000; font-size: 0.93em; margin-left: -4ex; } /* Section header (Sh, SH). */ -h2 { margin-bottom: 0px; color: #000000; font-size: 0.93em; margin-left: -2ex; } /* Sub-section header (Ss, SS). */ -div.section { margin-bottom: 2ex; margin-left: 4ex; } /* Sections (Sh, SH). */ -div.subsection { } /* Sub-sections (Ss, SS). */ -table.synopsis { } /* SYNOPSIS section table. */ - -/* Vertical spacing. */ - -p { } /* Paragraph: Pp, Lp. */ -blockquote { margin-top: 0px; margin-bottom: 0px; } -table { margin-top: 0px; margin-bottom: 0px; } -td { vertical-align: top; } /* SYNOPSIS section table. */ - /* General font modes. */ +i { } /* Italic: BI, IB, I, (implicit). */ .emph { font-style: italic; font-weight: normal; } /* Emphasis: Em, Bl -emphasis. */ +b { } /* Bold: SB, BI, IB, BR, RB, B, (implicit). */ .symb { font-style: normal; font-weight: bold; } /* Symbolic: Sy, Ms, Bf -symbolic. */ +small { } /* Small: SB, SM. 
*/ /* Block modes. */ -.display { background-color: #EEEEEE; - margin: 3px; - padding: 3px; - border: 1px solid #339999; } /* Top of all Bd, D1, Dl. */ +.display { } /* Top of all Bd, D1, Dl. */ .list { } /* Top of all Bl. */ /* Context-specific modes. */ @@ -73,7 +66,7 @@ span.lib { } /* Library (Lb). */ i.link-sec { font-weight: normal; } /* Section links (Sx). */ code.lit { font-style: normal; font-weight: normal; } /* Literal: Dl, Li, Bf -literal, Bl -literal, Bl -unfilled. */ b.macro { font-style: normal; } /* Macro-ish thing (Fd). */ -b.name { color: blue; font-style: normal; } /* Name of utility (Nm). */ +b.name { font-style: normal; } /* Name of utility (Nm). */ span.opt { } /* Options (Op, Oo/Oc). */ span.ref { } /* Citations (Rs). */ span.ref-auth { } /* Reference author (%A). */ @@ -94,14 +87,14 @@ span.unix { } /* Unices (Ux, Ox, Nx, Fx, Bx, Bsx, Dx). */ b.utility { font-style: normal; } /* Name of utility (Ex). */ b.var { font-style: normal; } /* Variables (Rv). */ -a.link-ext { background: transparent url(external.png) center right no-repeat; padding-right: 12px; }/* Off-site link (Lk). */ +a.link-ext { } /* Off-site link (Lk). */ a.link-includes { } /* Include-file link (In). */ -a.link-mail { background: transparent url(external.png) center right no-repeat; padding-right: 12px; }/* Mailto links (Mt). */ +a.link-mail { } /* Mailto links (Mt). */ a.link-man { } /* Manual links (Xr). */ a.link-ref { } /* Reference section links (%Q). */ -a.link-sec { text-decoration: none; border-bottom: 1px dotted #339999; } /* Section links (Sx). */ +a.link-sec { } /* Section links (Sx). */ -/* Formatting for lists. */ +/* Formatting for lists. See mdoc(7). 
*/ dl.list-diag { } dt.list-diag { } @@ -117,7 +110,7 @@ dd.list-inset { } dl.list-ohang { } dt.list-ohang { } -dd.list-ohang { margin-left: 0em; } +dd.list-ohang { margin-left: 0ex; } dl.list-tag { } dt.list-tag { } diff --git a/contrib/mdocml/tbl.c b/contrib/mdocml/tbl.c index 9b331e86bf..6ef2f735b3 100644 --- a/contrib/mdocml/tbl.c +++ b/contrib/mdocml/tbl.c @@ -1,4 +1,4 @@ -/* $Id: tbl.c,v 1.24 2011/03/22 09:48:13 kristaps Exp $ */ +/* $Id: tbl.c,v 1.25 2011/04/04 23:04:38 kristaps Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2011 Ingo Schwarze @@ -15,6 +15,10 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + #include #include #include diff --git a/contrib/mdocml/tbl_layout.c b/contrib/mdocml/tbl_layout.c index 8245003b1d..0aa18dcbeb 100644 --- a/contrib/mdocml/tbl_layout.c +++ b/contrib/mdocml/tbl_layout.c @@ -1,4 +1,4 @@ -/* $Id: tbl_layout.c,v 1.17 2011/03/20 16:02:05 kristaps Exp $ */ +/* $Id: tbl_layout.c,v 1.20 2011/05/17 13:11:40 kristaps Exp $ */ /* * Copyright (c) 2009, 2010 Kristaps Dzonsons * @@ -14,6 +14,10 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + #include #include #include @@ -68,6 +72,23 @@ mods(struct tbl_node *tbl, struct tbl_cell *cp, char buf[5]; int i; + /* Not all types accept modifiers. 
*/ + + switch (cp->pos) { + case (TBL_CELL_DOWN): + /* FALLTHROUGH */ + case (TBL_CELL_HORIZ): + /* FALLTHROUGH */ + case (TBL_CELL_DHORIZ): + /* FALLTHROUGH */ + case (TBL_CELL_VERT): + /* FALLTHROUGH */ + case (TBL_CELL_DVERT): + return(1); + default: + break; + } + mod: /* * XXX: since, at least for now, modifiers are non-conflicting @@ -423,19 +444,19 @@ cell_alloc(struct tbl_node *tbl, struct tbl_row *rp, enum tbl_cellt pos) } static void -head_adjust(const struct tbl_cell *cell, struct tbl_head *head) +head_adjust(const struct tbl_cell *cellp, struct tbl_head *head) { - if (TBL_CELL_VERT != cell->pos && - TBL_CELL_DVERT != cell->pos) { + if (TBL_CELL_VERT != cellp->pos && + TBL_CELL_DVERT != cellp->pos) { head->pos = TBL_HEAD_DATA; return; } - if (TBL_CELL_VERT == cell->pos) + if (TBL_CELL_VERT == cellp->pos) if (TBL_HEAD_DVERT != head->pos) head->pos = TBL_HEAD_VERT; - if (TBL_CELL_DVERT == cell->pos) + if (TBL_CELL_DVERT == cellp->pos) head->pos = TBL_HEAD_DVERT; } diff --git a/contrib/mdocml/tbl_opts.c b/contrib/mdocml/tbl_opts.c index dbdcaa81b7..7b67c13b47 100644 --- a/contrib/mdocml/tbl_opts.c +++ b/contrib/mdocml/tbl_opts.c @@ -1,4 +1,4 @@ -/* $Id: tbl_opts.c,v 1.10 2011/03/20 16:02:05 kristaps Exp $ */ +/* $Id: tbl_opts.c,v 1.11 2011/04/04 23:04:38 kristaps Exp $ */ /* * Copyright (c) 2009, 2010 Kristaps Dzonsons * @@ -14,6 +14,10 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + #include #include #include diff --git a/contrib/mdocml/term.c b/contrib/mdocml/term.c index b0ddd1ed5f..70260bec31 100644 --- a/contrib/mdocml/term.c +++ b/contrib/mdocml/term.c @@ -1,4 +1,4 @@ -/* $Id: term.c,v 1.183 2011/04/04 21:14:12 kristaps Exp $ */ +/* $Id: term.c,v 1.197 2011/05/24 21:31:23 kristaps Exp $ */ /* * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons * Copyright (c) 2010, 2011 Ingo Schwarze @@ -33,13 +33,10 @@ #include "term.h" #include "main.h" -static void spec(struct termp *, enum roffdeco, - const char *, size_t); -static void res(struct termp *, const char *, size_t); -static void bufferc(struct termp *, char); -static void adjbuf(struct termp *p, size_t); -static void encode(struct termp *, const char *, size_t); - +static void adjbuf(struct termp *p, int); +static void bufferc(struct termp *, char); +static void encode(struct termp *, const char *, size_t); +static void encode1(struct termp *, int); void term_free(struct termp *p) @@ -48,7 +45,7 @@ term_free(struct termp *p) if (p->buf) free(p->buf); if (p->symtab) - chars_free(p->symtab); + mchars_free(p->symtab); free(p); } @@ -73,18 +70,6 @@ term_end(struct termp *p) (*p->end)(p); } - -struct termp * -term_alloc(enum termenc enc) -{ - struct termp *p; - - p = mandoc_calloc(1, sizeof(struct termp)); - p->enc = enc; - return(p); -} - - /* * Flush a line of text. A "line" is loosely defined as being something * that should be followed by a newline, regardless of whether it's @@ -156,12 +141,12 @@ term_flushln(struct termp *p) vis = vend = 0; i = 0; - while (i < (int)p->col) { + while (i < p->col) { /* * Handle literal tab characters: collapse all * subsequent tabs into a single huge set of spaces. 
*/ - while (i < (int)p->col && '\t' == p->buf[i]) { + while (i < p->col && '\t' == p->buf[i]) { vend = (vis / p->tabwidth + 1) * p->tabwidth; vbl += vend - vis; vis = vend; @@ -175,7 +160,7 @@ term_flushln(struct termp *p) * space is printed according to regular spacing rules). */ - for (j = i, jhy = 0; j < (int)p->col; j++) { + for (j = i, jhy = 0; j < p->col; j++) { if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j]) break; @@ -218,7 +203,7 @@ term_flushln(struct termp *p) } /* Write out the [remaining] word. */ - for ( ; i < (int)p->col; i++) { + for ( ; i < p->col; i++) { if (vend > bp && jhy > 0 && i > jhy) break; if ('\t' == p->buf[i]) @@ -345,44 +330,6 @@ term_vspace(struct termp *p) (*p->endline)(p); } - -static void -numbered(struct termp *p, const char *word, size_t len) -{ - const char *rhs; - - rhs = chars_num2char(word, len); - if (rhs) - encode(p, rhs, 1); -} - - -static void -spec(struct termp *p, enum roffdeco d, const char *word, size_t len) -{ - const char *rhs; - size_t sz; - - rhs = chars_spec2str(p->symtab, word, len, &sz); - if (rhs) - encode(p, rhs, sz); - else if (DECO_SSPECIAL == d) - encode(p, word, len); -} - - -static void -res(struct termp *p, const char *word, size_t len) -{ - const char *rhs; - size_t sz; - - rhs = chars_res2str(p->symtab, word, len, &sz); - if (rhs) - encode(p, rhs, sz); -} - - void term_fontlast(struct termp *p) { @@ -447,7 +394,6 @@ term_fontpop(struct termp *p) p->fonti--; } - /* * Handle pwords, partial words, which may be either a single word or a * phrase that cannot be broken down (such as a literal string). This @@ -456,9 +402,11 @@ term_fontpop(struct termp *p) void term_word(struct termp *p, const char *word) { - const char *seq; + const char *seq, *cp; + char c; + int sz, uc; size_t ssz; - enum roffdeco deco; + enum mandoc_esc esc; if ( ! (TERMP_NOSPACE & p->flags)) { if ( ! 
(TERMP_KEEP & p->flags)) { @@ -478,7 +426,7 @@ term_word(struct termp *p, const char *word) p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM); - while (*word) { + while ('\0' != *word) { if ((ssz = strcspn(word, "\\")) > 0) encode(p, word, ssz); @@ -486,45 +434,71 @@ term_word(struct termp *p, const char *word) if ('\\' != *word) continue; - seq = ++word; - word += a2roffdeco(&deco, &seq, &ssz); + word++; + esc = mandoc_escape(&word, &seq, &sz); + if (ESCAPE_ERROR == esc) + break; + + if (TERMENC_ASCII != p->enc) + switch (esc) { + case (ESCAPE_UNICODE): + uc = mchars_num2uc(seq + 1, sz - 1); + if ('\0' == uc) + break; + encode1(p, uc); + continue; + case (ESCAPE_SPECIAL): + uc = mchars_spec2cp(p->symtab, seq, sz); + if (uc <= 0) + break; + encode1(p, uc); + continue; + default: + break; + } - switch (deco) { - case (DECO_NUMBERED): - numbered(p, seq, ssz); + switch (esc) { + case (ESCAPE_UNICODE): + encode1(p, '?'); break; - case (DECO_RESERVED): - res(p, seq, ssz); + case (ESCAPE_NUMBERED): + c = mchars_num2char(seq, sz); + if ('\0' != c) + encode(p, &c, 1); break; - case (DECO_SPECIAL): - /* FALLTHROUGH */ - case (DECO_SSPECIAL): - spec(p, deco, seq, ssz); + case (ESCAPE_SPECIAL): + cp = mchars_spec2str(p->symtab, seq, sz, &ssz); + if (NULL != cp) + encode(p, cp, ssz); + else if (1 == ssz) + encode(p, seq, sz); break; - case (DECO_BOLD): + case (ESCAPE_FONTBOLD): term_fontrepl(p, TERMFONT_BOLD); break; - case (DECO_ITALIC): + case (ESCAPE_FONTITALIC): term_fontrepl(p, TERMFONT_UNDER); break; - case (DECO_ROMAN): + case (ESCAPE_FONT): + /* FALLTHROUGH */ + case (ESCAPE_FONTROMAN): term_fontrepl(p, TERMFONT_NONE); break; - case (DECO_PREVIOUS): + case (ESCAPE_FONTPREV): term_fontlast(p); break; + case (ESCAPE_NOSPACE): + if ('\0' == *word) + p->flags |= TERMP_NOSPACE; + break; default: break; } - - if (DECO_NOSPACE == deco && '\0' == *word) - p->flags |= TERMP_NOSPACE; } } - static void -adjbuf(struct termp *p, size_t sz) +adjbuf(struct termp *p, int sz) { if (0 == 
p->maxcols) @@ -532,10 +506,10 @@ adjbuf(struct termp *p, size_t sz) while (sz >= p->maxcols) p->maxcols <<= 2; - p->buf = mandoc_realloc(p->buf, p->maxcols); + p->buf = mandoc_realloc + (p->buf, sizeof(int) * (size_t)p->maxcols); } - static void bufferc(struct termp *p, char c) { @@ -543,15 +517,44 @@ bufferc(struct termp *p, char c) if (p->col + 1 >= p->maxcols) adjbuf(p, p->col + 1); - p->buf[(int)p->col++] = c; + p->buf[p->col++] = c; } +/* + * See encode(). + * Do this for a single (probably unicode) value. + * Does not check for non-decorated glyphs. + */ +static void +encode1(struct termp *p, int c) +{ + enum termfont f; + + if (p->col + 4 >= p->maxcols) + adjbuf(p, p->col + 4); + + f = term_fonttop(p); + + if (TERMFONT_NONE == f) { + p->buf[p->col++] = c; + return; + } else if (TERMFONT_UNDER == f) { + p->buf[p->col++] = '_'; + } else + p->buf[p->col++] = c; + + p->buf[p->col++] = 8; + p->buf[p->col++] = c; +} static void encode(struct termp *p, const char *word, size_t sz) { enum termfont f; - int i; + int i, len; + + /* LINTED */ + len = sz; /* * Encode and buffer a string of characters. If the current @@ -560,35 +563,34 @@ encode(struct termp *p, const char *word, size_t sz) */ if (TERMFONT_NONE == (f = term_fonttop(p))) { - if (p->col + sz >= p->maxcols) - adjbuf(p, p->col + sz); - memcpy(&p->buf[(int)p->col], word, sz); - p->col += sz; + if (p->col + len >= p->maxcols) + adjbuf(p, p->col + len); + for (i = 0; i < len; i++) + p->buf[p->col++] = word[i]; return; } /* Pre-buffer, assuming worst-case. */ - if (p->col + 1 + (sz * 3) >= p->maxcols) - adjbuf(p, p->col + 1 + (sz * 3)); + if (p->col + 1 + (len * 3) >= p->maxcols) + adjbuf(p, p->col + 1 + (len * 3)); - for (i = 0; i < (int)sz; i++) { - if ( ! isgraph((u_char)word[i])) { - p->buf[(int)p->col++] = word[i]; + for (i = 0; i < len; i++) { + if ( ! 
isgraph((unsigned char)word[i])) { + p->buf[p->col++] = word[i]; continue; } if (TERMFONT_UNDER == f) - p->buf[(int)p->col++] = '_'; + p->buf[p->col++] = '_'; else - p->buf[(int)p->col++] = word[i]; + p->buf[p->col++] = word[i]; - p->buf[(int)p->col++] = 8; - p->buf[(int)p->col++] = word[i]; + p->buf[p->col++] = 8; + p->buf[p->col++] = word[i]; } } - size_t term_len(const struct termp *p, size_t sz) { @@ -600,59 +602,99 @@ term_len(const struct termp *p, size_t sz) size_t term_strlen(const struct termp *p, const char *cp) { - size_t sz, ssz, rsz, i; - enum roffdeco d; + size_t sz, rsz, i; + int ssz, c; const char *seq, *rhs; + enum mandoc_esc esc; + static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' }; - for (sz = 0; '\0' != *cp; ) - /* - * Account for escaped sequences within string length - * calculations. This follows the logic in term_word() - * as we must calculate the width of produced strings. - */ - if ('\\' == *cp) { - seq = ++cp; - cp += a2roffdeco(&d, &seq, &ssz); + /* + * Account for escaped sequences within string length + * calculations. This follows the logic in term_word() as we + * must calculate the width of produced strings. 
+ */ - switch (d) { - case (DECO_RESERVED): - rhs = chars_res2str - (p->symtab, seq, ssz, &rsz); + sz = 0; + while ('\0' != *cp) { + rsz = strcspn(cp, rej); + for (i = 0; i < rsz; i++) + sz += (*p->width)(p, *cp++); + + c = 0; + switch (*cp) { + case ('\\'): + cp++; + esc = mandoc_escape(&cp, &seq, &ssz); + if (ESCAPE_ERROR == esc) + return(sz); + + if (TERMENC_ASCII != p->enc) + switch (esc) { + case (ESCAPE_UNICODE): + c = mchars_num2uc + (seq + 1, ssz - 1); + if ('\0' == c) + break; + sz += (*p->width)(p, c); + continue; + case (ESCAPE_SPECIAL): + c = mchars_spec2cp + (p->symtab, seq, ssz); + if (c <= 0) + break; + sz += (*p->width)(p, c); + continue; + default: + break; + } + + rhs = NULL; + + switch (esc) { + case (ESCAPE_UNICODE): + sz += (*p->width)(p, '?'); break; - case (DECO_SPECIAL): - /* FALLTHROUGH */ - case (DECO_SSPECIAL): - rhs = chars_spec2str + case (ESCAPE_NUMBERED): + c = mchars_num2char(seq, ssz); + if ('\0' != c) + sz += (*p->width)(p, c); + break; + case (ESCAPE_SPECIAL): + rhs = mchars_spec2str (p->symtab, seq, ssz, &rsz); - /* Allow for one-char escapes. 
*/ - if (DECO_SSPECIAL != d || rhs) + if (ssz != 1 || rhs) break; rhs = seq; rsz = ssz; break; default: - rhs = NULL; break; } - if (rhs) - for (i = 0; i < rsz; i++) - sz += (*p->width)(p, *rhs++); - } else if (ASCII_NBRSP == *cp) { + if (NULL == rhs) + break; + + for (i = 0; i < rsz; i++) + sz += (*p->width)(p, *rhs++); + break; + case (ASCII_NBRSP): sz += (*p->width)(p, ' '); cp++; - } else if (ASCII_HYPH == *cp) { + break; + case (ASCII_HYPH): sz += (*p->width)(p, '-'); cp++; - } else - sz += (*p->width)(p, *cp++); + break; + default: + break; + } + } return(sz); } - /* ARGSUSED */ size_t term_vspan(const struct termp *p, const struct roffsu *su) @@ -689,7 +731,6 @@ term_vspan(const struct termp *p, const struct roffsu *su) r); } - size_t term_hspan(const struct termp *p, const struct roffsu *su) { diff --git a/contrib/mdocml/term.h b/contrib/mdocml/term.h index 79b738cae9..130024de6c 100644 --- a/contrib/mdocml/term.h +++ b/contrib/mdocml/term.h @@ -1,4 +1,4 @@ -/* $Id: term.h,v 1.79 2011/01/05 15:37:23 kristaps Exp $ */ +/* $Id: term.h,v 1.85 2011/05/20 15:48:22 kristaps Exp $ */ /* * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons * @@ -22,7 +22,9 @@ __BEGIN_DECLS struct termp; enum termenc { - TERMENC_ASCII + TERMENC_ASCII, + TERMENC_LOCALE, + TERMENC_UTF8 }; enum termtype { @@ -42,35 +44,6 @@ enum termfont { typedef void (*term_margin)(struct termp *, const void *); -struct termp_ps { - int flags; -#define PS_INLINE (1 << 0) /* we're in a word */ -#define PS_MARGINS (1 << 1) /* we're in the margins */ -#define PS_NEWPAGE (1 << 2) /* new page, no words yet */ - size_t pscol; /* visible column (AFM units) */ - size_t psrow; /* visible row (AFM units) */ - char *psmarg; /* margin buf */ - size_t psmargsz; /* margin buf size */ - size_t psmargcur; /* cur index in margin buf */ - char last; /* character buffer */ - enum termfont lastf; /* last set font */ - size_t scale; /* font scaling factor */ - size_t pages; /* number of pages shown */ - size_t lineheight; 
/* line height (AFM units) */ - size_t top; /* body top (AFM units) */ - size_t bottom; /* body bottom (AFM units) */ - size_t height; /* page height (AFM units */ - size_t width; /* page width (AFM units) */ - size_t left; /* body left (AFM units) */ - size_t header; /* header pos (AFM units) */ - size_t footer; /* footer pos (AFM units) */ - size_t pdfbytes; /* current output byte */ - size_t pdflastpg; /* byte of last page mark */ - size_t pdfbody; /* start of body object */ - size_t *pdfobjs; /* table of object offsets */ - size_t pdfobjsz; /* size of pdfobjs */ -}; - struct termp_tbl { int width; /* width in fixed chars */ int decimal; /* decimal point position */ @@ -82,10 +55,10 @@ struct termp { size_t defrmargin; /* Right margin of the device. */ size_t rmargin; /* Current right margin. */ size_t maxrmargin; /* Max right margin. */ - size_t maxcols; /* Max size of buf. */ + int maxcols; /* Max size of buf. */ size_t offset; /* Margin offest. */ size_t tabwidth; /* Distance of tab positions. */ - size_t col; /* Bytes in buf. */ + int col; /* Bytes in buf. */ size_t viscol; /* Chars on current line. */ int overstep; /* See termp_flushln(). */ int flags; @@ -103,29 +76,26 @@ struct termp { #define TERMP_ANPREC (1 << 13) /* See termp_an_pre(). */ #define TERMP_KEEP (1 << 14) /* Keep words together. */ #define TERMP_PREKEEP (1 << 15) /* ...starting with the next one. */ - char *buf; /* Output buffer. */ + int *buf; /* Output buffer. */ enum termenc enc; /* Type of encoding. */ - void *symtab; /* Encoded-symbol table. */ + struct mchars *symtab; /* Encoded-symbol table. */ enum termfont fontl; /* Last font set. */ enum termfont fontq[10]; /* Symmetric fonts. */ int fonti; /* Index of font stack. 
*/ term_margin headf; /* invoked to print head */ term_margin footf; /* invoked to print foot */ - void (*letter)(struct termp *, char); + void (*letter)(struct termp *, int); void (*begin)(struct termp *); void (*end)(struct termp *); void (*endline)(struct termp *); void (*advance)(struct termp *, size_t); - size_t (*width)(const struct termp *, char); + size_t (*width)(const struct termp *, int); double (*hspan)(const struct termp *, const struct roffsu *); const void *argf; /* arg for headf/footf */ - union { - struct termp_ps ps; - } engine; + struct termp_ps *ps; }; -struct termp *term_alloc(enum termenc); void term_tbl(struct termp *, const struct tbl_span *); void term_free(struct termp *); void term_newln(struct termp *); diff --git a/contrib/mdocml/term_ascii.c b/contrib/mdocml/term_ascii.c index 374a2a02e0..e65f590a71 100644 --- a/contrib/mdocml/term_ascii.c +++ b/contrib/mdocml/term_ascii.c @@ -1,4 +1,4 @@ -/* $Id: term_ascii.c,v 1.12 2011/01/25 17:32:04 kristaps Exp $ */ +/* $Id: term_ascii.c,v 1.17 2011/05/20 15:48:22 kristaps Exp $ */ /* * Copyright (c) 2010 Kristaps Dzonsons * @@ -21,47 +21,89 @@ #include #include +#ifdef USE_WCHAR +# include +#endif #include #include #include #include +#ifdef USE_WCHAR +# include +#endif #include "mandoc.h" #include "out.h" #include "term.h" #include "main.h" +/* + * Sadly, this doesn't seem to be defined on systems even when they + * support it. For the time being, remove it and let those compiling + * the software decide for themselves what to use. + */ +#if 0 +#if ! 
defined(__STDC_ISO_10646__) +# undef USE_WCHAR +#endif +#endif + +static struct termp *ascii_init(enum termenc, char *); static double ascii_hspan(const struct termp *, const struct roffsu *); -static size_t ascii_width(const struct termp *, char); +static size_t ascii_width(const struct termp *, int); static void ascii_advance(struct termp *, size_t); static void ascii_begin(struct termp *); static void ascii_end(struct termp *); static void ascii_endline(struct termp *); -static void ascii_letter(struct termp *, char); +static void ascii_letter(struct termp *, int); +#ifdef USE_WCHAR +static void locale_advance(struct termp *, size_t); +static void locale_endline(struct termp *); +static void locale_letter(struct termp *, int); +static size_t locale_width(const struct termp *, int); +#endif -void * -ascii_alloc(char *outopts) +static struct termp * +ascii_init(enum termenc enc, char *outopts) { - struct termp *p; const char *toks[2]; char *v; + struct termp *p; - p = term_alloc(TERMENC_ASCII); + p = mandoc_calloc(1, sizeof(struct termp)); + p->enc = enc; p->tabwidth = 5; p->defrmargin = 78; - p->advance = ascii_advance; p->begin = ascii_begin; p->end = ascii_end; - p->endline = ascii_endline; p->hspan = ascii_hspan; - p->letter = ascii_letter; p->type = TERMTYPE_CHAR; + + p->enc = TERMENC_ASCII; + p->advance = ascii_advance; + p->endline = ascii_endline; + p->letter = ascii_letter; p->width = ascii_width; +#ifdef USE_WCHAR + if (TERMENC_ASCII != enc) { + v = TERMENC_LOCALE == enc ? 
+ setlocale(LC_ALL, "") : + setlocale(LC_CTYPE, "UTF-8"); + if (NULL != v && MB_CUR_MAX > 1) { + p->enc = enc; + p->advance = locale_advance; + p->endline = locale_endline; + p->letter = locale_letter; + p->width = locale_width; + } + } +#endif + toks[0] = "width"; toks[1] = NULL; @@ -81,16 +123,36 @@ ascii_alloc(char *outopts) return(p); } +void * +ascii_alloc(char *outopts) +{ + + return(ascii_init(TERMENC_ASCII, outopts)); +} + +void * +utf8_alloc(char *outopts) +{ + + return(ascii_init(TERMENC_UTF8, outopts)); +} + + +void * +locale_alloc(char *outopts) +{ + + return(ascii_init(TERMENC_LOCALE, outopts)); +} /* ARGSUSED */ static size_t -ascii_width(const struct termp *p, char c) +ascii_width(const struct termp *p, int c) { return(1); } - void ascii_free(void *arg) { @@ -98,17 +160,14 @@ ascii_free(void *arg) term_free((struct termp *)arg); } - /* ARGSUSED */ static void -ascii_letter(struct termp *p, char c) +ascii_letter(struct termp *p, int c) { - /* LINTED */ putchar(c); } - static void ascii_begin(struct termp *p) { @@ -116,7 +175,6 @@ ascii_begin(struct termp *p) (*p->headf)(p, p->argf); } - static void ascii_end(struct termp *p) { @@ -124,7 +182,6 @@ ascii_end(struct termp *p) (*p->footf)(p, p->argf); } - /* ARGSUSED */ static void ascii_endline(struct termp *p) @@ -133,19 +190,16 @@ ascii_endline(struct termp *p) putchar('\n'); } - /* ARGSUSED */ static void ascii_advance(struct termp *p, size_t len) { size_t i; - /* Just print whitespace on the terminal. */ for (i = 0; i < len; i++) putchar(' '); } - /* ARGSUSED */ static double ascii_hspan(const struct termp *p, const struct roffsu *su) @@ -184,3 +238,39 @@ ascii_hspan(const struct termp *p, const struct roffsu *su) return(r); } +#ifdef USE_WCHAR +/* ARGSUSED */ +static size_t +locale_width(const struct termp *p, int c) +{ + int rc; + + return((rc = wcwidth(c)) < 0 ? 
0 : rc); +} + +/* ARGSUSED */ +static void +locale_advance(struct termp *p, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) + putwchar(L' '); +} + +/* ARGSUSED */ +static void +locale_endline(struct termp *p) +{ + + putwchar(L'\n'); +} + +/* ARGSUSED */ +static void +locale_letter(struct termp *p, int c) +{ + + putwchar(c); +} +#endif diff --git a/contrib/mdocml/term_ps.c b/contrib/mdocml/term_ps.c index 233118b819..44e492a2ea 100644 --- a/contrib/mdocml/term_ps.c +++ b/contrib/mdocml/term_ps.c @@ -1,4 +1,4 @@ -/* $Id: term_ps.c,v 1.48 2011/03/17 08:49:34 kristaps Exp $ */ +/* $Id: term_ps.c,v 1.51 2011/05/17 14:38:34 kristaps Exp $ */ /* * Copyright (c) 2010 Kristaps Dzonsons * @@ -34,13 +34,16 @@ #include "main.h" #include "term.h" +/* These work the buffer used by the header and footer. */ +#define PS_BUFSLOP 128 + /* Convert PostScript point "x" to an AFM unit. */ #define PNT2AFM(p, x) /* LINTED */ \ - (size_t)((double)(x) * (1000.0 / (double)(p)->engine.ps.scale)) + (size_t)((double)(x) * (1000.0 / (double)(p)->ps->scale)) /* Convert an AFM unit "x" to a PostScript points */ #define AFM2PNT(p, x) /* LINTED */ \ - ((double)(x) / (1000.0 / (double)(p)->engine.ps.scale)) + ((double)(x) / (1000.0 / (double)(p)->ps->scale)) struct glyph { unsigned short wx; /* WX in AFM */ @@ -52,6 +55,54 @@ struct font { struct glyph gly[MAXCHAR]; /* glyph metrics */ }; +struct termp_ps { + int flags; +#define PS_INLINE (1 << 0) /* we're in a word */ +#define PS_MARGINS (1 << 1) /* we're in the margins */ +#define PS_NEWPAGE (1 << 2) /* new page, no words yet */ + size_t pscol; /* visible column (AFM units) */ + size_t psrow; /* visible row (AFM units) */ + char *psmarg; /* margin buf */ + size_t psmargsz; /* margin buf size */ + size_t psmargcur; /* cur index in margin buf */ + char last; /* character buffer */ + enum termfont lastf; /* last set font */ + size_t scale; /* font scaling factor */ + size_t pages; /* number of pages shown */ + size_t lineheight; /* line 
height (AFM units) */ + size_t top; /* body top (AFM units) */ + size_t bottom; /* body bottom (AFM units) */ + size_t height; /* page height (AFM units */ + size_t width; /* page width (AFM units) */ + size_t left; /* body left (AFM units) */ + size_t header; /* header pos (AFM units) */ + size_t footer; /* footer pos (AFM units) */ + size_t pdfbytes; /* current output byte */ + size_t pdflastpg; /* byte of last page mark */ + size_t pdfbody; /* start of body object */ + size_t *pdfobjs; /* table of object offsets */ + size_t pdfobjsz; /* size of pdfobjs */ +}; + +static double ps_hspan(const struct termp *, + const struct roffsu *); +static size_t ps_width(const struct termp *, int); +static void ps_advance(struct termp *, size_t); +static void ps_begin(struct termp *); +static void ps_closepage(struct termp *); +static void ps_end(struct termp *); +static void ps_endline(struct termp *); +static void ps_fclose(struct termp *); +static void ps_growbuf(struct termp *, size_t); +static void ps_letter(struct termp *, int); +static void ps_pclose(struct termp *); +static void ps_pletter(struct termp *, int); +static void ps_printf(struct termp *, const char *, ...); +static void ps_putchar(struct termp *, char); +static void ps_setfont(struct termp *, enum termfont); +static struct termp *pspdf_alloc(char *); +static void pdf_obj(struct termp *, size_t); + /* * We define, for the time being, three fonts: bold, oblique/italic, and * normal (roman). The following table hard-codes the font metrics for @@ -352,44 +403,6 @@ static const struct font fonts[TERMFONT__MAX] = { } }, }; -/* These work the buffer used by the header and footer. 
*/ -#define PS_BUFSLOP 128 - -static void -ps_growbuf(struct termp *p, size_t sz) -{ - if (p->engine.ps.psmargcur + sz <= p->engine.ps.psmargsz) - return; - - if (sz < PS_BUFSLOP) - sz = PS_BUFSLOP; - - p->engine.ps.psmargsz += sz; - - p->engine.ps.psmarg = mandoc_realloc - (p->engine.ps.psmarg, - p->engine.ps.psmargsz); -} - -static double ps_hspan(const struct termp *, - const struct roffsu *); -static size_t ps_width(const struct termp *, char); -static void ps_advance(struct termp *, size_t); -static void ps_begin(struct termp *); -static void ps_closepage(struct termp *); -static void ps_end(struct termp *); -static void ps_endline(struct termp *); -static void ps_fclose(struct termp *); -static void ps_letter(struct termp *, char); -static void ps_pclose(struct termp *); -static void ps_pletter(struct termp *, int); -static void ps_printf(struct termp *, const char *, ...); -static void ps_putchar(struct termp *, char); -static void ps_setfont(struct termp *, enum termfont); -static struct termp *pspdf_alloc(char *); -static void pdf_obj(struct termp *, size_t); - - void * pdf_alloc(char *outopts) { @@ -401,7 +414,6 @@ pdf_alloc(char *outopts) return(p); } - void * ps_alloc(char *outopts) { @@ -413,7 +425,6 @@ ps_alloc(char *outopts) return(p); } - static struct termp * pspdf_alloc(char *outopts) { @@ -423,7 +434,9 @@ pspdf_alloc(char *outopts) const char *pp; char *v; - p = term_alloc(TERMENC_ASCII); + p = mandoc_calloc(1, sizeof(struct termp)); + p->enc = TERMENC_ASCII; + p->ps = mandoc_calloc(1, sizeof(struct termp_ps)); p->advance = ps_advance; p->begin = ps_begin; @@ -482,7 +495,7 @@ pspdf_alloc(char *outopts) * calculations occur. */ - p->engine.ps.scale = 11; + p->ps->scale = 11; /* Remember millimetres -> AFM units. */ @@ -498,16 +511,16 @@ pspdf_alloc(char *outopts) /* Line-height is 1.4em. 
*/ - lineheight = PNT2AFM(p, ((double)p->engine.ps.scale * 1.4)); + lineheight = PNT2AFM(p, ((double)p->ps->scale * 1.4)); - p->engine.ps.width = pagex; - p->engine.ps.height = pagey; - p->engine.ps.header = pagey - (marginy / 2) - (lineheight / 2); - p->engine.ps.top = pagey - marginy; - p->engine.ps.footer = (marginy / 2) - (lineheight / 2); - p->engine.ps.bottom = marginy; - p->engine.ps.left = marginx; - p->engine.ps.lineheight = lineheight; + p->ps->width = pagex; + p->ps->height = pagey; + p->ps->header = pagey - (marginy / 2) - (lineheight / 2); + p->ps->top = pagey - marginy; + p->ps->footer = (marginy / 2) - (lineheight / 2); + p->ps->bottom = marginy; + p->ps->left = marginx; + p->ps->lineheight = lineheight; p->defrmargin = pagex - (marginx * 2); return(p); @@ -521,11 +534,12 @@ pspdf_free(void *arg) p = (struct termp *)arg; - if (p->engine.ps.psmarg) - free(p->engine.ps.psmarg); - if (p->engine.ps.pdfobjs) - free(p->engine.ps.pdfobjs); + if (p->ps->psmarg) + free(p->ps->psmarg); + if (p->ps->pdfobjs) + free(p->ps->pdfobjs); + free(p->ps); term_free(p); } @@ -544,10 +558,10 @@ ps_printf(struct termp *p, const char *fmt, ...) * into our growable margin buffer. */ - if ( ! (PS_MARGINS & p->engine.ps.flags)) { + if ( ! (PS_MARGINS & p->ps->flags)) { len = vprintf(fmt, ap); va_end(ap); - p->engine.ps.pdfbytes += /* LINTED */ + p->ps->pdfbytes += /* LINTED */ len < 0 ? 0 : (size_t)len; return; } @@ -560,12 +574,12 @@ ps_printf(struct termp *p, const char *fmt, ...) ps_growbuf(p, PS_BUFSLOP); - pos = (int)p->engine.ps.psmargcur; - len = vsnprintf(&p->engine.ps.psmarg[pos], PS_BUFSLOP, fmt, ap); + pos = (int)p->ps->psmargcur; + len = vsnprintf(&p->ps->psmarg[pos], PS_BUFSLOP, fmt, ap); va_end(ap); - p->engine.ps.psmargcur = strlen(p->engine.ps.psmarg); + p->ps->psmargcur = strlen(p->ps->psmarg); } @@ -576,18 +590,18 @@ ps_putchar(struct termp *p, char c) /* See ps_printf(). */ - if ( ! (PS_MARGINS & p->engine.ps.flags)) { + if ( ! 
(PS_MARGINS & p->ps->flags)) { /* LINTED */ putchar(c); - p->engine.ps.pdfbytes++; + p->ps->pdfbytes++; return; } ps_growbuf(p, 2); - pos = (int)p->engine.ps.psmargcur++; - p->engine.ps.psmarg[pos++] = c; - p->engine.ps.psmarg[pos] = '\0'; + pos = (int)p->ps->psmargcur++; + p->ps->psmarg[pos++] = c; + p->ps->psmarg[pos] = '\0'; } @@ -597,18 +611,18 @@ pdf_obj(struct termp *p, size_t obj) assert(obj > 0); - if ((obj - 1) >= p->engine.ps.pdfobjsz) { - p->engine.ps.pdfobjsz = obj + 128; - p->engine.ps.pdfobjs = realloc - (p->engine.ps.pdfobjs, - p->engine.ps.pdfobjsz * sizeof(size_t)); - if (NULL == p->engine.ps.pdfobjs) { + if ((obj - 1) >= p->ps->pdfobjsz) { + p->ps->pdfobjsz = obj + 128; + p->ps->pdfobjs = realloc + (p->ps->pdfobjs, + p->ps->pdfobjsz * sizeof(size_t)); + if (NULL == p->ps->pdfobjs) { perror(NULL); exit((int)MANDOCLEVEL_SYSERR); } } - p->engine.ps.pdfobjs[(int)obj - 1] = p->engine.ps.pdfbytes; + p->ps->pdfobjs[(int)obj - 1] = p->ps->pdfbytes; ps_printf(p, "%zu 0 obj\n", obj); } @@ -626,14 +640,14 @@ ps_closepage(struct termp *p) * for the page contents. */ - assert(p->engine.ps.psmarg && p->engine.ps.psmarg[0]); - ps_printf(p, "%s", p->engine.ps.psmarg); + assert(p->ps->psmarg && p->ps->psmarg[0]); + ps_printf(p, "%s", p->ps->psmarg); if (TERMTYPE_PS != p->type) { ps_printf(p, "ET\n"); - len = p->engine.ps.pdfbytes - p->engine.ps.pdflastpg; - base = p->engine.ps.pages * 4 + p->engine.ps.pdfbody; + len = p->ps->pdfbytes - p->ps->pdflastpg; + base = p->ps->pages * 4 + p->ps->pdfbody; ps_printf(p, "endstream\nendobj\n"); @@ -660,10 +674,10 @@ ps_closepage(struct termp *p) } else ps_printf(p, "showpage\n"); - p->engine.ps.pages++; - p->engine.ps.psrow = p->engine.ps.top; - assert( ! (PS_NEWPAGE & p->engine.ps.flags)); - p->engine.ps.flags |= PS_NEWPAGE; + p->ps->pages++; + p->ps->psrow = p->ps->top; + assert( ! (PS_NEWPAGE & p->ps->flags)); + p->ps->flags |= PS_NEWPAGE; } @@ -679,15 +693,15 @@ ps_end(struct termp *p) * well as just one. */ - if ( ! 
(PS_NEWPAGE & p->engine.ps.flags)) { - assert(0 == p->engine.ps.flags); - assert('\0' == p->engine.ps.last); + if ( ! (PS_NEWPAGE & p->ps->flags)) { + assert(0 == p->ps->flags); + assert('\0' == p->ps->last); ps_closepage(p); } if (TERMTYPE_PS == p->type) { ps_printf(p, "%%%%Trailer\n"); - ps_printf(p, "%%%%Pages: %zu\n", p->engine.ps.pages); + ps_printf(p, "%%%%Pages: %zu\n", p->ps->pages); ps_printf(p, "%%%%EOF\n"); return; } @@ -695,18 +709,18 @@ ps_end(struct termp *p) pdf_obj(p, 2); ps_printf(p, "<<\n/Type /Pages\n"); ps_printf(p, "/MediaBox [0 0 %zu %zu]\n", - (size_t)AFM2PNT(p, p->engine.ps.width), - (size_t)AFM2PNT(p, p->engine.ps.height)); + (size_t)AFM2PNT(p, p->ps->width), + (size_t)AFM2PNT(p, p->ps->height)); - ps_printf(p, "/Count %zu\n", p->engine.ps.pages); + ps_printf(p, "/Count %zu\n", p->ps->pages); ps_printf(p, "/Kids ["); - for (i = 0; i < p->engine.ps.pages; i++) + for (i = 0; i < p->ps->pages; i++) ps_printf(p, " %zu 0 R", i * 4 + - p->engine.ps.pdfbody + 3); + p->ps->pdfbody + 3); - base = (p->engine.ps.pages - 1) * 4 + - p->engine.ps.pdfbody + 4; + base = (p->ps->pages - 1) * 4 + + p->ps->pdfbody + 4; ps_printf(p, "]\n>>\nendobj\n"); pdf_obj(p, base); @@ -714,14 +728,14 @@ ps_end(struct termp *p) ps_printf(p, "/Type /Catalog\n"); ps_printf(p, "/Pages 2 0 R\n"); ps_printf(p, ">>\n"); - xref = p->engine.ps.pdfbytes; + xref = p->ps->pdfbytes; ps_printf(p, "xref\n"); ps_printf(p, "0 %zu\n", base + 1); ps_printf(p, "0000000000 65535 f \n"); for (i = 0; i < base; i++) ps_printf(p, "%.10zu 00000 n \n", - p->engine.ps.pdfobjs[(int)i]); + p->ps->pdfobjs[(int)i]); ps_printf(p, "trailer\n"); ps_printf(p, "<<\n"); @@ -746,33 +760,33 @@ ps_begin(struct termp *p) * screen yet, so we don't need to initialise the primary state. 
*/ - if (p->engine.ps.psmarg) { - assert(p->engine.ps.psmargsz); - p->engine.ps.psmarg[0] = '\0'; + if (p->ps->psmarg) { + assert(p->ps->psmargsz); + p->ps->psmarg[0] = '\0'; } - /*p->engine.ps.pdfbytes = 0;*/ - p->engine.ps.psmargcur = 0; - p->engine.ps.flags = PS_MARGINS; - p->engine.ps.pscol = p->engine.ps.left; - p->engine.ps.psrow = p->engine.ps.header; + /*p->ps->pdfbytes = 0;*/ + p->ps->psmargcur = 0; + p->ps->flags = PS_MARGINS; + p->ps->pscol = p->ps->left; + p->ps->psrow = p->ps->header; ps_setfont(p, TERMFONT_NONE); (*p->headf)(p, p->argf); (*p->endline)(p); - p->engine.ps.pscol = p->engine.ps.left; - p->engine.ps.psrow = p->engine.ps.footer; + p->ps->pscol = p->ps->left; + p->ps->psrow = p->ps->footer; (*p->footf)(p, p->argf); (*p->endline)(p); - p->engine.ps.flags &= ~PS_MARGINS; + p->ps->flags &= ~PS_MARGINS; - assert(0 == p->engine.ps.flags); - assert(p->engine.ps.psmarg); - assert('\0' != p->engine.ps.psmarg[0]); + assert(0 == p->ps->flags); + assert(p->ps->psmarg); + assert('\0' != p->ps->psmarg[0]); /* * Print header and initialise page state. 
Following this, @@ -790,8 +804,8 @@ ps_begin(struct termp *p) ps_printf(p, "%%%%PageOrder: Ascend\n"); ps_printf(p, "%%%%DocumentMedia: " "Default %zu %zu 0 () ()\n", - (size_t)AFM2PNT(p, p->engine.ps.width), - (size_t)AFM2PNT(p, p->engine.ps.height)); + (size_t)AFM2PNT(p, p->ps->width), + (size_t)AFM2PNT(p, p->ps->height)); ps_printf(p, "%%%%DocumentNeededResources: font"); for (i = 0; i < (int)TERMFONT__MAX; i++) @@ -816,10 +830,10 @@ ps_begin(struct termp *p) } } - p->engine.ps.pdfbody = (size_t)TERMFONT__MAX + 3; - p->engine.ps.pscol = p->engine.ps.left; - p->engine.ps.psrow = p->engine.ps.top; - p->engine.ps.flags |= PS_NEWPAGE; + p->ps->pdfbody = (size_t)TERMFONT__MAX + 3; + p->ps->pscol = p->ps->left; + p->ps->psrow = p->ps->top; + p->ps->flags |= PS_NEWPAGE; ps_setfont(p, TERMFONT_NONE); } @@ -834,25 +848,25 @@ ps_pletter(struct termp *p, int c) * in a new page and make sure the font is correctly set. */ - if (PS_NEWPAGE & p->engine.ps.flags) { + if (PS_NEWPAGE & p->ps->flags) { if (TERMTYPE_PS == p->type) { ps_printf(p, "%%%%Page: %zu %zu\n", - p->engine.ps.pages + 1, - p->engine.ps.pages + 1); + p->ps->pages + 1, + p->ps->pages + 1); ps_printf(p, "/%s %zu selectfont\n", - fonts[(int)p->engine.ps.lastf].name, - p->engine.ps.scale); + fonts[(int)p->ps->lastf].name, + p->ps->scale); } else { - pdf_obj(p, p->engine.ps.pdfbody + - p->engine.ps.pages * 4); + pdf_obj(p, p->ps->pdfbody + + p->ps->pages * 4); ps_printf(p, "<<\n"); ps_printf(p, "/Length %zu 0 R\n", - p->engine.ps.pdfbody + 1 + - p->engine.ps.pages * 4); + p->ps->pdfbody + 1 + + p->ps->pages * 4); ps_printf(p, ">>\nstream\n"); } - p->engine.ps.pdflastpg = p->engine.ps.pdfbytes; - p->engine.ps.flags &= ~PS_NEWPAGE; + p->ps->pdflastpg = p->ps->pdfbytes; + p->ps->flags &= ~PS_NEWPAGE; } /* @@ -860,22 +874,22 @@ ps_pletter(struct termp *p, int c) * now at the current cursor. */ - if ( ! (PS_INLINE & p->engine.ps.flags)) { + if ( ! 
(PS_INLINE & p->ps->flags)) { if (TERMTYPE_PS != p->type) { ps_printf(p, "BT\n/F%d %zu Tf\n", - (int)p->engine.ps.lastf, - p->engine.ps.scale); + (int)p->ps->lastf, + p->ps->scale); ps_printf(p, "%.3f %.3f Td\n(", - AFM2PNT(p, p->engine.ps.pscol), - AFM2PNT(p, p->engine.ps.psrow)); + AFM2PNT(p, p->ps->pscol), + AFM2PNT(p, p->ps->psrow)); } else ps_printf(p, "%.3f %.3f moveto\n(", - AFM2PNT(p, p->engine.ps.pscol), - AFM2PNT(p, p->engine.ps.psrow)); - p->engine.ps.flags |= PS_INLINE; + AFM2PNT(p, p->ps->pscol), + AFM2PNT(p, p->ps->psrow)); + p->ps->flags |= PS_INLINE; } - assert( ! (PS_NEWPAGE & p->engine.ps.flags)); + assert( ! (PS_NEWPAGE & p->ps->flags)); /* * We need to escape these characters as per the PostScript @@ -898,17 +912,17 @@ ps_pletter(struct termp *p, int c) /* Write the character and adjust where we are on the page. */ - f = (int)p->engine.ps.lastf; + f = (int)p->ps->lastf; if (c <= 32 || (c - 32 >= MAXCHAR)) { ps_putchar(p, ' '); - p->engine.ps.pscol += (size_t)fonts[f].gly[0].wx; + p->ps->pscol += (size_t)fonts[f].gly[0].wx; return; } ps_putchar(p, (char)c); c -= 32; - p->engine.ps.pscol += (size_t)fonts[f].gly[c].wx; + p->ps->pscol += (size_t)fonts[f].gly[c].wx; } @@ -922,7 +936,7 @@ ps_pclose(struct termp *p) * or anything). */ - if ( ! (PS_INLINE & p->engine.ps.flags)) + if ( ! (PS_INLINE & p->ps->flags)) return; if (TERMTYPE_PS != p->type) { @@ -930,7 +944,7 @@ ps_pclose(struct termp *p) } else ps_printf(p, ") show\n"); - p->engine.ps.flags &= ~PS_INLINE; + p->ps->flags &= ~PS_INLINE; } @@ -946,16 +960,16 @@ ps_fclose(struct termp *p) * Following this, close out any scope that's open. */ - if ('\0' != p->engine.ps.last) { - if (p->engine.ps.lastf != TERMFONT_NONE) { + if ('\0' != p->ps->last) { + if (p->ps->lastf != TERMFONT_NONE) { ps_pclose(p); ps_setfont(p, TERMFONT_NONE); } - ps_pletter(p, p->engine.ps.last); - p->engine.ps.last = '\0'; + ps_pletter(p, p->ps->last); + p->ps->last = '\0'; } - if ( ! 
(PS_INLINE & p->engine.ps.flags)) + if ( ! (PS_INLINE & p->ps->flags)) return; ps_pclose(p); @@ -963,9 +977,12 @@ ps_fclose(struct termp *p) static void -ps_letter(struct termp *p, char c) +ps_letter(struct termp *p, int arg) { - char cc; + char cc, c; + + /* LINTED */ + c = arg >= 128 || arg <= 0 ? '?' : arg; /* * State machine dictates whether to buffer the last character @@ -976,33 +993,33 @@ ps_letter(struct termp *p, char c) * regular character and a regular buffer character. */ - if ('\0' == p->engine.ps.last) { + if ('\0' == p->ps->last) { assert(8 != c); - p->engine.ps.last = c; + p->ps->last = c; return; - } else if (8 == p->engine.ps.last) { + } else if (8 == p->ps->last) { assert(8 != c); - p->engine.ps.last = '\0'; + p->ps->last = '\0'; } else if (8 == c) { - assert(8 != p->engine.ps.last); - if ('_' == p->engine.ps.last) { - if (p->engine.ps.lastf != TERMFONT_UNDER) { + assert(8 != p->ps->last); + if ('_' == p->ps->last) { + if (p->ps->lastf != TERMFONT_UNDER) { ps_pclose(p); ps_setfont(p, TERMFONT_UNDER); } - } else if (p->engine.ps.lastf != TERMFONT_BOLD) { + } else if (p->ps->lastf != TERMFONT_BOLD) { ps_pclose(p); ps_setfont(p, TERMFONT_BOLD); } - p->engine.ps.last = c; + p->ps->last = c; return; } else { - if (p->engine.ps.lastf != TERMFONT_NONE) { + if (p->ps->lastf != TERMFONT_NONE) { ps_pclose(p); ps_setfont(p, TERMFONT_NONE); } - cc = p->engine.ps.last; - p->engine.ps.last = c; + cc = p->ps->last; + p->ps->last = c; c = cc; } @@ -1022,7 +1039,7 @@ ps_advance(struct termp *p, size_t len) */ ps_fclose(p); - p->engine.ps.pscol += len; + p->ps->pscol += len; } @@ -1040,16 +1057,16 @@ ps_endline(struct termp *p) * lines, we'll do nasty stuff. */ - if (PS_MARGINS & p->engine.ps.flags) + if (PS_MARGINS & p->ps->flags) return; /* Left-justify. */ - p->engine.ps.pscol = p->engine.ps.left; + p->ps->pscol = p->ps->left; /* If we haven't printed anything, return. 
*/ - if (PS_NEWPAGE & p->engine.ps.flags) + if (PS_NEWPAGE & p->ps->flags) return; /* @@ -1057,9 +1074,9 @@ ps_endline(struct termp *p) * showpage and restart our row. */ - if (p->engine.ps.psrow >= p->engine.ps.lineheight + - p->engine.ps.bottom) { - p->engine.ps.psrow -= p->engine.ps.lineheight; + if (p->ps->psrow >= p->ps->lineheight + + p->ps->bottom) { + p->ps->psrow -= p->ps->lineheight; return; } @@ -1072,37 +1089,37 @@ ps_setfont(struct termp *p, enum termfont f) { assert(f < TERMFONT__MAX); - p->engine.ps.lastf = f; + p->ps->lastf = f; /* * If we're still at the top of the page, let the font-setting * be delayed until we actually have stuff to print. */ - if (PS_NEWPAGE & p->engine.ps.flags) + if (PS_NEWPAGE & p->ps->flags) return; if (TERMTYPE_PS == p->type) ps_printf(p, "/%s %zu selectfont\n", fonts[(int)f].name, - p->engine.ps.scale); + p->ps->scale); else ps_printf(p, "/F%d %zu Tf\n", (int)f, - p->engine.ps.scale); + p->ps->scale); } /* ARGSUSED */ static size_t -ps_width(const struct termp *p, char c) +ps_width(const struct termp *p, int c) { if (c <= 32 || c - 32 >= MAXCHAR) return((size_t)fonts[(int)TERMFONT_NONE].gly[0].wx); c -= 32; - return((size_t)fonts[(int)TERMFONT_NONE].gly[(int)c].wx); + return((size_t)fonts[(int)TERMFONT_NONE].gly[c].wx); } @@ -1141,7 +1158,7 @@ ps_hspan(const struct termp *p, const struct roffsu *su) fonts[(int)TERMFONT_NONE].gly[110 - 32].wx; break; case (SCALE_VS): - r = su->scale * p->engine.ps.lineheight; + r = su->scale * p->ps->lineheight; break; default: r = su->scale; @@ -1151,3 +1168,18 @@ ps_hspan(const struct termp *p, const struct roffsu *su) return(r); } +static void +ps_growbuf(struct termp *p, size_t sz) +{ + if (p->ps->psmargcur + sz <= p->ps->psmargsz) + return; + + if (sz < PS_BUFSLOP) + sz = PS_BUFSLOP; + + p->ps->psmargsz += sz; + + p->ps->psmarg = mandoc_realloc + (p->ps->psmarg, p->ps->psmargsz); +} + diff --git a/contrib/mdocml/test-mmap.c b/contrib/mdocml/test-mmap.c new file mode 100644 index 
0000000000..db8fd8732e --- /dev/null +++ b/contrib/mdocml/test-mmap.c @@ -0,0 +1,10 @@ +#include +#include + +int +main(int argc, char **argv) +{ + + mmap(0, 0, PROT_READ, MAP_FILE|MAP_SHARED, -1, 0); + return 0; +} -- 2.41.0