From a4c7eb570f02f94484dd27694bc4c315771426d0 Mon Sep 17 00:00:00 2001
From: Sascha Wildner <saw@online.de>
Date: Thu, 2 Jun 2011 20:31:59 +0200
Subject: [PATCH] Import mdocml-1.11.3

---
 contrib/mdocml/Makefile          |  89 ++-
 contrib/mdocml/att.in            |  30 +-
 contrib/mdocml/chars.c           | 127 ++---
 contrib/mdocml/chars.in          |  75 +--
 contrib/mdocml/example.style.css |  32 +-
 contrib/mdocml/html.c            | 341 +++++-------
 contrib/mdocml/html.h            |  18 +-
 contrib/mdocml/index.css         |  61 +-
 contrib/mdocml/index.sgml        | 744 ++++++++++++-------------
 contrib/mdocml/libmandoc.h       |   4 +-
 contrib/mdocml/libmdoc.h         |  30 +-
 contrib/mdocml/main.c            |  23 +-
 contrib/mdocml/main.h            |   4 +-
 contrib/mdocml/makewhatis.1      | 152 +++++
 contrib/mdocml/makewhatis.c      | 920 +++++++++++++++++++++++++++++++
 contrib/mdocml/man.7             |  16 +-
 contrib/mdocml/man_html.c        |  14 +-
 contrib/mdocml/man_macro.c       |   2 +-
 contrib/mdocml/man_term.c        |  11 +-
 contrib/mdocml/man_validate.c    |  67 +--
 contrib/mdocml/mandoc.1          |  44 +-
 contrib/mdocml/mandoc.3          | 191 ++++++-
 contrib/mdocml/mandoc.c          | 485 +++++++++++-----
 contrib/mdocml/mandoc.h          |  27 +-
 contrib/mdocml/mandoc_char.7     |  32 +-
 contrib/mdocml/mdoc.7            | 163 +++---
 contrib/mdocml/mdoc_argv.c       | 377 ++++++-------
 contrib/mdocml/mdoc_html.c       |  69 ++-
 contrib/mdocml/mdoc_macro.c      |  30 +-
 contrib/mdocml/mdoc_term.c       |  11 +-
 contrib/mdocml/mdoc_validate.c   |  38 +-
 contrib/mdocml/out.c             | 239 +-------
 contrib/mdocml/out.h             |  32 +-
 contrib/mdocml/preconv.1         | 161 ++++++
 contrib/mdocml/preconv.c         | 522 ++++++++++++++++++
 contrib/mdocml/predefs.in        |  65 +++
 contrib/mdocml/read.c            |  24 +-
 contrib/mdocml/roff.7            |  25 +-
 contrib/mdocml/roff.c            | 192 +++----
 contrib/mdocml/st.in             |  50 +-
 contrib/mdocml/style.css         |  75 ++-
 contrib/mdocml/tbl.c             |   6 +-
 contrib/mdocml/tbl_layout.c      |  33 +-
 contrib/mdocml/tbl_opts.c        |   6 +-
 contrib/mdocml/term.c            | 313 ++++++-----
 contrib/mdocml/term.h            |  52 +-
 contrib/mdocml/term_ascii.c      | 132 ++++-
 contrib/mdocml/term_ps.c         | 380 +++++++------
 contrib/mdocml/test-mmap.c       |  10 +
 49 files changed, 4298 insertions(+), 2246 deletions(-)
 create mode 100644 contrib/mdocml/makewhatis.1
 create mode 100644 contrib/mdocml/makewhatis.c
 create mode 100644 contrib/mdocml/preconv.1
 create mode 100644 contrib/mdocml/preconv.c
 create mode 100644 contrib/mdocml/predefs.in
 create mode 100644 contrib/mdocml/test-mmap.c

diff --git a/contrib/mdocml/Makefile b/contrib/mdocml/Makefile
index 3cc7daecb4..c535f105b9 100644
--- a/contrib/mdocml/Makefile
+++ b/contrib/mdocml/Makefile
@@ -11,9 +11,14 @@
 # in the lower-left hand corner of -mdoc manuals.
 # CFLAGS	+= -DOSNAME="\"OpenBSD 4.5\""
 
-VERSION		 = 1.11.1
-VDATE		 = 04 April 2011
-CFLAGS		+= -g -DHAVE_CONFIG_H -DVERSION="\"$(VERSION)\""
+VERSION		 = 1.11.3
+VDATE		 = 26 May 2011
+# IFF your system supports multi-byte functions (setlocale(), wcwidth(),
+# putwchar()) AND has __STDC_ISO_10646__ (that is, wchar_t is simply a
+# UCS-4 value) should you define USE_WCHAR.  If you define it and your
+# system DOESN'T support this, -Tlocale will produce garbage.
+# If you don't define it, -Tlocale is a synonym for -Tascii.
+CFLAGS		+= -g -DUSE_WCHAR -DHAVE_CONFIG_H -DVERSION="\"$(VERSION)\""
 CFLAGS     	+= -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings
 PREFIX		 = /usr/local
 BINDIR		 = $(PREFIX)/bin
@@ -27,7 +32,7 @@ INSTALL_DATA	 = $(INSTALL) -m 0444
 INSTALL_LIB	 = $(INSTALL) -m 0644
 INSTALL_MAN	 = $(INSTALL_DATA)
 
-all: mandoc
+all: mandoc preconv
 
 SRCS		 = Makefile \
 		   arch.c \
@@ -67,8 +72,8 @@ SRCS		 = Makefile \
 		   mandoc.3 \
 		   mandoc.c \
 		   mandoc.h \
-		   mandoc-db.1 \
-		   mandoc-db.c \
+		   makewhatis.1 \
+		   makewhatis.c \
 		   mandoc_char.7 \
 		   mdoc.h \
 		   mdoc.7 \
@@ -83,6 +88,9 @@ SRCS		 = Makefile \
 		   msec.in \
 		   out.c \
 		   out.h \
+		   preconv.1 \
+		   preconv.c \
+		   predefs.in \
 		   read.c \
 		   roff.7 \
 		   roff.c \
@@ -100,6 +108,7 @@ SRCS		 = Makefile \
 		   term.h \
 		   term_ascii.c \
 		   term_ps.c \
+		   test-mmap.c \
 		   test-strlcat.c \
 		   test-strlcpy.c \
 		   tree.c \
@@ -154,18 +163,22 @@ LIBROFF_LNS	 = eqn.ln \
 LIBMANDOC_OBJS	 = $(LIBMAN_OBJS) \
 		   $(LIBMDOC_OBJS) \
 		   $(LIBROFF_OBJS) \
+		   chars.o \
 		   mandoc.o \
 		   read.o
 LIBMANDOC_LNS	 = $(LIBMAN_LNS) \
 		   $(LIBMDOC_LNS) \
 		   $(LIBROFF_LNS) \
+		   chars.ln \
 		   mandoc.ln \
 		   read.ln
 
 arch.o arch.ln: arch.in
 att.o att.ln: att.in
+chars.o chars.ln: chars.in
 lib.o lib.ln: lib.in
 msec.o msec.ln: msec.in
+roff.o roff.ln: predefs.in
 st.o st.ln: st.in
 vol.o vol.ln: vol.in
 
@@ -198,31 +211,37 @@ MANDOC_TERM_LNS	 = man_term.ln \
 
 MANDOC_OBJS	 = $(MANDOC_HTML_OBJS) \
 		   $(MANDOC_TERM_OBJS) \
-		   chars.o \
 		   main.o \
 		   out.o \
 		   tree.o
 MANDOC_LNS	 = $(MANDOC_HTML_LNS) \
 		   $(MANDOC_TERM_LNS) \
-		   chars.ln \
 		   main.ln \
 		   out.ln \
 		   tree.ln
 
-chars.o chars.ln: chars.in
-
 $(MANDOC_HTML_OBJS) $(MANDOC_HTML_LNS): html.h
 $(MANDOC_TERM_OBJS) $(MANDOC_TERM_LNS): term.h
 $(MANDOC_OBJS) $(MANDOC_LNS): main.h mandoc.h mdoc.h man.h config.h out.h
 
 compat.o compat.ln: config.h
 
-MANDOCDB_OBJS	 = mandoc-db.o
-MANDOCDB_LNS	 = mandoc-db.ln
+MAKEWHATIS_OBJS	 = makewhatis.o
+MAKEWHATIS_LNS	 = makewhatis.ln
+
+$(MAKEWHATIS_OBJS) $(MAKEWHATIS_LNS): mandoc.h mdoc.h man.h config.h
 
-$(MANDOCDB_OBJS) $(MANDOCDB_LNS): mandoc.h mdoc.h man.h config.h
+PRECONV_OBJS	 = preconv.o
+PRECONV_LNS	 = preconv.ln
 
-INDEX_MANS	 = mandoc.1.html \
+$(PRECONV_OBJS) $(PRECONV_LNS): config.h
+
+INDEX_MANS	 = makewhatis.1.html \
+		   makewhatis.1.xhtml \
+		   makewhatis.1.ps \
+		   makewhatis.1.pdf \
+		   makewhatis.1.txt \
+		   mandoc.1.html \
 		   mandoc.1.xhtml \
 		   mandoc.1.ps \
 		   mandoc.1.pdf \
@@ -252,6 +271,11 @@ INDEX_MANS	 = mandoc.1.html \
 		   mdoc.7.ps \
 		   mdoc.7.pdf \
 		   mdoc.7.txt \
+		   preconv.1.html \
+		   preconv.1.xhtml \
+		   preconv.1.ps \
+		   preconv.1.pdf \
+		   preconv.1.txt \
 		   roff.7.html \
 		   roff.7.xhtml \
 		   roff.7.ps \
@@ -274,13 +298,15 @@ INDEX_OBJS	 = $(INDEX_MANS) \
 
 www: index.html
 
-lint: llib-llibmandoc.ln llib-lmandoc.ln
+lint: llib-llibmandoc.ln llib-lmandoc.ln llib-lpreconv.ln
 
 clean:
 	rm -f libmandoc.a $(LIBMANDOC_OBJS)
 	rm -f llib-llibmandoc.ln $(LIBMANDOC_LNS)
-	rm -f mandoc-db $(MANDOCDB_OBJS)
-	rm -f llib-lmandoc-db.ln $(MANDOCDB_LNS)
+	rm -f makewhatis $(MAKEWHATIS_OBJS)
+	rm -f llib-lmakewhatis.ln $(MAKEWHATIS_LNS)
+	rm -f preconv $(PRECONV_OBJS)
+	rm -f llib-lpreconv.ln $(PRECONV_LNS)
 	rm -f mandoc $(MANDOC_OBJS)
 	rm -f llib-lmandoc.ln $(MANDOC_LNS)
 	rm -f config.h config.log compat.o compat.ln
@@ -290,12 +316,15 @@ clean:
 install: all
 	mkdir -p $(DESTDIR)$(BINDIR)
 	mkdir -p $(DESTDIR)$(EXAMPLEDIR)
+	mkdir -p $(DESTDIR)$(LIBDIR)
+	mkdir -p $(DESTDIR)$(INCLUDEDIR)
 	mkdir -p $(DESTDIR)$(MANDIR)/man1
 	mkdir -p $(DESTDIR)$(MANDIR)/man3
 	mkdir -p $(DESTDIR)$(MANDIR)/man7
-	$(INSTALL_PROGRAM) mandoc $(DESTDIR)$(BINDIR)
+	$(INSTALL_PROGRAM) mandoc preconv $(DESTDIR)$(BINDIR)
 	$(INSTALL_LIB) libmandoc.a $(DESTDIR)$(LIBDIR)
-	$(INSTALL_MAN) mandoc.1 $(DESTDIR)$(MANDIR)/man1
+	$(INSTALL_LIB) mandoc.h $(DESTDIR)$(INCLUDEDIR)
+	$(INSTALL_MAN) mandoc.1 preconv.1 $(DESTDIR)$(MANDIR)/man1
 	$(INSTALL_MAN) mandoc.3 $(DESTDIR)$(MANDIR)/man3
 	$(INSTALL_MAN) man.7 mdoc.7 roff.7 eqn.7 tbl.7 mandoc_char.7 $(DESTDIR)$(MANDIR)/man7
 	$(INSTALL_DATA) example.style.css $(DESTDIR)$(EXAMPLEDIR)
@@ -319,15 +348,21 @@ llib-llibmandoc.ln: compat.ln $(LIBMANDOC_LNS)
 mandoc: $(MANDOC_OBJS) libmandoc.a
 	$(CC) -o $@ $(MANDOC_OBJS) libmandoc.a
 
-# You'll need -ldb for Linux.
-mandoc-db: $(MANDOCDB_OBJS) libmandoc.a
-	$(CC) -o $@ $(MANDOCDB_OBJS) libmandoc.a
-
 llib-lmandoc.ln: $(MANDOC_LNS)
 	$(LINT) $(LINTFLAGS) -Cmandoc $(MANDOC_LNS)
 
-llib-lmandoc-db.ln: $(MANDOCDB_LNS)
-	$(LINT) $(LINTFLAGS) -Cmandoc-db $(MANDOCDB_LNS)
+# You'll need -ldb for Linux.
+makewhatis: $(MAKEWHATIS_OBJS) libmandoc.a
+	$(CC) -o $@ $(MAKEWHATIS_OBJS) libmandoc.a
+
+llib-lmakewhatis.ln: $(MAKEWHATIS_LNS)
+	$(LINT) $(LINTFLAGS) -Cmakewhatis $(MAKEWHATIS_LNS)
+
+preconv: $(PRECONV_OBJS)
+	$(CC) -o $@ $(PRECONV_OBJS)
+
+llib-lpreconv.ln: $(PRECONV_LNS)
+	$(LINT) $(LINTFLAGS) -Cpreconv $(PRECONV_LNS)
 
 mdocml.md5: mdocml.tar.gz
 	md5 mdocml.tar.gz >$@
@@ -348,6 +383,10 @@ config.h: config.h.pre config.h.post
 		echo '#define HAVE_STRLCAT'; \
 		rm test-strlcat; \
 	  fi; \
+	  if $(CC) $(CFLAGS) -Werror -o test-mmap test-mmap.c >> config.log 2>&1; then \
+		echo '#define HAVE_MMAP'; \
+		rm test-mmap; \
+	  fi; \
 	  if $(CC) $(CFLAGS) -Werror -o test-strlcpy test-strlcpy.c >> config.log 2>&1; then \
 		echo '#define HAVE_STRLCPY'; \
 		rm test-strlcpy; \
diff --git a/contrib/mdocml/att.in b/contrib/mdocml/att.in
index 48fcd30b99..95af2ef22f 100644
--- a/contrib/mdocml/att.in
+++ b/contrib/mdocml/att.in
@@ -1,4 +1,4 @@
-/*	$Id: att.in,v 1.6 2010/06/19 20:46:27 kristaps Exp $ */
+/*	$Id: att.in,v 1.7 2011/04/24 17:56:44 schwarze Exp $ */
 /*
  * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -20,18 +20,20 @@
  * isn't going to change.  The right-hand side is the formatted string.
  *
  * Be sure to escape strings.
+ * The non-breaking blanks prevent ending an output line right before
+ * a number.  Groff prevents line breaks at the same places.
  */
 
-LINE("v1",		"Version 1 AT&T UNIX")
-LINE("v2",		"Version 2 AT&T UNIX")
-LINE("v3",		"Version 3 AT&T UNIX")
-LINE("v4",		"Version 4 AT&T UNIX")
-LINE("v5",		"Version 5 AT&T UNIX")
-LINE("v6",		"Version 6 AT&T UNIX")
-LINE("v7",		"Version 7 AT&T UNIX")
-LINE("32v",		"Version 32V AT&T UNIX")
-LINE("V",		"AT&T System V UNIX")
-LINE("V.1",		"AT&T System V.1 UNIX")
-LINE("V.2",		"AT&T System V.2 UNIX")
-LINE("V.3",		"AT&T System V.3 UNIX")
-LINE("V.4",		"AT&T System V.4 UNIX")
+LINE("v1",		"Version\\~1 AT&T UNIX")
+LINE("v2",		"Version\\~2 AT&T UNIX")
+LINE("v3",		"Version\\~3 AT&T UNIX")
+LINE("v4",		"Version\\~4 AT&T UNIX")
+LINE("v5",		"Version\\~5 AT&T UNIX")
+LINE("v6",		"Version\\~6 AT&T UNIX")
+LINE("v7",		"Version\\~7 AT&T UNIX")
+LINE("32v",		"Version\\~32V AT&T UNIX")
+LINE("V",		"AT&T System\\~V UNIX")
+LINE("V.1",		"AT&T System\\~V Release\\~1 UNIX")
+LINE("V.2",		"AT&T System\\~V Release\\~2 UNIX")
+LINE("V.3",		"AT&T System\\~V Release\\~3 UNIX")
+LINE("V.4",		"AT&T System\\~V Release\\~4 UNIX")
diff --git a/contrib/mdocml/chars.c b/contrib/mdocml/chars.c
index 03e44910d8..5158612a32 100644
--- a/contrib/mdocml/chars.c
+++ b/contrib/mdocml/chars.c
@@ -1,4 +1,4 @@
-/*	$Id: chars.c,v 1.34 2011/03/22 10:13:01 kristaps Exp $ */
+/*	$Id: chars.c,v 1.46 2011/05/24 21:31:23 kristaps Exp $ */
 /*
  * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -20,12 +20,13 @@
 #endif
 
 #include <assert.h>
+#include <ctype.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
 #include "mandoc.h"
-#include "out.h"
+#include "libmandoc.h"
 
 #define	PRINT_HI	 126
 #define	PRINT_LO	 32
@@ -35,52 +36,37 @@ struct	ln {
 	const char	 *code;
 	const char	 *ascii;
 	int		  unicode;
-	int		  type;
-#define	CHARS_CHAR	 (1 << 0)
-#define	CHARS_STRING	 (1 << 1)
-#define CHARS_BOTH	 (CHARS_CHAR | CHARS_STRING)
 };
 
-#define	LINES_MAX	  351
+#define	LINES_MAX	  325
 
 #define CHAR(in, ch, code) \
-	{ NULL, (in), (ch), (code), CHARS_CHAR },
-#define STRING(in, ch, code) \
-	{ NULL, (in), (ch), (code), CHARS_STRING },
-#define BOTH(in, ch, code) \
-	{ NULL, (in), (ch), (code), CHARS_BOTH },
+	{ NULL, (in), (ch), (code) },
 
 #define	CHAR_TBL_START	  static struct ln lines[LINES_MAX] = {
 #define	CHAR_TBL_END	  };
 
 #include "chars.in"
 
-struct	ctab {
-	enum chars	  type;
+struct	mchars {
 	struct ln	**htab;
 };
 
-static	inline int	  match(const struct ln *,
-				const char *, size_t, int);
-static	const struct ln	 *find(struct ctab *, const char *, size_t, int);
-
+static	inline int	  match(const struct ln *, const char *, size_t);
+static	const struct ln	 *find(struct mchars *, const char *, size_t);
 
 void
-chars_free(void *arg)
+mchars_free(struct mchars *arg)
 {
-	struct ctab	*tab;
-
-	tab = (struct ctab *)arg;
 
-	free(tab->htab);
-	free(tab);
+	free(arg->htab);
+	free(arg);
 }
 
-
-void *
-chars_init(enum chars type)
+struct mchars *
+mchars_alloc(void)
 {
-	struct ctab	 *tab;
+	struct mchars	 *tab;
 	struct ln	**htab;
 	struct ln	 *pp;
 	int		  i, hash;
@@ -92,7 +78,7 @@ chars_init(enum chars type)
 	 * (they're in-line re-ordered during lookup).
 	 */
 
-	tab = mandoc_malloc(sizeof(struct ctab));
+	tab = mandoc_malloc(sizeof(struct mchars));
 	htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln **));
 
 	for (i = 0; i < LINES_MAX; i++) {
@@ -109,7 +95,6 @@ chars_init(enum chars type)
 	}
 
 	tab->htab = htab;
-	tab->type = type;
 	return(tab);
 }
 
@@ -118,79 +103,57 @@ chars_init(enum chars type)
  * Special character to Unicode codepoint.
  */
 int
-chars_spec2cp(void *arg, const char *p, size_t sz)
-{
-	const struct ln	*ln;
-
-	ln = find((struct ctab *)arg, p, sz, CHARS_CHAR);
-	if (NULL == ln)
-		return(-1);
-	return(ln->unicode);
-}
-
-
-/* 
- * Reserved word to Unicode codepoint.
- */
-int
-chars_res2cp(void *arg, const char *p, size_t sz)
+mchars_spec2cp(struct mchars *arg, const char *p, size_t sz)
 {
 	const struct ln	*ln;
 
-	ln = find((struct ctab *)arg, p, sz, CHARS_STRING);
+	ln = find(arg, p, sz);
 	if (NULL == ln)
 		return(-1);
 	return(ln->unicode);
 }
 
-
 /*
- * Numbered character to literal character,
- * represented as a null-terminated string for additional safety.
+ * Numbered character string to ASCII codepoint.
+ * This can only be a printable character (i.e., alnum, punct, space) so
+ * prevent the character from ruining our state (backspace, newline, and
+ * so on).
+ * If the character is illegal, returns '\0'.
  */
-const char *
-chars_num2char(const char *p, size_t sz)
+char
+mchars_num2char(const char *p, size_t sz)
 {
 	int		  i;
-	static char	  c[2];
 
-	if (sz > 3)
-		return(NULL);
-	i = atoi(p);
-	if (i < 0 || i > 255)
-		return(NULL);
-	c[0] = (char)i;
-	c[1] = '\0';
-	return(c);
+	i = mandoc_strntou(p, sz, 10);
+	/* isprint() is only defined for unsigned char values (and EOF). */
+	return((char)(i >= 0 && i < 256 && isprint(i) ? i : '\0'));
 }
 
-
-/* 
- * Special character to string array.
+/*
+ * Hex character string to Unicode codepoint.
+ * If the character is illegal, returns '\0'.
  */
-const char *
-chars_spec2str(void *arg, const char *p, size_t sz, size_t *rsz)
+int
+mchars_num2uc(const char *p, size_t sz)
 {
-	const struct ln	*ln;
-
-	ln = find((struct ctab *)arg, p, sz, CHARS_CHAR);
-	if (NULL == ln)
-		return(NULL);
+	int               i;
 
-	*rsz = strlen(ln->ascii);
-	return(ln->ascii);
+	if ((i = mandoc_strntou(p, sz, 16)) < 0)
+		return('\0');
+	/* FIXME: make sure we're not in a bogus range. */
+	return(i > 0x80 && i <= 0x10FFFF ? i : '\0');
 }
 
-
 /* 
- * Reserved word to string array.
+ * Special character to string array.
  */
 const char *
-chars_res2str(void *arg, const char *p, size_t sz, size_t *rsz)
+mchars_spec2str(struct mchars *arg, const char *p, size_t sz, size_t *rsz)
 {
 	const struct ln	*ln;
 
-	ln = find((struct ctab *)arg, p, sz, CHARS_STRING);
+	ln = find(arg, p, sz);
 	if (NULL == ln)
 		return(NULL);
 
@@ -198,9 +161,8 @@ chars_res2str(void *arg, const char *p, size_t sz, size_t *rsz)
 	return(ln->ascii);
 }
 
-
 static const struct ln *
-find(struct ctab *tab, const char *p, size_t sz, int type)
+find(struct mchars *tab, const char *p, size_t sz)
 {
 	struct ln	 *pp, *prev;
 	struct ln	**htab;
@@ -226,7 +188,7 @@ find(struct ctab *tab, const char *p, size_t sz, int type)
 		return(NULL);
 
 	for (prev = NULL; pp; pp = pp->next) {
-		if ( ! match(pp, p, sz, type)) {
+		if ( ! match(pp, p, sz)) {
 			prev = pp;
 			continue;
 		}
@@ -243,13 +205,10 @@ find(struct ctab *tab, const char *p, size_t sz, int type)
 	return(NULL);
 }
 
-
 static inline int
-match(const struct ln *ln, const char *p, size_t sz, int type)
+match(const struct ln *ln, const char *p, size_t sz)
 {
 
-	if ( ! (ln->type & type))
-		return(0);
 	if (strncmp(ln->code, p, sz))
 		return(0);
 	return('\0' == ln->code[(int)sz]);
diff --git a/contrib/mdocml/chars.in b/contrib/mdocml/chars.in
index f628960c2d..483a2bb828 100644
--- a/contrib/mdocml/chars.in
+++ b/contrib/mdocml/chars.in
@@ -1,4 +1,4 @@
-/*	$Id: chars.in,v 1.36 2011/03/16 22:49:55 schwarze Exp $ */
+/*	$Id: chars.in,v 1.39 2011/05/24 21:40:14 kristaps Exp $ */
 /*
  * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -16,15 +16,12 @@
  */
 
 /*
- * The ASCII translation tables.  STRING corresponds to predefined
- * strings (cf. mdoc_samples.7 and tmac/mdoc/doc-nroff).  CHAR
- * corresponds to special characters (cf. groff_char.7).  BOTH contains
- * sequences that are equivalent in both STRING and CHAR.
+ * The ASCII translation tables.  
  *
- * Either way, the left-hand side corresponds to the input sequence (\x,
- * \(xx, \*(xx and so on) whose length is listed second element.  The
- * right-hand side is what's produced by the front-end, with the fourth
- * element being its length.
+ * The left-hand side corresponds to the input sequence (\x, \(xx, \*(xx
+ * and so on) whose length is listed second element.  The right-hand
+ * side is what's produced by the front-end, with the fourth element
+ * being its length.
  *
  * XXX - C-escape strings!
  * XXX - update LINES_MAX if adding more!
@@ -36,25 +33,25 @@ static const char ascii_nbrsp[2] = { ASCII_NBRSP, '\0' };
 CHAR_TBL_START
 
 /* Spacing. */
-CHAR("c",			"",		0)
+CHAR("c",			"",		8203)
 CHAR("0",			" ",		8194)
 CHAR(" ",			ascii_nbrsp,	160)
 CHAR("~",			ascii_nbrsp,	160)
-CHAR("%",			"",		0)
-CHAR("&",			"",		0)
-CHAR("^",			"",		0)
-CHAR("|",			"",		0)
-CHAR("}",			"",		0)
+CHAR("%",			"",		8203)
+CHAR("&",			"",		8203)
+CHAR("^",			"",		8203)
+CHAR("|",			"",		8203)
+CHAR("}",			"",		8203)
 
 /* Accents. */
 CHAR("a\"",			"\"",		779)
 CHAR("a-",			"-",		175)
 CHAR("a.",			".",		729)
 CHAR("a^",			"^",		770)
-BOTH("\'",			"\'",		769)
-BOTH("aa",			"\'",		769)
-BOTH("ga",			"`",		768)
-BOTH("`",			"`",		768)
+CHAR("\'",			"\'",		769)
+CHAR("aa",			"\'",		769)
+CHAR("ga",			"`",		768)
+CHAR("`",			"`",		768)
 CHAR("ab",			"`",		774)
 CHAR("ac",			",",		807)
 CHAR("ad",			"\"",		776)
@@ -68,8 +65,8 @@ CHAR("ti",			"~",		126)
 /* Quotes. */
 CHAR("Bq",			",,",		8222)
 CHAR("bq",			",",		8218)
-BOTH("lq",			"``",		8220)
-BOTH("rq",			"\'\'",		8221)
+CHAR("lq",			"``",		8220)
+CHAR("rq",			"\'\'",		8221)
 CHAR("oq",			"`",		8216)
 CHAR("cq",			"\'",		8217)
 CHAR("aq",			"\'",		39)
@@ -232,8 +229,8 @@ CHAR("<-",			"<-",		8592)
 CHAR("->",			"->",		8594)
 CHAR("<>",			"<>",		8596)
 CHAR("da",			"v",		8595)
-BOTH("ua",			"^",		8593)
-BOTH("va",			"^v",		8597)
+CHAR("ua",			"^",		8593)
+CHAR("va",			"^v",		8597)
 CHAR("lA",			"<=",		8656)
 CHAR("rA",			"=>",		8658)
 CHAR("hA",			"<=>",		8660)
@@ -270,8 +267,8 @@ CHAR("di",			"-:-",		247)
 CHAR("tdi",			"-:-",		247)
 CHAR("f/",			"/",		8260)
 CHAR("**",			"*",		8727)
-BOTH("<=",			"<=",		8804)
-BOTH(">=",			">=",		8805)
+CHAR("<=",			"<=",		8804)
+CHAR(">=",			">=",		8805)
 CHAR("<<",			"<<",		8810)
 CHAR(">>",			">>",		8811)
 CHAR("eq",			"=",		61)
@@ -348,34 +345,6 @@ CHAR("Po",			"L",		163)
 CHAR("Cs",			"x",		164)
 CHAR("Fn",			"f",		402)
 
-/* Old style. */
-STRING("Am",			"&",		38)
-STRING("Ba",			"|",		124)
-STRING("Ge",			">=",		8805)
-STRING("Gt",			">",		62)
-STRING("If",			"infinity",	0)
-STRING("Le",			"<=",		8804)
-STRING("Lq",			"``",		8220)
-STRING("Lt",			"<",		60)
-STRING("Na",			"NaN",		0)
-STRING("Ne",			"!=",		8800)
-STRING("Pi",			"pi",		960)
-STRING("Pm",			"+-",		177)
-STRING("Rq",			"\'\'",		8221)
-STRING("left-bracket",		"[",		91)
-STRING("left-parenthesis",	"(",		40)
-STRING("left-singlequote",	"`",		8216)
-STRING("lp",			"(",		40)
-STRING("q",			"\"",		34)
-STRING("quote-left",		"`",		8216)
-STRING("quote-right",		"\'",		8217)
-STRING("R",			"(R)",		174)
-STRING("right-bracket",		"]",		93)
-STRING("right-parenthesis",	")",		41)
-STRING("right-singlequote",	"\'",		8217)
-STRING("rp",			")",		41)
-STRING("Tm",			"(Tm)",		8482)
-
 /* Lines. */
 CHAR("ba",			"|",		124)
 CHAR("br",			"|",		9474)
diff --git a/contrib/mdocml/example.style.css b/contrib/mdocml/example.style.css
index c7cc484f35..39075460a4 100644
--- a/contrib/mdocml/example.style.css
+++ b/contrib/mdocml/example.style.css
@@ -1,4 +1,4 @@
-/* $Id: example.style.css,v 1.42 2011/02/09 09:52:47 kristaps Exp $ */
+/* $Id: example.style.css,v 1.43 2011/04/11 22:58:28 kristaps Exp $ */
 
 /*
  * This is an example style-sheet provided for mandoc(1) and the -Thtml
@@ -11,32 +11,26 @@
 
 html		{ min-width: 580px; width: 580px; }
 body		{ font-family: monospace; }
+h1		{ margin-bottom: 0ex; font-size: inherit; margin-left: -4ex; } /* Section header (Sh, SH). */
+h2		{ margin-bottom: 0ex; font-size: inherit; margin-left: -2ex; } /* Sub-section header (Ss, SS). */
+table		{ width: 100%; margin-top: 0ex; margin-bottom: 0ex; } /* All tables. */
+td		{ vertical-align: top; } /* All table cells. */
+p		{ } /* Paragraph: Pp, Lp. */
+blockquote	{ margin-top: 0ex; margin-bottom: 0ex; } /* D1. */
+div.section	{ margin-bottom: 2ex; margin-left: 5ex; } /* Sections (Sh, SH). */
+div.subsection	{ } /* Sub-sections (Ss, SS). */
+table.synopsis	{ } /* SYNOPSIS section table. */
 
 /* Preamble structure. */
 
-table.foot	{ width: 100%; } /* Document footer. */
+table.foot	{ } /* Document footer. */
 td.foot-date	{ width: 50%; } /* Document footer: date. */
 td.foot-os	{ width: 50%; text-align: right; } /* Document footer: OS/source. */
-table.head	{ width: 100%; } /* Document header. */
+table.head	{ } /* Document header. */
 td.head-ltitle	{ width: 10%; } /* Document header: left-title. */
 td.head-vol	{ width: 80%; text-align: center; } /* Document header: volume. */
 td.head-rtitle	{ width: 10%; text-align: right; } /* Document header: right-title. */
 
-/* Sections. */
-
-h1		{ margin-bottom: 0px; font-size: medium; margin-left: -4ex; } /* Section header (Sh, SH). */
-h2		{ margin-bottom: 0px; font-size: medium; margin-left: -2ex; } /* Sub-section header (Ss, SS). */
-div.section	{ margin-bottom: 2ex; margin-left: 4ex; } /* Sections (Sh, SH). */
-div.subsection	{ } /* Sub-sections (Ss, SS). */
-table.synopsis	{ } /* SYNOPSIS section table. */
-
-/* Vertical spacing. */
-
-p		{ } /* Paragraph: Pp, Lp. */
-blockquote	{ margin-top: 0px; margin-bottom: 0px; }
-table		{ margin-top: 0px; margin-bottom: 0px; }
-td		{ vertical-align: top; }
-
 /* General font modes. */
 
 i		{ } /* Italic: BI, IB, I, (implicit). */
@@ -116,7 +110,7 @@ dd.list-inset	{ }
 
 dl.list-ohang	{ }
 dt.list-ohang	{ }
-dd.list-ohang	{ margin-left: 0em; }
+dd.list-ohang	{ margin-left: 0ex; }
 
 dl.list-tag	{ }
 dt.list-tag	{ }
diff --git a/contrib/mdocml/html.c b/contrib/mdocml/html.c
index ab57c3743f..45471fe3b0 100644
--- a/contrib/mdocml/html.c
+++ b/contrib/mdocml/html.c
@@ -1,4 +1,4 @@
-/*	$Id: html.c,v 1.131 2011/03/22 14:05:45 kristaps Exp $ */
+/*	$Id: html.c,v 1.147 2011/05/24 21:40:14 kristaps Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -31,6 +31,7 @@
 #include <unistd.h>
 
 #include "mandoc.h"
+#include "libmandoc.h"
 #include "out.h"
 #include "html.h"
 #include "main.h"
@@ -93,19 +94,25 @@ static	const char	*const htmlattrs[ATTR_MAX] = {
 	"colspan", /* ATTR_COLSPAN */
 };
 
-static	void		  print_num(struct html *, const char *, size_t);
-static	void		  print_spec(struct html *, enum roffdeco,
-				const char *, size_t);
-static	void		  print_res(struct html *, const char *, size_t);
-static	void		  print_ctag(struct html *, enum htmltag);
-static	void		  print_doctype(struct html *);
-static	void		  print_xmltype(struct html *);
-static	int		  print_encode(struct html *, const char *, int);
-static	void		  print_metaf(struct html *, enum roffdeco);
-static	void		  print_attr(struct html *, 
-				const char *, const char *);
-static	void		 *ml_alloc(char *, enum htmltype);
+static	const char	*const roffscales[SCALE_MAX] = {
+	"cm", /* SCALE_CM */
+	"in", /* SCALE_IN */
+	"pc", /* SCALE_PC */
+	"pt", /* SCALE_PT */
+	"em", /* SCALE_EM */
+	"em", /* SCALE_MM */
+	"ex", /* SCALE_EN */
+	"ex", /* SCALE_BU */
+	"em", /* SCALE_VS */
+	"ex", /* SCALE_FS */
+};
 
+static	void	 bufncat(struct html *, const char *, size_t);
+static	void	 print_ctag(struct html *, enum htmltag);
+static	int	 print_encode(struct html *, const char *, int);
+static	void	 print_metaf(struct html *, enum mandoc_esc);
+static	void	 print_attr(struct html *, const char *, const char *);
+static	void	 *ml_alloc(char *, enum htmltype);
 
 static void *
 ml_alloc(char *outopts, enum htmltype type)
@@ -123,7 +130,7 @@ ml_alloc(char *outopts, enum htmltype type)
 
 	h->type = type;
 	h->tags.head = NULL;
-	h->symtab = chars_init(CHARS_HTML);
+	h->symtab = mchars_alloc();
 
 	while (outopts && *outopts)
 		switch (getsubopt(&outopts, UNCONST(toks), &v)) {
@@ -173,7 +180,7 @@ html_free(void *p)
 	}
 	
 	if (h->symtab)
-		chars_free(h->symtab);
+		mchars_free(h->symtab);
 
 	free(h);
 }
@@ -209,72 +216,24 @@ print_gen_head(struct html *h)
 	}
 }
 
-/* ARGSUSED */
-static void
-print_num(struct html *h, const char *p, size_t len)
-{
-	const char	*rhs;
-
-	rhs = chars_num2char(p, len);
-	if (rhs)
-		putchar((int)*rhs);
-}
-
 static void
-print_spec(struct html *h, enum roffdeco d, const char *p, size_t len)
-{
-	int		 cp;
-	const char	*rhs;
-	size_t		 sz;
-
-	if ((cp = chars_spec2cp(h->symtab, p, len)) > 0) {
-		printf("&#%d;", cp);
-		return;
-	} else if (-1 == cp && DECO_SSPECIAL == d) {
-		fwrite(p, 1, len, stdout);
-		return;
-	} else if (-1 == cp)
-		return;
-
-	if (NULL != (rhs = chars_spec2str(h->symtab, p, len, &sz)))
-		fwrite(rhs, 1, sz, stdout);
-}
-
-
-static void
-print_res(struct html *h, const char *p, size_t len)
-{
-	int		 cp;
-	const char	*rhs;
-	size_t		 sz;
-
-	if ((cp = chars_res2cp(h->symtab, p, len)) > 0) {
-		printf("&#%d;", cp);
-		return;
-	} else if (-1 == cp)
-		return;
-
-	if (NULL != (rhs = chars_res2str(h->symtab, p, len, &sz)))
-		fwrite(rhs, 1, sz, stdout);
-}
-
-
-static void
-print_metaf(struct html *h, enum roffdeco deco)
+print_metaf(struct html *h, enum mandoc_esc deco)
 {
 	enum htmlfont	 font;
 
 	switch (deco) {
-	case (DECO_PREVIOUS):
+	case (ESCAPE_FONTPREV):
 		font = h->metal;
 		break;
-	case (DECO_ITALIC):
+	case (ESCAPE_FONTITALIC):
 		font = HTMLFONT_ITALIC;
 		break;
-	case (DECO_BOLD):
+	case (ESCAPE_FONTBOLD):
 		font = HTMLFONT_BOLD;
 		break;
-	case (DECO_ROMAN):
+	case (ESCAPE_FONT):
+		/* FALLTHROUGH */
+	case (ESCAPE_FONTROMAN):
 		font = HTMLFONT_NONE;
 		break;
 	default:
@@ -296,80 +255,123 @@ print_metaf(struct html *h, enum roffdeco deco)
 			print_otag(h, TAG_I, 0, NULL);
 }
 
+int
+html_strlen(const char *cp)
+{
+	int		 ssz, sz;
+	const char	*seq, *p;
+
+	/*
+	 * Approximate the rendered width of a roff string, following
+	 * the logic in term_strlen(): every literal byte and every
+	 * character-producing escape counts as width "1"; all other
+	 * escapes count as zero.  If mandoc_escape() reports
+	 * ESCAPE_ERROR, return the width accumulated so far.
+	 */
+
+	sz = 0;
+	while (NULL != (p = strchr(cp, '\\'))) {
+		sz += (int)(p - cp);
+		cp = p + 1;	/* Parse from just past the backslash. */
+		switch (mandoc_escape(&cp, &seq, &ssz)) {
+		case (ESCAPE_ERROR):
+			return(sz);
+		case (ESCAPE_UNICODE):
+			/* FALLTHROUGH */
+		case (ESCAPE_NUMBERED):
+			/* FALLTHROUGH */
+		case (ESCAPE_SPECIAL):
+			sz++;
+			break;
+		default:
+			break;
+		}
+	}
+
+	assert(sz >= 0);
+	return(sz + (int)strlen(cp));
+}
 
 static int
 print_encode(struct html *h, const char *p, int norecurse)
 {
 	size_t		 sz;
-	int		 len, nospace;
+	int		 c, len, nospace;
 	const char	*seq;
-	enum roffdeco	 deco;
+	enum mandoc_esc	 esc;
 	static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' };
 
 	nospace = 0;
 
-	for (; *p; p++) {
+	while ('\0' != *p) {
 		sz = strcspn(p, rejs);
 
 		fwrite(p, 1, sz, stdout);
-		p += /* LINTED */
-			sz;
+		p += (int)sz;
+
+		if ('\0' == *p)
+			break;
 
-		if ('<' == *p) {
+		switch (*p++) {
+		case ('<'):
 			printf("&lt;");
 			continue;
-		} else if ('>' == *p) {
+		case ('>'):
 			printf("&gt;");
 			continue;
-		} else if ('&' == *p) {
+		case ('&'):
 			printf("&amp;");
 			continue;
-		} else if (ASCII_HYPH == *p) {
-			/*
-			 * Note: "soft hyphens" aren't graphically
-			 * displayed when not breaking the text; we want
-			 * them to be displayed.
-			 */
-			/*printf("&#173;");*/
+		case (ASCII_HYPH):
 			putchar('-');
 			continue;
-		} else if ('\0' == *p)
+		default:
 			break;
+		}
 
-		seq = ++p;
-		len = a2roffdeco(&deco, &seq, &sz);
+		esc = mandoc_escape(&p, &seq, &len);
+		if (ESCAPE_ERROR == esc)
+			break;
 
-		switch (deco) {
-		case (DECO_NUMBERED):
-			print_num(h, seq, sz);
+		switch (esc) {
+		case (ESCAPE_UNICODE):
+			/* Skip past the "u" header. */
+			c = mchars_num2uc(seq + 1, len - 1);
+			if ('\0' != c)
+				printf("&#x%x;", c);
 			break;
-		case (DECO_RESERVED):
-			print_res(h, seq, sz);
+		case (ESCAPE_NUMBERED):
+			c = mchars_num2char(seq, len);
+			if ('\0' != c)
+				putchar(c);
 			break;
-		case (DECO_SSPECIAL):
-			/* FALLTHROUGH */
-		case (DECO_SPECIAL):
-			print_spec(h, deco, seq, sz);
+		case (ESCAPE_SPECIAL):
+			c = mchars_spec2cp(h->symtab, seq, len);
+			if (c > 0)
+				printf("&#%d;", c);
+			else if (-1 == c && 1 == len)
+				putchar((int)*seq);
 			break;
-		case (DECO_PREVIOUS):
+		case (ESCAPE_FONT):
+			/* FALLTHROUGH */
+		case (ESCAPE_FONTPREV):
 			/* FALLTHROUGH */
-		case (DECO_BOLD):
+		case (ESCAPE_FONTBOLD):
 			/* FALLTHROUGH */
-		case (DECO_ITALIC):
+		case (ESCAPE_FONTITALIC):
 			/* FALLTHROUGH */
-		case (DECO_ROMAN):
+		case (ESCAPE_FONTROMAN):
 			if (norecurse)
 				break;
-			print_metaf(h, deco);
+			print_metaf(h, esc);
+			break;
+		case (ESCAPE_NOSPACE):
+			if ('\0' == *p)
+				nospace = 1;
 			break;
 		default:
 			break;
 		}
-
-		p += len - 1;
-
-		if (DECO_NOSPACE == deco && '\0' == *(p + 1))
-			nospace = 1;
 	}
 
 	return(nospace);
@@ -432,7 +434,7 @@ print_otag(struct html *h, enum htmltag tag,
 		print_attr(h, "lang", "en");
 	}
 
-	/* Accomodate for XML "well-formed" singleton escaping. */
+	/* Accommodate for XML "well-formed" singleton escaping. */
 
 	if (HTML_AUTOCLOSE & htmltags[tag].flags)
 		switch (h->type) {
@@ -465,27 +467,8 @@ print_ctag(struct html *h, enum htmltag tag)
 	} 
 }
 
-
 void
 print_gen_decls(struct html *h)
-{
-
-	print_xmltype(h);
-	print_doctype(h);
-}
-
-
-static void
-print_xmltype(struct html *h)
-{
-
-	if (HTML_XHTML_1_0_STRICT == h->type)
-		puts("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
-}
-
-
-static void
-print_doctype(struct html *h)
 {
 	const char	*doctype;
 	const char	*dtd;
@@ -498,6 +481,7 @@ print_doctype(struct html *h)
 		dtd = "http://www.w3.org/TR/html4/strict.dtd";
 		break;
 	default:
+		puts("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
 		name = "html";
 		doctype = "-//W3C//DTD XHTML 1.0 Strict//EN";
 		dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
@@ -587,7 +571,6 @@ print_stagq(struct html *h, const struct tag *suntil)
 	}
 }
 
-
 void
 bufinit(struct html *h)
 {
@@ -596,28 +579,27 @@ bufinit(struct html *h)
 	h->buflen = 0;
 }
 
-
 void
 bufcat_style(struct html *h, const char *key, const char *val)
 {
 
 	bufcat(h, key);
-	bufncat(h, ":", 1);
+	bufcat(h, ":");
 	bufcat(h, val);
-	bufncat(h, ";", 1);
+	bufcat(h, ";");
 }
 
-
 void
 bufcat(struct html *h, const char *p)
 {
 
-	bufncat(h, p, strlen(p));
+	/* On success this is already strlen(h->buf); don't adjust it. */
+	h->buflen = strlcat(h->buf, p, BUFSIZ);
+	assert(h->buflen < BUFSIZ);
 }
 
-
 void
-buffmt(struct html *h, const char *fmt, ...)
+bufcat_fmt(struct html *h, const char *fmt, ...)
 {
 	va_list		 ap;
 
@@ -628,19 +610,15 @@ buffmt(struct html *h, const char *fmt, ...)
 	h->buflen = strlen(h->buf);
 }
 
-
-void
+static void
 bufncat(struct html *h, const char *p, size_t sz)
 {
 
-	if (h->buflen + sz > BUFSIZ - 1)
-		sz = BUFSIZ - 1 - h->buflen;
-
-	(void)strncat(h->buf, p, sz);
+	assert(h->buflen + sz + 1 < BUFSIZ);
+	strncat(h->buf, p, sz);
 	h->buflen += sz;
 }
 
-
 void
 buffmt_includes(struct html *h, const char *name)
 {
@@ -648,6 +626,7 @@ buffmt_includes(struct html *h, const char *name)
 
 	pp = h->base_includes;
 	
+	bufinit(h);
 	while (NULL != (p = strchr(pp, '%'))) {
 		bufncat(h, pp, (size_t)(p - pp));
 		switch (*(p + 1)) {
@@ -664,7 +643,6 @@ buffmt_includes(struct html *h, const char *name)
 		bufcat(h, pp);
 }
 
-
 void
 buffmt_man(struct html *h, 
 		const char *name, const char *sec)
@@ -673,7 +651,7 @@ buffmt_man(struct html *h,
 
 	pp = h->base_man;
 	
-	/* LINTED */
+	bufinit(h);
 	while (NULL != (p = strchr(pp, '%'))) {
 		bufncat(h, pp, (size_t)(p - pp));
 		switch (*(p + 1)) {
@@ -681,7 +659,7 @@ buffmt_man(struct html *h,
 			bufcat(h, sec ? sec : "1");
 			break;
 		case('N'):
-			buffmt(h, name);
+			bufcat_fmt(h, "%s", name);
 			break;
 		default:
 			bufncat(h, p, 2);
@@ -693,85 +671,24 @@ buffmt_man(struct html *h,
 		bufcat(h, pp);
 }
 
-
 void
 bufcat_su(struct html *h, const char *p, const struct roffsu *su)
 {
 	double		 v;
-	const char	*u;
 
 	v = su->scale;
+	if (SCALE_MM == su->unit && 0.0 == (v /= 100.0))
+		v = 1.0;
 
-	switch (su->unit) {
-	case (SCALE_CM):
-		u = "cm";
-		break;
-	case (SCALE_IN):
-		u = "in";
-		break;
-	case (SCALE_PC):
-		u = "pc";
-		break;
-	case (SCALE_PT):
-		u = "pt";
-		break;
-	case (SCALE_EM):
-		u = "em";
-		break;
-	case (SCALE_MM):
-		if (0 == (v /= 100))
-			v = 1;
-		u = "em";
-		break;
-	case (SCALE_EN):
-		u = "ex";
-		break;
-	case (SCALE_BU):
-		u = "ex";
-		break;
-	case (SCALE_VS):
-		u = "em";
-		break;
-	default:
-		u = "ex";
-		break;
-	}
-
-	/* 
-	 * XXX: the CSS spec isn't clear as to which types accept
-	 * integer or real numbers, so we just make them all decimals.
-	 */
-	buffmt(h, "%s: %.2f%s;", p, v, u);
+	bufcat_fmt(h, "%s: %.2f%s;", p, v, roffscales[su->unit]);
 }
 
-
 void
-html_idcat(char *dst, const char *src, int sz)
+bufcat_id(struct html *h, const char *src)
 {
-	int		 ssz;
-
-	assert(sz > 2);
 
 	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */
 
-	/* We can't start with a number (bah). */
-
-	if ('#' == *dst) {
-		dst++;
-		sz--;
-	}
-	if ('\0' == *dst) {
-		*dst++ = 'x';
-		*dst = '\0';
-		sz--;
-	}
-
-	for ( ; *dst != '\0' && sz; dst++, sz--)
-		/* Jump to end. */ ;
-
-	for ( ; *src != '\0' && sz > 1; src++) {
-		ssz = snprintf(dst, (size_t)sz, "%.2x", *src);
-		sz -= ssz;
-		dst += ssz;
-	}
+	while ('\0' != *src)
+		bufcat_fmt(h, "%.2x", (unsigned char)*src++);
 }
diff --git a/contrib/mdocml/html.h b/contrib/mdocml/html.h
index 561d06e2de..aba635f144 100644
--- a/contrib/mdocml/html.h
+++ b/contrib/mdocml/html.h
@@ -1,4 +1,4 @@
-/*	$Id: html.h,v 1.40 2011/01/29 14:49:44 kristaps Exp $ */
+/*	$Id: html.h,v 1.44 2011/05/17 11:34:31 kristaps Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -120,7 +120,7 @@ struct	html {
 	struct tagq	  tags; /* stack of open tags */
 	struct rofftbl	  tbl; /* current table */
 	struct tag	 *tblt; /* current open table scope */
-	void		 *symtab; /* character-escapes */
+	struct mchars	 *symtab; /* character-escapes */
 	char		 *base_man; /* base for manpage href */
 	char		 *base_includes; /* base for include href */
 	char		 *style; /* style-sheet URI */
@@ -142,19 +142,19 @@ void		  print_text(struct html *, const char *);
 void		  print_tblclose(struct html *);
 void		  print_tbl(struct html *, const struct tbl_span *);
 
+void		  bufcat_fmt(struct html *, const char *, ...);
+void		  bufcat(struct html *, const char *);
+void		  bufcat_id(struct html *, const char *);
+void		  bufcat_style(struct html *, 
+			const char *, const char *);
 void		  bufcat_su(struct html *, const char *, 
 			const struct roffsu *);
+void		  bufinit(struct html *);
 void		  buffmt_man(struct html *, 
 			const char *, const char *);
 void		  buffmt_includes(struct html *, const char *);
-void		  buffmt(struct html *, const char *, ...);
-void		  bufcat(struct html *, const char *);
-void		  bufcat_style(struct html *, 
-			const char *, const char *);
-void		  bufncat(struct html *, const char *, size_t);
-void		  bufinit(struct html *);
 
-void		  html_idcat(char *, const char *, int);
+int		  html_strlen(const char *);
 
 __END_DECLS
 
diff --git a/contrib/mdocml/index.css b/contrib/mdocml/index.css
index d8d0b2d80f..ce0898d0d3 100644
--- a/contrib/mdocml/index.css
+++ b/contrib/mdocml/index.css
@@ -1,48 +1,43 @@
-body		{ color: #333333;
-		  font-size: 0.93em;
-		  font-family: Times, sans-serif; }
+html		{ min-width: 40em;
+		  margin-top: 2em;
+		  margin-left: auto;
+		  margin-right: auto;
+		  width: 80%; }
 
-table.frame	{ max-width: 800px; 
-		  padding-right: 2em;
-		  padding-left: 1em; }
+body		{ text-align: justify; 
+		  font-family: Helvetica,Arial,sans-serif;
+		  line-height: 120%;
+		  font-size: small; }
 
-table		{ padding-left: 40px; }
+p,ul,table	{ margin-left: 3em; }
 
-p		{ padding-left: 40px;
-		  text-align: justify; }
+p.head, p.foot	{ margin-left: 0.0em; margin-right: 0.0em; }
 
-h1		{ font-weight: bold;
-		  font-size: small;
-		  font-family: Verdana, Tahoma, Arial, sans-serif; }
+p.news		{ margin-left: 2.0em; }
 
-h2		{ font-weight: bold;
-		  font-size: small;
-		  padding-left: 20px;
-		  margin-bottom: 0px; 
-		  font-family: Verdana, Tahoma, Arial, sans-serif; }
+li		{ margin: 0.25em; }
 
-span.nm		{ font-weight: bold; }
+h1		{ font-size: 110%; }
+h2		{ font-size: 105%; margin-left: 1.5em }
+
+p.head		{ margin-bottom: 1.75em;
+		  border-bottom: 1px solid #dddddd; 
+		  padding-bottom: 0.2em; }
+
+p.foot		{ border-top: 1px solid #dddddd; 
+		  color: #666666;
+		  padding-top: 0.2em;
+		  margin-top: 1.75em; }
+
+span.nm		{ color: green; }
 
 span.file	{ font-style: italic; }
 
-span.attn	{ color: #000000; font-weight: bold; }
+span.attn	{ font-weight: bold; }
 
 span.flag	{ font-weight: bold; }
 
-div.head	{ border-bottom: 1px solid #dddddd; 
-		  padding-bottom: 5px;
-		  text-align: right; }
-
-div.foot	{ border-top: 1px solid #dddddd; 
-		  padding-top: 5px;
-		  font-size: smaller;
-		  text-align: right; }
+a		{ text-decoration: none; }
 
 a.external 	{ background: transparent url(external.png) center right no-repeat; 
 		  padding-right: 12px; }
-
-span.date	{ color: #000000; }
-
-div.news	{ margin-bottom: 2em; }
-
-div.news ul	{ margin-left: 4em; }
diff --git a/contrib/mdocml/index.sgml b/contrib/mdocml/index.sgml
index d4fd1edc62..fa7d8b431d 100644
--- a/contrib/mdocml/index.sgml
+++ b/contrib/mdocml/index.sgml
@@ -2,408 +2,386 @@
 <HTML>
 	<HEAD>
 		<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
-		<META NAME="resource-type" CONTENT="document">
 		<LINK REL="stylesheet" HREF="index.css" TYPE="text/css" MEDIA="all">
 		<TITLE>mdocml | mdoc macro compiler</TITLE>
 	</HEAD>
 	<BODY>
-	<TABLE CLASS="frame" SUMMARY="[frame]">
-		<COL WIDTH="100%">
-		<TBODY>
-			<TR>
-				<TD>
-					<DIV CLASS="head">
-						<B>mdocml</B> &#8211; mdoc macro compiler
-					</DIV>
-				</TD>
-			</TR>
-			<TR>
-				<TD VALIGN="top">
-					<H1>
-					<A NAME="description">DESCRIPTION</A>
-					</H1>
+		<P CLASS="head">
+			<B>mdocml</B> &#8211; mdoc macro compiler
+		</P>
+		<H1>
+			<A NAME="description">Description</A>
+		</H1>
+		<P>
+			<SPAN CLASS="nm">mdocml</SPAN> is a suite of tools compiling <I><A HREF="mdoc.7.html">mdoc</A></I>, the roff macro
+			package of choice for BSD manual pages, and <I><A HREF="man.7.html">man</A></I>, the predominant historical package for
+			UNIX manuals.  The mission of <SPAN CLASS="nm">mdocml</SPAN> is to deprecate <A
+			HREF="http://www.gnu.org/software/groff/" CLASS="external">groff</A>, the GNU troff implementation, for displaying <I>mdoc</I>
+			pages whilst providing token support for <I>man</I>.
+		</P>
+		<P>
+			Why?  groff amounts to over 5 MB of source code, most of which is C++ and all of which is GPL.  It runs slowly, produces
+			uncertain output, and varies in operation from system to system.  mdocml strives to fix this (respectively small, C, <A
+			CLASS="external" HREF="http://www.isc.org/software/license">ISC</A>-licensed, fast and regular).
+		</P>
+		<P>
+			<SPAN CLASS="nm">mdocml</SPAN> consists of the <A HREF="mandoc.3.html">libmandoc</A> validating compiler and <A
+			HREF="mandoc.1.html">mandoc</A>, which interfaces with the compiler library to format output for UNIX terminals (with
+			support for wide-character locales), XHTML, HTML, PostScript, and PDF.  
+			It also includes <A HREF="preconv.1.html">preconv</A>, for recoding multibyte manuals; and <A
+			HREF="makewhatis.1.html">makewhatis</A>, for indexing manuals.
+			It is a <A CLASS="external" HREF="http://bsd.lv/">BSD.lv</A> project.  
+		</P>
+		<P>
+			<I>Disambiguation</I>: <SPAN CLASS="nm">mdocml</SPAN> is often referred to by its installed binary, <Q>mandoc</Q>.
+		</P>
+		<H1>
+			<A NAME="sources">Sources</A>
+		</H1>
+		<P>
+			<SPAN CLASS="nm">mdocml</SPAN> is in plain-old ANSI C and should build and run on any UNIX system, although <A
+			HREF="makewhatis.1.html">makewhatis</A> requires <A CLASS="external"
+			HREF="http://www.oracle.com/technetwork/database/berkeleydb/overview/index.html">Berkeley Database</A> (this is
+			installed by default on all BSD operating systems).  
+			To compile <SPAN CLASS="nm">mdocml</SPAN>, run <CODE>make</CODE>, then <CODE>make install</CODE> to install into
+			<I>/usr/local</I>.
+			Be aware: if you have an existing <A HREF="http://www.gnu.org/software/groff/" CLASS="external">groff</A> installation,
+			this may overwrite its <B>preconv</B> binary.
+			The <A HREF="makewhatis.1.html">makewhatis</A> utility is not yet linked to the build.  You must run <CODE>make
+			makewhatis</CODE> to build it (it does not install).
+		</P>
+		<P>
+			The most current version of <SPAN CLASS="nm">mdocml</SPAN> is <SPAN CLASS="attn">@VERSION@</SPAN>, dated <SPAN
+			class="attn">@VDATE@</SPAN>.  
+		</P>
 
-					<P>
-					<SPAN CLASS="nm">mdocml</SPAN> is a suite of tools compiling <Q>-<A HREF="mdoc.7.html">mdoc</A></Q>, the
-					roff macro package of choice for BSD manual pages, and <Q>-<A HREF="man.7.html">man</A></Q>, the
-					predominant historical package for UNIX manuals.  The mission of <SPAN CLASS="nm">mdocml</SPAN> is to
-					deprecate <A HREF="http://www.gnu.org/software/groff/" CLASS="external">groff</A>, the GNU troff
-					implementation, for displaying -mdoc pages whilst providing token support for -man.
-					</P>
+		<H2>
+			Current
+		</H2>
 
-					<P>
-					Why?  groff amounts to over 5 MB of source code, most of which is C++ and all of which is GPL.  It runs
-					slowly, produces uncertain output, and varies in operation from system to system.  mdocml strives to fix
-					this (respectively small, C, <A CLASS="external"
-					HREF="http://www.isc.org/software/license">ISC</A>-licensed, fast and regular).
-					</P>
+		<TABLE WIDTH="100%" SUMMARY="Current Sources">
+			<COL WIDTH="175">
+			<COL>
+			<TBODY>
+				<TR>
+					<TD>Source archive</TD>
+					<TD>
+					<A HREF="/snapshots/mdocml.tar.gz">/snapshots/mdocml.tar.gz</A> 
+					<SMALL>(<A HREF="/snapshots/mdocml.md5">md5</A>)</SMALL>
+					</TD>
+				</TR>
+				<TR>
+					<TD>Online source</TD>
+					<TD>
+					<A HREF="http://mdocml.bsd.lv/cgi-bin/cvsweb/?cvsroot=mdocml">cvsweb</A>
+					</TD>
+				</TR>
+			</TBODY>
+		</TABLE>
 
-					<P>
-					<SPAN CLASS="nm">mdocml</SPAN> consists of the <A HREF="mandoc.3.html">libmandoc</A> validating
-					compiler and <A HREF="mandoc.1.html">mandoc</A>, which interfaces with the compiler library to format
-					output for UNIX terminals, XHTML, HTML, PostScript, and PDF.  It is a <A CLASS="external"
-					HREF="http://bsd.lv/">BSD.lv</A> project.  
-					</P>
+		<H2>
+			Downstream
+		</H2>
 
-					<P>
-					<I>Disambiguation</I>: <SPAN CLASS="nm">mdocml</SPAN> is often referred to by its installed binary,
-					<Q>mandoc</Q>.
-					</P>
-				</TD>
-			</TR>
-			<TR>
-				<TD>
-					<H1>
-					<A NAME="sources">SOURCES</A>
-					</H1>
+		<TABLE WIDTH="100%" SUMMARY="Downstream Sources">
+			<COL WIDTH="175">
+			<COL>
+			<TBODY>
+				<TR>
+					<TD>DragonFly BSD</TD>
+					<TD>
+					<A HREF="http://gitweb.dragonflybsd.org/dragonfly.git/tree/HEAD:/usr.bin/mandoc"
+						CLASS="external">usr.bin/mandoc</A>
+					</TD>
+				</TR>
+				<TR>
+					<TD>FreeBSD</TD>
+					<TD>
+					<A HREF="http://www.freebsd.org/cgi/cvsweb.cgi/ports/textproc/mdocml/" 
+						CLASS="external">ports/textproc/mdocml</A>
+					</TD>
+				</TR>
+				<TR>
+					<TD>NetBSD</TD>
+					<TD>
+					<A HREF="http://cvsweb.netbsd.org/bsdweb.cgi/src/external/bsd/mdocml/"
+						CLASS="external">src/external/bsd/mdocml</A>
+					</TD>
+				</TR>
+				<TR>
+					<TD>OpenBSD</TD>
+					<TD>
+					<A HREF="http://www.openbsd.org/cgi-bin/cvsweb/src/usr.bin/mandoc/"
+						CLASS="external">src/usr.bin/mandoc</A> 
+					</TD>
+				</TR>
+			</TBODY>
+		</TABLE>
 
-					<P>
-					<SPAN CLASS="nm">mdocml</SPAN> is in plain-old ANSI C and should build and run on any UNIX system.
-					The most current version is <SPAN CLASS="attn">@VERSION@</SPAN>, dated <SPAN class="attn">@VDATE@</SPAN>.
-					</P>
+		<H2>
+			Historical
+		</H2>
 
-					<H2>
-					Current
-					</H2>
+		<TABLE WIDTH="100%" SUMMARY="Archived Sources">
+			<COL WIDTH="175">
+			<COL>
+			<TBODY>
+				<TR>
+					<TD>Source archive</TD>
+					<TD>
+					<A HREF="/snapshots/">/snapshots/</A> 
+					</TD>
+				</TR>
+			</TBODY>
+		</TABLE>
 
-					<TABLE WIDTH="100%" SUMMARY="Current Sources">
-					<COL WIDTH="175">
-					<COL>
-					<TBODY>
-						<TR>
-							<TD>Source archive</TD>
-							<TD>
-							<A HREF="/snapshots/mdocml.tar.gz">/snapshots/mdocml.tar.gz</A> 
-							(<A HREF="/snapshots/mdocml.md5">md5</A>)
-							</TD>
-						</TR>
-						<TR>
-							<TD>Online source</TD>
-							<TD>
-							<A HREF="http://mdocml.bsd.lv/cgi-bin/cvsweb/?cvsroot=mdocml">cvsweb</A>
-							</TD>
-						</TR>
-					</TBODY>
-					</TABLE>
+		<H1>
+			<A NAME="documentation">Documentation</A>
+		</H1>
 
-					<H2>
-					Downstream
-					</H2>
+		<P>
+			These manuals are generated automatically and refer to the current snapshot.
+		</P>
 
-					<TABLE WIDTH="100%" SUMMARY="Downstream Sources">
-					<COL WIDTH="175">
-					<COL>
-					<TBODY>
-						<TR>
-							<TD>DragonFly BSD</TD>
-							<TD>
-							<A HREF="http://gitweb.dragonflybsd.org/dragonfly.git/tree/HEAD:/usr.bin/mandoc"
-								CLASS="external">usr.bin/mandoc</A>
-							</TD>
-						</TR>
-						<TR>
-							<TD>FreeBSD</TD>
-							<TD>
-							<A HREF="http://www.freebsd.org/cgi/cvsweb.cgi/ports/textproc/mdocml/" 
-								CLASS="external">ports/textproc/mdocml</A>
-							</TD>
-						</TR>
-						<TR>
-							<TD>NetBSD</TD>
-							<TD>
-							<A HREF="http://cvsweb.netbsd.org/bsdweb.cgi/src/external/bsd/mdocml/"
-								CLASS="external">src/external/bsd/mdocml</A>
-							</TD>
-						</TR>
-						<TR>
-							<TD>OpenBSD</TD>
-							<TD>
-							<A HREF="http://www.openbsd.org/cgi-bin/cvsweb/src/usr.bin/mandoc/"
-								CLASS="external">src/usr.bin/mandoc</A> 
-							</TD>
-						</TR>
-					</TBODY>
-					</TABLE>
+		<TABLE WIDTH="100%" SUMMARY="Documentation">
+			<COL WIDTH="175">
+			<COL>
+			<TBODY>
+				<TR>
+					<TD VALIGN="top"><A HREF="mandoc.1.html">mandoc(1)</A></TD>
+					<TD VALIGN="top">
+						format and display UNIX manuals
+						<SMALL>
+							(<A HREF="mandoc.1.txt">text</A> | 
+							<A HREF="mandoc.1.xhtml">xhtml</A> |
+							<A HREF="mandoc.1.pdf">pdf</A> |
+							<A HREF="mandoc.1.ps">postscript</A>)
+						</SMALL>
+					</TD>
+				</TR>
+				<TR>
+					<TD VALIGN="top"><A HREF="makewhatis.1.html">makewhatis(1)</A></TD>
+					<TD VALIGN="top">
+						index UNIX manuals
+						<SMALL>
+							(<A HREF="makewhatis.1.txt">text</A> | 
+							<A HREF="makewhatis.1.xhtml">xhtml</A> |
+							<A HREF="makewhatis.1.pdf">pdf</A> |
+							<A HREF="makewhatis.1.ps">postscript</A>)
+						</SMALL>
+					</TD>
+				</TR>
+				<TR>
+					<TD VALIGN="top"><A HREF="preconv.1.html">preconv(1)</A></TD>
+					<TD VALIGN="top">
+						recode multibyte UNIX manuals
+						<SMALL>
+							(<A HREF="preconv.1.txt">text</A> | 
+							<A HREF="preconv.1.xhtml">xhtml</A> |
+							<A HREF="preconv.1.pdf">pdf</A> |
+							<A HREF="preconv.1.ps">postscript</A>)
+						</SMALL>
+					</TD>
+				</TR>
+				<TR>
+					<TD VALIGN="top"><A HREF="mandoc.3.html">mandoc(3)</A></TD>
+					<TD VALIGN="top">
+						mandoc macro compiler library
+						<SMALL>
+							(<A HREF="mandoc.3.txt">text</A> | 
+							<A HREF="mandoc.3.xhtml">xhtml</A> |
+							<A HREF="mandoc.3.pdf">pdf</A> |
+							<A HREF="mandoc.3.ps">postscript</A>)
+						</SMALL>
+					</TD>
+				</TR>
+				<TR>
+					<TD VALIGN="top"><A HREF="man.7.html">man(7)</A></TD>
+					<TD VALIGN="top">
+						man language reference
+						<SMALL>
+							(<A HREF="man.7.txt">text</A> | 
+							<A HREF="man.7.xhtml">xhtml</A> |
+							<A HREF="man.7.pdf">pdf</A> |
+							<A HREF="man.7.ps">postscript</A>)
+						</SMALL>
+					</TD>
+				</TR>
+				<TR>
+					<TD VALIGN="top"><A HREF="eqn.7.html">eqn(7)</A></TD>
+					<TD VALIGN="top">
+						eqn-mandoc language reference
+						<SMALL>
+							(<A HREF="eqn.7.txt">text</A> | 
+							<A HREF="eqn.7.xhtml">xhtml</A> |
+							<A HREF="eqn.7.pdf">pdf</A> |
+							<A HREF="eqn.7.ps">postscript</A>)
+						</SMALL>
+					</TD>
+				</TR>
+				<TR>
+					<TD VALIGN="top"><A HREF="mandoc_char.7.html">mandoc_char(7)</A></TD>
+					<TD VALIGN="top">
+						mandoc special characters
+						<SMALL>
+							(<A HREF="mandoc_char.7.txt">text</A> | 
+							<A HREF="mandoc_char.7.xhtml">xhtml</A> |
+							<A HREF="mandoc_char.7.pdf">pdf</A> |
+							<A HREF="mandoc_char.7.ps">postscript</A>)
+						</SMALL>
+					</TD>
+				</TR>
+				<TR>
+					<TD VALIGN="top"><A HREF="mdoc.7.html">mdoc(7)</A></TD>
+					<TD VALIGN="top">
+						mdoc language reference
+						<SMALL>
+							(<A HREF="mdoc.7.txt">text</A> | 
+							<A HREF="mdoc.7.xhtml">xhtml</A> |
+							<A HREF="mdoc.7.pdf">pdf</A> |
+							<A HREF="mdoc.7.ps">postscript</A>)
+						</SMALL>
+					</TD>
+				</TR>
+				<TR>
+					<TD VALIGN="top"><A HREF="roff.7.html">roff(7)</A></TD>
+					<TD VALIGN="top">
+						roff-mandoc language reference
+						<SMALL>
+							(<A HREF="roff.7.txt">text</A> | 
+							<A HREF="roff.7.xhtml">xhtml</A> |
+							<A HREF="roff.7.pdf">pdf</A> |
+							<A HREF="roff.7.ps">postscript</A>)
+						</SMALL>
+					</TD>
+				</TR>
+				<TR>
+					<TD VALIGN="top"><A HREF="tbl.7.html">tbl(7)</A></TD>
+					<TD VALIGN="top">
+						tbl-mandoc language reference
+						<SMALL>
+							(<A HREF="tbl.7.txt">text</A> | 
+							<A HREF="tbl.7.xhtml">xhtml</A> |
+							<A HREF="tbl.7.pdf">pdf</A> |
+							<A HREF="tbl.7.ps">postscript</A>)
+						</SMALL>
+					</TD>
+				</TR>
+			</TBODY>
+		</TABLE>
 
-					<H2>
-					Historical
-					</H2>
+		<H1>
+			<A NAME="contact">Contact</A>
+		</H1>
 
-					<TABLE WIDTH="100%" SUMMARY="Archived Sources">
-					<COL WIDTH="175">
-					<COL>
-					<TBODY>
-						<TR>
-							<TD>Source archive</TD>
-							<TD>
-							<A HREF="/snapshots/">/snapshots/</A> 
-							</TD>
-						</TR>
-					</TBODY>
-					</TABLE>
-				</TD>
-			</TR>
-			<TR>
-				<TD>
-					<H1>
-					<A NAME="documentation">DOCUMENTATION</A>
-					</H1>
+		<P>
+			Use the mailing lists for bug-reports, patches, questions, etc. (these require subscription).  Please check the
+			<A HREF="http://mdocml.bsd.lv/cgi-bin/cvsweb/TODO?cvsroot=mdocml">TODO</A> for known issues
+			before posting.  Beyond that, contact Kristaps at <A
+			HREF="http://mailhide.recaptcha.net/d?k=01M6h_w7twDp58ZgH57eWC_w==&amp;c=Q2DBUt401ePlSeupJFrq_Q==" TITLE="Reveal
+			this e-mail address">kris...</A>@bsd.lv.
+		</P>
 
-					<P>
-						These manuals are generated automatically and refer to the current snapshot.
-					</P>
+		<TABLE WIDTH="100%" SUMMARY="Mailing Lists">
+			<COL WIDTH="175">
+			<COL>
+			<TBODY>
+				<TR>
+					<TD>
+						disc<A CLASS="external" TITLE="Reveal this e-mail address"
+						HREF="http://www.google.com/recaptcha/mailhide/d?k=01KQ80PFH5n3BBNpF5Gs4sRg==&amp;c=EV1QytpQqTHSItc2IXvZyocgYLPnG5K0JKw_gwMC9yc=">...</A>@mdocml.bsd.lv
+					</TD>
+					<TD>
+						bug-reports, general questions, and announcements 
+						<SMALL>(<A HREF="/archives/discuss/summary.html">archive</A>)</SMALL>
+					</TD>
+				</TR>
+				<TR>
+					<TD>
+						tec<A CLASS="external" TITLE="Reveal this e-mail address"
+						HREF="http://www.google.com/recaptcha/mailhide/d?k=01qDX_iV0RlUOarEvb6mR28g==&amp;c=gRXsTjza0NNCFPaYu-Taj2tF0pmYZSc90EZkFkhkxgo=">...</A>@mdocml.bsd.lv
+					</TD>
+					<TD>
+						patches and system discussions 
+						<SMALL>(<A HREF="/archives/tech/summary.html">archive</A>)</SMALL>
+					</TD>
+				</TR>
+				<TR>
+					<TD>
+						sou<A CLASS="external" TITLE="Reveal this e-mail address"
+						HREF="http://www.google.com/recaptcha/mailhide/d?k=01prQrAZhhl2EbIwVcRfABsQ==&amp;c=KtTW4Yic9xk-8g40KzJoca4fR3MYXv28g8NC6OQV-T8=">...</A>@mdocml.bsd.lv
+					</TD>
+					<TD>
+						source commit messages 
+						<SMALL>(<A HREF="/archives/source/summary.html">archive</A>)</SMALL>
+					</TD>
+				</TR>
+			</TBODY>
+		</TABLE>
 
-					<TABLE WIDTH="100%" SUMMARY="Documentation">
-					<COL WIDTH="175">
-					<COL>
-					<TBODY>
-						<TR>
-							<TD VALIGN="top"><A HREF="mandoc.1.html">mandoc(1)</A></TD>
-							<TD VALIGN="top">
-								format and display UNIX manuals
-								<SPAN STYLE="font-size: smaller;">
-									(<A HREF="mandoc.1.txt">text</A> | 
-									<A HREF="mandoc.1.xhtml">xhtml</A> |
-									<A HREF="mandoc.1.pdf">pdf</A> |
-									<A HREF="mandoc.1.ps">postscript</A>)
-								</SPAN>
-							</TD>
-						</TR>
-						<TR>
-							<TD VALIGN="top"><A HREF="mandoc.3.html">mandoc(3)</A></TD>
-							<TD VALIGN="top">
-								mandoc macro compiler library
-								<SPAN STYLE="font-size: smaller;">
-									(<A HREF="mandoc.3.txt">text</A> | 
-									<A HREF="mandoc.3.xhtml">xhtml</A> |
-									<A HREF="mandoc.3.pdf">pdf</A> |
-									<A HREF="mandoc.3.ps">postscript</A>)
-								</SPAN>
-							</TD>
-						</TR>
-						<TR>
-							<TD VALIGN="top"><A HREF="man.7.html">man(7)</A></TD>
-							<TD VALIGN="top">
-								man language reference
-								<SPAN STYLE="font-size: smaller;">
-									(<A HREF="man.7.txt">text</A> | 
-									<A HREF="man.7.xhtml">xhtml</A> |
-									<A HREF="man.7.pdf">pdf</A> |
-									<A HREF="man.7.ps">postscript</A>)
-								</SPAN>
-							</TD>
-						</TR>
-						<TR>
-							<TD VALIGN="top"><A HREF="eqn.7.html">eqn(7)</A></TD>
-							<TD VALIGN="top">
-								eqn-mandoc language reference
-								<SPAN STYLE="font-size: smaller;">
-									(<A HREF="eqn.7.txt">text</A> | 
-									<A HREF="eqn.7.xhtml">xhtml</A> |
-									<A HREF="eqn.7.pdf">pdf</A> |
-									<A HREF="eqn.7.ps">postscript</A>)
-								</SPAN>
-							</TD>
-						</TR>
-						<TR>
-							<TD VALIGN="top"><A HREF="mandoc_char.7.html">mandoc_char(7)</A></TD>
-							<TD VALIGN="top">
-								mandoc special characters
-								<SPAN STYLE="font-size: smaller;">
-									(<A HREF="mandoc_char.7.txt">text</A> | 
-									<A HREF="mandoc_char.7.xhtml">xhtml</A> |
-									<A HREF="mandoc_char.7.pdf">pdf</A> |
-									<A HREF="mandoc_char.7.ps">postscript</A>)
-								</SPAN>
-							</TD>
-						</TR>
-						<TR>
-							<TD VALIGN="top"><A HREF="mdoc.7.html">mdoc(7)</A></TD>
-							<TD VALIGN="top">
-								mdoc language reference
-								<SPAN STYLE="font-size: smaller;">
-									(<A HREF="mdoc.7.txt">text</A> | 
-									<A HREF="mdoc.7.xhtml">xhtml</A> |
-									<A HREF="mdoc.7.pdf">pdf</A> |
-									<A HREF="mdoc.7.ps">postscript</A>)
-								</SPAN>
-							</TD>
-						</TR>
-						<TR>
-							<TD VALIGN="top"><A HREF="roff.7.html">roff(7)</A></TD>
-							<TD VALIGN="top">
-								roff-mandoc language reference
-								<SPAN STYLE="font-size: smaller;">
-									(<A HREF="roff.7.txt">text</A> | 
-									<A HREF="roff.7.xhtml">xhtml</A> |
-									<A HREF="roff.7.pdf">pdf</A> |
-									<A HREF="roff.7.ps">postscript</A>)
-								</SPAN>
-							</TD>
-						</TR>
-						<TR>
-							<TD VALIGN="top"><A HREF="tbl.7.html">tbl(7)</A></TD>
-							<TD VALIGN="top">
-								tbl-mandoc language reference
-								<SPAN STYLE="font-size: smaller;">
-									(<A HREF="tbl.7.txt">text</A> | 
-									<A HREF="tbl.7.xhtml">xhtml</A> |
-									<A HREF="tbl.7.pdf">pdf</A> |
-									<A HREF="tbl.7.ps">postscript</A>)
-								</SPAN>
-							</TD>
-						</TR>
-					</TBODY>
-					</TABLE>
+		<H1>
+			<A NAME="news">News</A>
+		</H1>
 
-					<P>
-					See <Q><A CLASS="external" HREF="http://manpages.bsd.lv">Writing UNIX Manual Pages</A></Q> for a general
-					introduction to manpages and mdoc.
-					</P>
-				</TD>
-			</TR>
-			<TR>
-				<TD>
-					<H1>
-					<A NAME="contact">CONTACT</A>
-					</H1>
+		<P CLASS="news">
+			26-05-2011: version 1.11.3
+		</P>
+		<P>
+			Introduce locale-encoding of output with the <B>-Tlocale</B> output option and Unicode escaped-character input.
+			See <A HREF="mandoc.1.html">mandoc</A> and <A HREF="mandoc_char.7.html">mandoc_char</A>, respectively, for details.
+			This allows for non-ASCII characters (e.g., <I>\[u5000]</I>) to be rendered in the locale's encoding, if said
+			environment supports wide-character encoding (if it does not, <B>-Tascii</B> is used instead).
+			Locale support can be turned off at compile time by removing <I>-DUSE_WCHAR</I> in the <I>Makefile</I>, in which case
+			<B>-Tlocale</B> is always a synonym for <B>-Tascii</B>.
+		</P>
+		<P>
+			Furthermore, multibyte-encoded documents, such as those in UTF-8, may be on-the-fly recoded into <A
+			HREF="mandoc.1.html">mandoc</A> input by using the newly-added <A HREF="preconv.1.html">preconv</A> utility.
+			Note: in the future, this feature may be integrated into <A HREF="mandoc.1.html">mandoc</A>.
+		</P>
 
-					<P>
-						Use the mailing lists for bug-reports, patches, questions, etc. (these require
-						subscription).
-						Please check the
-						<A HREF="http://mdocml.bsd.lv/cgi-bin/cvsweb/TODO?cvsroot=mdocml">TODO</A> for known issues
-						before posting.
-						Beyond that, contact Kristaps at <A
-						HREF="http://mailhide.recaptcha.net/d?k=01M6h_w7twDp58ZgH57eWC_w==&amp;c=Q2DBUt401ePlSeupJFrq_Q=="
-						TITLE="Reveal this e-mail address">kris...</A>@bsd.lv.
-					</P>
+		<P CLASS="news">
+			12-05-2011: version 1.11.2
+		</P>
+		<P>
+			Corrected some installation issues in version 1.11.1.
+			Further migration to <A HREF="mandoc.3.html">libmandoc</A>.
+			Initial public release (this utility is very much under development) of <A HREF="makewhatis.1.html">makewhatis</A>,
+			initially named mandoc-db.
+			This utility produces keyword databases of manual content for
+			<A HREF="http://mdocml.bsd.lv/mandoc-cgi/index.html">mandoc-cgi</A>, which features semantic querying of manual content.
+		</P>
 
-					<TABLE WIDTH="100%" SUMMARY="Mailing Lists">
-						<COL WIDTH="175">
-						<COL>
-						<TBODY>
-							<TR>
-								<TD>
-									disc<A CLASS="external" TITLE="Reveal this e-mail address"
-									HREF="http://www.google.com/recaptcha/mailhide/d?k=01KQ80PFH5n3BBNpF5Gs4sRg==&amp;c=EV1QytpQqTHSItc2IXvZyocgYLPnG5K0JKw_gwMC9yc=">...</A>@mdocml.bsd.lv
-								</TD>
-								<TD>
-									bug-reports, general questions, and announcements 
-									<SPAN STYLE="font-size: smaller;">(<A HREF="/archives/discuss/summary.html">archive</A>)</SPAN>
-								</TD>
-							</TR>
-							<TR>
-								<TD>
-									tec<A CLASS="external" TITLE="Reveal this e-mail address"
-									HREF="http://www.google.com/recaptcha/mailhide/d?k=01qDX_iV0RlUOarEvb6mR28g==&amp;c=gRXsTjza0NNCFPaYu-Taj2tF0pmYZSc90EZkFkhkxgo=">...</A>@mdocml.bsd.lv
-								</TD>
-								<TD>
-									patches and system discussions 
-									<SPAN STYLE="font-size: smaller;">(<A HREF="/archives/tech/summary.html">archive</A>)</SPAN>
-								</TD>
-							</TR>
-							<TR>
-								<TD>
-									sou<A CLASS="external" TITLE="Reveal this e-mail address"
-									HREF="http://www.google.com/recaptcha/mailhide/d?k=01prQrAZhhl2EbIwVcRfABsQ==&amp;c=KtTW4Yic9xk-8g40KzJoca4fR3MYXv28g8NC6OQV-T8=">...</A>@mdocml.bsd.lv
-								</TD>
-								<TD>
-									source commit messages 
-									<SPAN STYLE="font-size: smaller;">(<A HREF="/archives/source/summary.html">archive</A>)</SPAN>
-								</TD>
-							</TR>
-						</TBODY>
-					</TABLE>
-				</TD>
-			</TR>
-			<TR>
-				<TD>
-					<H1>
-					<A NAME="news">NEWS</A>
-					</H1>
-					<DIV CLASS="news">
-						<P>
-							<SPAN CLASS="date">04-04-2011</SPAN>:
-							version 1.11.1
-						</P>
-						<P>
-							The earlier <I>libroff</I>, <I>libmdoc</I>, and <I>libman</I> soup have been merged into
-							a single library, <A HREF="mandoc.3.html">libmandoc</A>, which manages all aspects of
-							parsing real manuals (from line-handling to <A HREF="tbl.7.html">tbl</A> parsing).
-						</P>
-						<P>
-							Beyond this structural change, initial <A HREF="eqn.7.html">eqn</A> functionality is in
-							place.  For the time being, this is limited to the recognition of equation blocks;
-							future version of <SPAN CLASS="nm">mdocml</SPAN> will expand upon this framework.
-						</P>
-						<P>
-							As usual, many general fixes and improvements have also occured.  In particular, a great
-							deal of redundancy and superfluous code has been removed with the merging of the backend
-							libraries.
-						</P>
-					</DIV>
-					<DIV CLASS="news">
-						<P>
-							<SPAN CLASS="date">07-01-2011</SPAN>:
-							version 1.10.9
-						</P>
-						<P>
-							Many back-end fixes have been implemented: argument handling (quoting), <A
-							HREF="man.7.html">man</A> improvements, error/warning classes, and many more.
-						</P>
-						<P>
-							Initial <A HREF="tbl.7.html">tbl</A> functionality (see the <Q>TS</Q>, <Q>TE</Q>, and
-							<Q>T&amp;</Q> macros in the <A HREF="roff.7.html#x5c265453">roff</A> manual) has been
-							merged from <A CLASS="external" HREF="http://tbl.bsd.lv">tbl.bsd.lv</A>.  Output is
-							still minimal, especially for <SPAN CLASS="flag">-Thtml</SPAN> and <SPAN
-							CLASS="flag">-Txhtml</SPAN>, but manages to at least display data.  This means that <A
-							HREF="mandoc.1.html">mandoc</A> now has built-in support for two troff preprocessors via
-							<DEL>libroff</DEL>: soelim and tbl.
-						</P>
-					</DIV>
-					<DIV CLASS="news">
-						<P>
-							<SPAN CLASS="date">24-12-2010</SPAN>:
-							version 1.10.8
-						</P>
-						<P>
-							Significant improvements merged from <A CLASS="external"
-							HREF="http://www.openbsd.org">OpenBSD</A> downstream, including
-						</P>
-						<UL>
-							<LI>many new <A HREF="roff.7.html">roff</A> components,</LI>
-							<LI>in-line implementation of troff's soelim,</LI>
-							<LI>broken-block handling,</LI>
-							<LI>overhauled error classifications, and</LI>
-							<LI>cleaned up handling of error conditions.</LI>
-						</UL>
-						<P>
-							Also overhauled the <SPAN CLASS="flag">-Thtml</SPAN> and <SPAN
-							CLASS="flag">-Txhtml</SPAN> output modes.  They now display readable output in arbitrary
-							browsers, including text-based ones like <A CLASS="external"
-							HREF="http://lynx.isc.org">lynx</A>.  See HTML and XHTML manuals in the <A
-							HREF="#documentation">DOCUMENTATION</A> section for examples.  <SPAN
-							CLASS="attn">Attention: available style-sheet classes have been considerably
-							changed!</SPAN> See the <SPAN CLASS="file">example.style.css</SPAN> file for details.
-							Lastly, <DEL>libmdoc</DEL> and <DEL>libman</DEL> have been
-							cleaned up and reduced in size and complexity.
-						</P>
-					</DIV>
-					<P>
-						See <A HREF="http://mdocml.bsd.lv/cgi-bin/cvsweb/index.sgml?cvsroot=mdocml">cvsweb</A> for
-						historical notes.
-					</P>
-				</TD>
-			</TR>
-			<TR>
-				<TD>
-					<DIV CLASS="foot">
-						Copyright &#169; 2008&#8211;2011 Kristaps Dzonsons, $Date: 2011/04/04 21:07:20 $
-					</DIV>
-				</TD>
-			</TR>
-		</TBODY>
-	</TABLE>
+		<P CLASS="news">
+			04-04-2011: version 1.11.1
+		</P>
+		<P>
+			The earlier <I>libroff</I>, <I>libmdoc</I>, and <I>libman</I> soup have been merged into
+			a single library, <A HREF="mandoc.3.html">libmandoc</A>, which manages all aspects of
+			parsing real manuals (from line-handling to <A HREF="tbl.7.html">tbl</A> parsing).
+		</P>
+		<P>
+			Beyond this structural change, initial <A HREF="eqn.7.html">eqn</A> functionality is in
+			place.  For the time being, this is limited to the recognition of equation blocks;
+			future versions of <SPAN CLASS="nm">mdocml</SPAN> will expand upon this framework.
+		</P>
+		<P>
+			As usual, many general fixes and improvements have also occurred.  In particular, a great
+			deal of redundancy and superfluous code has been removed with the merging of the backend
+			libraries.
+		</P>
+
+		<P>
+			See <A HREF="http://mdocml.bsd.lv/cgi-bin/cvsweb/index.sgml?cvsroot=mdocml">cvsweb</A> for
+			historical notes.
+		</P>
+
+		<P CLASS="foot">
+			<SMALL>
+				Copyright &#169; 2008&#8211;2011 
+				<A CLASS="external" HREF="http://kristaps.bsd.lv">Kristaps Dzonsons</A>, 
+				$Date: 2011/05/26 21:23:50 $
+			</SMALL>
+		</P>
 	</BODY>
 </HTML>
diff --git a/contrib/mdocml/libmandoc.h b/contrib/mdocml/libmandoc.h
index 3157f290f2..5f8379aff2 100644
--- a/contrib/mdocml/libmandoc.h
+++ b/contrib/mdocml/libmandoc.h
@@ -1,4 +1,4 @@
-/*	$Id: libmandoc.h,v 1.17 2011/03/28 23:52:13 kristaps Exp $ */
+/*	$Id: libmandoc.h,v 1.21 2011/05/14 16:06:09 kristaps Exp $ */
 /*
  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -73,13 +73,13 @@ void		 mandoc_msg(enum mandocerr, struct mparse *,
 			int, int, const char *);
 void		 mandoc_vmsg(enum mandocerr, struct mparse *, 
 			int, int, const char *, ...);
-int		 mandoc_special(char *);
 char		*mandoc_strdup(const char *);
 char		*mandoc_getarg(struct mparse *, char **, int, int *);
 char		*mandoc_normdate(struct mparse *, char *, int, int);
 int		 mandoc_eos(const char *, size_t, int);
 int		 mandoc_hyph(const char *, const char *);
 int		 mandoc_getcontrol(const char *, int *);
+int		 mandoc_strntou(const char *, size_t, int);
 
 void	 	 mdoc_free(struct mdoc *);
 struct	mdoc	*mdoc_alloc(struct regset *, struct mparse *);
diff --git a/contrib/mdocml/libmdoc.h b/contrib/mdocml/libmdoc.h
index 2a55eb37be..0e4b125f42 100644
--- a/contrib/mdocml/libmdoc.h
+++ b/contrib/mdocml/libmdoc.h
@@ -1,4 +1,4 @@
-/*	$Id: libmdoc.h,v 1.72 2011/03/22 14:33:05 kristaps Exp $ */
+/*	$Id: libmdoc.h,v 1.74 2011/04/19 16:38:48 kristaps Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -62,20 +62,20 @@ struct	mdoc_macro {
 
 enum	margserr {
 	ARGS_ERROR,
-	ARGS_EOLN,
-	ARGS_WORD,
-	ARGS_PUNCT,
-	ARGS_QWORD,
-	ARGS_PHRASE,
-	ARGS_PPHRASE,
-	ARGS_PEND
+	ARGS_EOLN, /* end-of-line */
+	ARGS_WORD, /* normal word */
+	ARGS_PUNCT, /* series of punctuation */
+	ARGS_QWORD, /* quoted word */
+	ARGS_PHRASE, /* Ta'd phrase (-column) */
+	ARGS_PPHRASE, /* tabbed phrase (-column) */
+	ARGS_PEND /* last phrase (-column) */
 };
 
 enum	margverr {
 	ARGV_ERROR,
-	ARGV_EOLN,
-	ARGV_ARG,
-	ARGV_WORD
+	ARGV_EOLN, /* end of line */
+	ARGV_ARG, /* valid argument */
+	ARGV_WORD /* normal word (or bad argument---same thing) */
 };
 
 /*
@@ -133,14 +133,8 @@ void		  mdoc_argv_free(struct mdoc_arg *);
 enum margserr	  mdoc_args(struct mdoc *, int,
 			int *, char *, enum mdoct, char **);
 enum margserr	  mdoc_zargs(struct mdoc *, int, 
-			int *, char *, int, char **);
-#define	ARGS_DELIM	(1 << 1)
-#define	ARGS_TABSEP	(1 << 2)
-#define	ARGS_NOWARN	(1 << 3)
-
+			int *, char *, char **);
 int		  mdoc_macroend(struct mdoc *);
-
-#define	DELIMSZ	  6 /* hint: max possible size of a delimiter */
 enum mdelim	  mdoc_isdelim(const char *);
 
 __END_DECLS
diff --git a/contrib/mdocml/main.c b/contrib/mdocml/main.c
index bbbb88466a..4c2e7e790c 100644
--- a/contrib/mdocml/main.c
+++ b/contrib/mdocml/main.c
@@ -1,4 +1,4 @@
-/*	$Id: main.c,v 1.161 2011/03/31 10:53:43 kristaps Exp $ */
+/*	$Id: main.c,v 1.163 2011/05/20 15:51:18 kristaps Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -43,6 +43,8 @@ typedef	void		(*out_free)(void *);
 
 enum	outt {
 	OUTT_ASCII = 0,	/* -Tascii */
+	OUTT_LOCALE,	/* -Tlocale */
+	OUTT_UTF8,	/* -Tutf8 */
 	OUTT_TREE,	/* -Ttree */
 	OUTT_HTML,	/* -Thtml */
 	OUTT_XHTML,	/* -Txhtml */
@@ -206,9 +208,19 @@ parse(struct curparse *curp, int fd,
 		switch (curp->outtype) {
 		case (OUTT_XHTML):
 			curp->outdata = xhtml_alloc(curp->outopts);
+			curp->outfree = html_free;
 			break;
 		case (OUTT_HTML):
 			curp->outdata = html_alloc(curp->outopts);
+			curp->outfree = html_free;
+			break;
+		case (OUTT_UTF8):
+			curp->outdata = utf8_alloc(curp->outopts);
+			curp->outfree = ascii_free;
+			break;
+		case (OUTT_LOCALE):
+			curp->outdata = locale_alloc(curp->outopts);
+			curp->outfree = ascii_free;
 			break;
 		case (OUTT_ASCII):
 			curp->outdata = ascii_alloc(curp->outopts);
@@ -232,7 +244,6 @@ parse(struct curparse *curp, int fd,
 		case (OUTT_XHTML):
 			curp->outman = html_man;
 			curp->outmdoc = html_mdoc;
-			curp->outfree = html_free;
 			break;
 		case (OUTT_TREE):
 			curp->outman = tree_man;
@@ -242,6 +253,10 @@ parse(struct curparse *curp, int fd,
 			/* FALLTHROUGH */
 		case (OUTT_ASCII):
 			/* FALLTHROUGH */
+		case (OUTT_UTF8):
+			/* FALLTHROUGH */
+		case (OUTT_LOCALE):
+			/* FALLTHROUGH */
 		case (OUTT_PS):
 			curp->outman = terminal_man;
 			curp->outmdoc = terminal_mdoc;
@@ -299,6 +314,10 @@ toptions(struct curparse *curp, char *arg)
 		curp->outtype = OUTT_TREE;
 	else if (0 == strcmp(arg, "html"))
 		curp->outtype = OUTT_HTML;
+	else if (0 == strcmp(arg, "utf8"))
+		curp->outtype = OUTT_UTF8;
+	else if (0 == strcmp(arg, "locale"))
+		curp->outtype = OUTT_LOCALE;
 	else if (0 == strcmp(arg, "xhtml"))
 		curp->outtype = OUTT_XHTML;
 	else if (0 == strcmp(arg, "ps"))
diff --git a/contrib/mdocml/main.h b/contrib/mdocml/main.h
index bb503eb79e..07b9e879fa 100644
--- a/contrib/mdocml/main.h
+++ b/contrib/mdocml/main.h
@@ -1,4 +1,4 @@
-/*	$Id: main.h,v 1.10 2010/07/31 23:52:58 schwarze Exp $ */
+/*	$Id: main.h,v 1.12 2011/05/20 15:48:22 kristaps Exp $ */
 /*
  * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -41,6 +41,8 @@ void		  html_free(void *);
 void		  tree_mdoc(void *, const struct mdoc *);
 void		  tree_man(void *, const struct man *);
 
+void		 *locale_alloc(char *);
+void		 *utf8_alloc(char *);
 void		 *ascii_alloc(char *);
 void		  ascii_free(void *);
 
diff --git a/contrib/mdocml/makewhatis.1 b/contrib/mdocml/makewhatis.1
new file mode 100644
index 0000000000..2eb385cd00
--- /dev/null
+++ b/contrib/mdocml/makewhatis.1
@@ -0,0 +1,152 @@
+.\"	$Id: makewhatis.1,v 1.2 2011/05/14 23:43:03 kristaps Exp $
+.\"
+.\" Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: May 14 2011 $
+.Dt MAKEWHATIS 1
+.Os
+.Sh NAME
+.Nm makewhatis
+.Nd index UNIX manuals
+.Sh SYNOPSIS
+.Nm
+.Op Fl d Ar dir
+.Ar
+.Sh DESCRIPTION
+The
+.Nm
+utility extracts keywords from
+.Ux
+manuals and indexes them for fast retrieval.
+The arguments are as follows:
+.Bl -tag -width Ds
+.It Fl d Ar dir
+The directory into which to write the keyword and index databases.
+.It Ar
+Read input from zero or more files in
+.Xr mdoc 7
+or
+.Xr man 7
+.Ux
+manual format.
+.El
+.Pp
+By default,
+.Nm
+constructs the
+.Sx Index Database
+and
+.Sx Keyword Database
+in the current working directory.
+.Pp
+If fatal parse errors are encountered, the offending file is printed to
+stderr, omitted from the index, and the parse continues with the next
+input file.
+.Ss Index Database
+The index database,
+.Pa mandoc.index ,
+is a
+.Xr recno 3
+database with record values consisting of
+.Pp
+.Bl -enum -compact
+.It
+a nil-terminated filename,
+.It
+a nil-terminated manual section,
+.It
+a nil-terminated manual title,
+.It
+a nil-terminated architecture
+.Pq this is not often available
+.It
+and a nil-terminated description.
+.El
+.Pp
+Both the manual section and description may be zero-length.
+Entries are sequentially-numbered, but the filenames are unordered.
+.Ss Keyword Database
+The keyword database,
+.Pa mandoc.db ,
+is a
+.Xr btree 3
+database of nil-terminated keywords (record length is non-zero string
+length plus one) mapping to an 8-byte binary field consisting of the
+keyword type and source
+.Sx Index Database
+record number.
+The type, an unsigned 32-bit integer in host order, is one of the
+following:
+.Pp
+.Bl -tag -width Ds -offset indent -compact
+.It Li 0x01
+The name of a manual page as given in the NAME section.
+.It Li 0x02
+A function prototype name as given in the SYNOPSIS section.
+.It Li 0x03
+A utility name as given in the SYNOPSIS section.
+.It Li 0x04
+An include file as given in the SYNOPSIS section.
+.It Li 0x05
+A variable name as given in the SYNOPSIS section.
+.It Li 0x06
+A standard as given in the STANDARDS section.
+.It Li 0x07
+An author as given in the AUTHORS section.
+.It Li 0x08
+A configuration as given in the SYNOPSIS section.
+.El
+.Pp
+If a value is encountered outside of this range, the database is
+corrupt.
+.Pp
+The latter four bytes are a host-ordered record number within the
+.Sx Index Database .
+.Pp
+The
+.Nm
+utility is
+.Ud
+.Sh FILES
+.Bl -tag -width Ds
+.It Pa mandoc.db
+A
+.Xr btree 3
+keyword database mapping keywords to a type and file reference in
+.Pa mandoc.index .
+.It Pa mandoc.db~
+Working copy of
+.Pa mandoc.db .
+.It Pa mandoc.index
+A
+.Xr recno 3
+database of indexed file-names.
+.It Pa mandoc.index~
+Working copy of
+.Pa mandoc.index .
+.El
+.Sh EXIT STATUS
+.Ex -std
+.Sh SEE ALSO
+.Xr mandoc 1
+.Sh AUTHORS
+The
+.Nm
+utility was written by
+.An Kristaps Dzonsons Aq kristaps@bsd.lv .
+.Sh CAVEATS
+Only
+.Xr mdoc 7
+manuals are processed.
diff --git a/contrib/mdocml/makewhatis.c b/contrib/mdocml/makewhatis.c
new file mode 100644
index 0000000000..01d0f6a0bd
--- /dev/null
+++ b/contrib/mdocml/makewhatis.c
@@ -0,0 +1,920 @@
+/*	$Id: makewhatis.c,v 1.2 2011/05/15 02:47:17 kristaps Exp $ */
+/*
+ * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/param.h>
+
+#include <assert.h>
+#ifdef __linux__
+# include <db_185.h>
+#else
+# include <db.h>
+#endif
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "man.h"
+#include "mdoc.h"
+#include "mandoc.h"
+
+#define	MANDOC_DB	 "mandoc.db"
+#define	MANDOC_IDX	 "mandoc.index"
+#define	MANDOC_BUFSZ	  BUFSIZ
+#define	MANDOC_FLAGS	  O_CREAT|O_TRUNC|O_RDWR
+
+enum	type {
+	MANDOC_NONE = 0,
+	MANDOC_NAME,
+	MANDOC_FUNCTION,
+	MANDOC_UTILITY,
+	MANDOC_INCLUDES,
+	MANDOC_VARIABLE,
+	MANDOC_STANDARD,
+	MANDOC_AUTHOR,
+	MANDOC_CONFIG
+};
+
+#define	MAN_ARGS	  DB *db, \
+			  const char *dbn, \
+			  DBT *key, size_t *ksz, \
+			  DBT *val, \
+			  DBT *rval, size_t *rsz, \
+			  const struct man_node *n
+#define	MDOC_ARGS	  DB *db, \
+			  const char *dbn, \
+			  DBT *key, size_t *ksz, \
+			  DBT *val, \
+			  DBT *rval, size_t *rsz, \
+			  const struct mdoc_node *n
+
+static	void		  dbt_append(DBT *, size_t *, const char *);
+static	void		  dbt_appendb(DBT *, size_t *, 
+				const void *, size_t);
+static	void		  dbt_init(DBT *, size_t *);
+static	void		  dbt_put(DB *, const char *, DBT *, DBT *);
+static	void		  usage(void);
+static	void		  pman(DB *, const char *, DBT *, size_t *, 
+				DBT *, DBT *, size_t *, struct man *);
+static	int		  pman_node(MAN_ARGS);
+static	void		  pmdoc(DB *, const char *, DBT *, size_t *, 
+				DBT *, DBT *, size_t *, struct mdoc *);
+static	void		  pmdoc_node(MDOC_ARGS);
+static	void		  pmdoc_An(MDOC_ARGS);
+static	void		  pmdoc_Cd(MDOC_ARGS);
+static	void		  pmdoc_Fd(MDOC_ARGS);
+static	void		  pmdoc_In(MDOC_ARGS);
+static	void		  pmdoc_Fn(MDOC_ARGS);
+static	void		  pmdoc_Fo(MDOC_ARGS);
+static	void		  pmdoc_Nd(MDOC_ARGS);
+static	void		  pmdoc_Nm(MDOC_ARGS);
+static	void		  pmdoc_St(MDOC_ARGS);
+static	void		  pmdoc_Vt(MDOC_ARGS);
+
+typedef	void		(*pmdoc_nf)(MDOC_ARGS);
+
+static	const char	 *progname;
+
+static	const pmdoc_nf	  mdocs[MDOC_MAX] = {
+	NULL, /* Ap */
+	NULL, /* Dd */
+	NULL, /* Dt */
+	NULL, /* Os */
+	NULL, /* Sh */ 
+	NULL, /* Ss */ 
+	NULL, /* Pp */ 
+	NULL, /* D1 */
+	NULL, /* Dl */
+	NULL, /* Bd */
+	NULL, /* Ed */
+	NULL, /* Bl */ 
+	NULL, /* El */
+	NULL, /* It */
+	NULL, /* Ad */ 
+	pmdoc_An, /* An */ 
+	NULL, /* Ar */
+	pmdoc_Cd, /* Cd */ 
+	NULL, /* Cm */
+	NULL, /* Dv */ 
+	NULL, /* Er */ 
+	NULL, /* Ev */ 
+	NULL, /* Ex */ 
+	NULL, /* Fa */ 
+	pmdoc_Fd, /* Fd */
+	NULL, /* Fl */
+	pmdoc_Fn, /* Fn */ 
+	NULL, /* Ft */ 
+	NULL, /* Ic */ 
+	pmdoc_In, /* In */ 
+	NULL, /* Li */
+	pmdoc_Nd, /* Nd */
+	pmdoc_Nm, /* Nm */
+	NULL, /* Op */
+	NULL, /* Ot */
+	NULL, /* Pa */
+	NULL, /* Rv */
+	pmdoc_St, /* St */ 
+	pmdoc_Vt, /* Va */
+	pmdoc_Vt, /* Vt */ 
+	NULL, /* Xr */ 
+	NULL, /* %A */
+	NULL, /* %B */
+	NULL, /* %D */
+	NULL, /* %I */
+	NULL, /* %J */
+	NULL, /* %N */
+	NULL, /* %O */
+	NULL, /* %P */
+	NULL, /* %R */
+	NULL, /* %T */
+	NULL, /* %V */
+	NULL, /* Ac */
+	NULL, /* Ao */
+	NULL, /* Aq */
+	NULL, /* At */ 
+	NULL, /* Bc */
+	NULL, /* Bf */
+	NULL, /* Bo */
+	NULL, /* Bq */
+	NULL, /* Bsx */
+	NULL, /* Bx */
+	NULL, /* Db */
+	NULL, /* Dc */
+	NULL, /* Do */
+	NULL, /* Dq */
+	NULL, /* Ec */
+	NULL, /* Ef */ 
+	NULL, /* Em */ 
+	NULL, /* Eo */
+	NULL, /* Fx */
+	NULL, /* Ms */ 
+	NULL, /* No */
+	NULL, /* Ns */
+	NULL, /* Nx */
+	NULL, /* Ox */
+	NULL, /* Pc */
+	NULL, /* Pf */
+	NULL, /* Po */
+	NULL, /* Pq */
+	NULL, /* Qc */
+	NULL, /* Ql */
+	NULL, /* Qo */
+	NULL, /* Qq */
+	NULL, /* Re */
+	NULL, /* Rs */
+	NULL, /* Sc */
+	NULL, /* So */
+	NULL, /* Sq */
+	NULL, /* Sm */ 
+	NULL, /* Sx */
+	NULL, /* Sy */
+	NULL, /* Tn */
+	NULL, /* Ux */
+	NULL, /* Xc */
+	NULL, /* Xo */
+	pmdoc_Fo, /* Fo */ 
+	NULL, /* Fc */ 
+	NULL, /* Oo */
+	NULL, /* Oc */
+	NULL, /* Bk */
+	NULL, /* Ek */
+	NULL, /* Bt */
+	NULL, /* Hf */
+	NULL, /* Fr */
+	NULL, /* Ud */
+	NULL, /* Lb */
+	NULL, /* Lp */ 
+	NULL, /* Lk */ 
+	NULL, /* Mt */ 
+	NULL, /* Brq */ 
+	NULL, /* Bro */ 
+	NULL, /* Brc */ 
+	NULL, /* %C */
+	NULL, /* Es */
+	NULL, /* En */
+	NULL, /* Dx */
+	NULL, /* %Q */
+	NULL, /* br */
+	NULL, /* sp */
+	NULL, /* %U */
+	NULL, /* Ta */
+};
+
+int
+main(int argc, char *argv[])
+{
+	struct mparse	*mp; /* parse sequence */
+	struct mdoc	*mdoc; /* resulting mdoc */
+	struct man	*man; /* resulting man */
+	char		*fn; /* current file being parsed */
+	const char	*msec, /* manual section */
+			*mtitle, /* manual title */
+			*arch, /* manual architecture */
+			*dir; /* result dir (default: cwd) */
+	char		 ibuf[MAXPATHLEN], /* index fname */
+			 ibbuf[MAXPATHLEN], /* index backup fname */
+			 fbuf[MAXPATHLEN],  /* btree fname */
+			 fbbuf[MAXPATHLEN]; /* btree backup fname */
+	int		 ch;
+	DB		*idx, /* index database */
+			*db; /* keyword database */
+	DBT		 rkey, rval, /* recno entries */
+			 key, val; /* persistent keyword entries */
+	size_t		 sv,
+			 ksz, rsz; /* entry buffer size */
+	char		 vbuf[8]; /* binary keyword type + record number */
+	BTREEINFO	 info; /* btree configuration */
+	recno_t		 rec; /* current record number */
+	extern int	 optind;
+	extern char	*optarg;
+
+	progname = strrchr(argv[0], '/');
+	if (progname == NULL)
+		progname = argv[0];
+	else
+		++progname;
+
+	dir = "";
+
+	while (-1 != (ch = getopt(argc, argv, "d:")))
+		switch (ch) {
+		case ('d'):
+			dir = optarg;
+			break;
+		default:
+			usage();
+			return((int)MANDOCLEVEL_BADARG);
+		}
+
+	argc -= optind;
+	argv += optind;
+
+	/*
+	 * Build the final and temporary ("~"-suffixed) file-names for
+	 * the keyword database and the index.  NOTE: dir is prepended
+	 * verbatim (no '/' is inserted), so a -d argument must end in
+	 * a slash.  Temporaries are renamed into place at the end.
+	 */
+
+	ibuf[0] = ibuf[MAXPATHLEN - 2] =
+		ibbuf[0] = ibbuf[MAXPATHLEN - 2] =
+		fbuf[0] = fbuf[MAXPATHLEN - 2] =
+		fbbuf[0] = fbbuf[MAXPATHLEN - 2] = '\0';
+
+	strlcat(fbuf, dir, MAXPATHLEN);
+	strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
+
+	strlcat(fbbuf, fbuf, MAXPATHLEN);
+	strlcat(fbbuf, "~", MAXPATHLEN);
+
+	strlcat(ibuf, dir, MAXPATHLEN);
+	strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
+
+	strlcat(ibbuf, ibuf, MAXPATHLEN);
+	strlcat(ibbuf, "~", MAXPATHLEN);
+
+	if ('\0' != fbuf[MAXPATHLEN - 2] ||
+			'\0' != fbbuf[MAXPATHLEN - 2] ||
+			'\0' != ibuf[MAXPATHLEN - 2] ||
+			'\0' != ibbuf[MAXPATHLEN - 2]) {
+		fprintf(stderr, "%s: Path too long\n", progname);
+		exit((int)MANDOCLEVEL_SYSERR);
+	}
+
+	/*
+	 * For the keyword database, open a BTREE database that allows
+	 * duplicates.  For the index database, use a standard RECNO
+	 * database type.
+	 */
+
+	memset(&info, 0, sizeof(BTREEINFO));
+	info.flags = R_DUP;
+	db = dbopen(fbbuf, MANDOC_FLAGS, 0644, DB_BTREE, &info);
+
+	if (NULL == db) {
+		perror(fbbuf);
+		exit((int)MANDOCLEVEL_SYSERR);
+	}
+
+	idx = dbopen(ibbuf, MANDOC_FLAGS, 0644, DB_RECNO, NULL);
+
+	if (NULL == idx) {
+		perror(ibbuf);
+		(*db->close)(db);
+		exit((int)MANDOCLEVEL_SYSERR);
+	}
+
+	/*
+	 * Try parsing the manuals given on the command line.  If we
+	 * totally fail, then just keep on going.  Take resulting trees
+	 * and push them down into the database code.
+	 * Use the auto-parser and don't report any errors.
+	 */
+
+	mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
+
+	memset(&key, 0, sizeof(DBT));
+	memset(&val, 0, sizeof(DBT));
+	memset(&rkey, 0, sizeof(DBT));
+	memset(&rval, 0, sizeof(DBT));
+
+	val.size = sizeof(vbuf);
+	val.data = vbuf;
+	rkey.size = sizeof(recno_t);
+
+	rec = 1;
+	ksz = rsz = 0;
+
+	while (NULL != (fn = *argv++)) {
+		mparse_reset(mp);
+
+		/* Parse and get (non-empty) AST. */
+
+		if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) {
+			fprintf(stderr, "%s: Parse failure\n", fn);
+			continue;
+		}
+		mparse_result(mp, &mdoc, &man);
+		if (NULL == mdoc && NULL == man)
+			continue;
+
+		/* Manual section: can be empty string. */
+
+		msec = NULL != mdoc ?
+			mdoc_meta(mdoc)->msec :
+			man_meta(man)->msec;
+		mtitle = NULL != mdoc ?
+			mdoc_meta(mdoc)->title :
+			man_meta(man)->title;
+		arch = NULL != mdoc ? mdoc_meta(mdoc)->arch : NULL;
+
+		assert(msec);
+		assert(mtitle);
+
+		/*
+		 * The index record value consists of a nil-terminated
+		 * filename, manual section, manual title, architecture
+		 * (possibly empty) and description.  Since the description
+		 * may not be set, we remember the size here to see if we
+		 * need to write a nil byte in its place afterwards.
+		 */
+
+		dbt_init(&rval, &rsz);
+		dbt_appendb(&rval, &rsz, fn, strlen(fn) + 1);
+		dbt_appendb(&rval, &rsz, msec, strlen(msec) + 1);
+		dbt_appendb(&rval, &rsz, mtitle, strlen(mtitle) + 1);
+		dbt_appendb(&rval, &rsz, arch ? arch : "",
+				arch ? strlen(arch) + 1 : 1);
+
+		sv = rval.size;
+
+		/* Clear the type (bytes 0-3), fix the record number (4-7). */
+
+		memset(val.data, 0, sizeof(uint32_t));
+		memcpy(vbuf + 4, &rec, sizeof(uint32_t));
+
+		if (mdoc)
+			pmdoc(db, fbbuf, &key, &ksz,
+				&val, &rval, &rsz, mdoc);
+		else
+			pman(db, fbbuf, &key, &ksz,
+				&val, &rval, &rsz, man);
+
+		/*
+		 * Apply this to the index.  If we haven't had a
+		 * description set, put an empty one in now.
+		 */
+
+		if (rval.size == sv)
+			dbt_appendb(&rval, &rsz, "", 1);
+
+		rkey.data = &rec;
+		dbt_put(idx, ibbuf, &rkey, &rval);
+
+		printf("Indexed: %s\n", fn);
+		rec++;
+	}
+
+	(*db->close)(db);
+	(*idx->close)(idx);
+
+	mparse_free(mp);
+
+	free(key.data);
+	free(rval.data);
+
+	/* Atomically replace the file with our temporary one. */
+
+	if (-1 == rename(fbbuf, fbuf))
+		perror(fbuf);
+	if (-1 == rename(ibbuf, ibuf))
+		perror(ibuf);
+
+	return((int)MANDOCLEVEL_OK);
+}
+
+/*
+ * Initialise a database entry whose data buffer is shared between uses
+ * (as a key must sometimes be built up from several strings): allocate
+ * the buffer on first use and reset the entry's size to zero.
+ */
+static void
+dbt_init(DBT *key, size_t *ksz)
+{
+
+	if (0 == *ksz) {
+		assert(0 == key->size);
+		assert(NULL == key->data);
+		key->data = mandoc_malloc(MANDOC_BUFSZ);
+		*ksz = MANDOC_BUFSZ;
+	}
+
+	key->size = 0;
+}
+
+/*
+ * Append sz bytes at cp to a database entry.  This can be invoked
+ * multiple times; the buffer (capacity *ksz) is automatically resized.
+ */
+static void
+dbt_appendb(DBT *key, size_t *ksz, const void *cp, size_t sz)
+{
+
+	assert(key->data);
+
+	/* Overshoot by MANDOC_BUFSZ. */
+
+	while (key->size + sz >= *ksz) {
+		*ksz = key->size + sz + MANDOC_BUFSZ;
+		key->data = mandoc_realloc(key->data, *ksz);
+	}
+
+	memcpy((char *)key->data + key->size, cp, sz);
+	key->size += sz;
+}
+
+/*
+ * Append a nil-terminated string to the database entry.  This can be
+ * invoked multiple times.  The database entry will be nil-terminated as
+ * well; if invoked multiple times, a space is put between strings.
+ */
+static void
+dbt_append(DBT *key, size_t *ksz, const char *cp)
+{
+	size_t		 sz;
+
+	if (0 == (sz = strlen(cp)))
+		return;
+
+	assert(key->data);
+
+	if (key->size)
+		((char *)key->data)[(int)key->size - 1] = ' ';
+
+	dbt_appendb(key, ksz, cp, sz + 1);
+}
+
+/* ARGSUSED */
+static void
+pmdoc_An(MDOC_ARGS)
+{
+	uint32_t	 fl;
+	
+	if (SEC_AUTHORS != n->sec)
+		return;
+
+	for (n = n->child; n; n = n->next)
+		if (MDOC_TEXT == n->type)
+			dbt_append(key, ksz, n->string);
+
+	fl = (uint32_t)MANDOC_AUTHOR;
+	memcpy(val->data, &fl, 4);
+}
+
+/* ARGSUSED */
+static void
+pmdoc_Fd(MDOC_ARGS)
+{
+	uint32_t	 fl;
+	const char	*start;
+	size_t		 sz;
+
+	if (SEC_SYNOPSIS != n->sec)
+		return;
+	if (NULL == (n = n->child) || MDOC_TEXT != n->type)
+		return;
+
+	/*
+	 * Only consider those `Fd' macro fields that begin with an
+	 * "inclusion" token (versus, e.g., #define).
+	 */
+	if (strcmp("#include", n->string))
+		return;
+
+	if (NULL == (n = n->next) || MDOC_TEXT != n->type)
+		return;
+
+	/*
+	 * Strip away the enclosing angle brackets (or quotes) and
+	 * ignore names that end up zero-length, e.g. "<>" or ">".
+	 */
+
+	start = n->string;
+	if ('<' == *start || '"' == *start)
+		start++;
+
+	if (0 == (sz = strlen(start)))
+		return;
+
+	if ('>' == start[sz - 1] || '"' == start[sz - 1])
+		sz--;
+	if (0 == sz)
+		return;
+
+	dbt_appendb(key, ksz, start, sz);
+	dbt_appendb(key, ksz, "", 1);
+
+	fl = (uint32_t)MANDOC_INCLUDES;
+	memcpy(val->data, &fl, 4);
+}
+
+/* ARGSUSED */
+static void
+pmdoc_Cd(MDOC_ARGS)
+{
+	uint32_t	 fl;
+	
+	if (SEC_SYNOPSIS != n->sec)
+		return;
+
+	for (n = n->child; n; n = n->next)
+		if (MDOC_TEXT == n->type)
+			dbt_append(key, ksz, n->string);
+
+	fl = (uint32_t)MANDOC_CONFIG;
+	memcpy(val->data, &fl, 4);
+}
+
+/* ARGSUSED */
+static void
+pmdoc_In(MDOC_ARGS)
+{
+	uint32_t	 fl;
+	
+	if (SEC_SYNOPSIS != n->sec)
+		return;
+	if (NULL == n->child || MDOC_TEXT != n->child->type)
+		return;
+
+	dbt_append(key, ksz, n->child->string);
+	fl = (uint32_t)MANDOC_INCLUDES;
+	memcpy(val->data, &fl, 4);
+}
+
+/* ARGSUSED */
+static void
+pmdoc_Fn(MDOC_ARGS)
+{
+	uint32_t	 fl;
+	const char	*cp;
+
+	if (SEC_SYNOPSIS != n->sec)
+		return;
+	if (NULL == n->child || MDOC_TEXT != n->child->type)
+		return;
+
+	/* .Fn "struct type *name" "arg": take the word after the last space. */
+
+	cp = strrchr(n->child->string, ' ');
+	cp = NULL == cp ?
+		n->child->string : cp + 1;
+
+	/* Strip away pointer symbol. */
+
+	while ('*' == *cp)
+		cp++;
+
+	dbt_append(key, ksz, cp);
+	fl = (uint32_t)MANDOC_FUNCTION;
+	memcpy(val->data, &fl, 4);
+}
+
+/* ARGSUSED */
+static void
+pmdoc_St(MDOC_ARGS)
+{
+	uint32_t	 fl;
+	
+	if (SEC_STANDARDS != n->sec)
+		return;
+	if (NULL == n->child || MDOC_TEXT != n->child->type)
+		return;
+
+	dbt_append(key, ksz, n->child->string);
+	fl = (uint32_t)MANDOC_STANDARD;
+	memcpy(val->data, &fl, 4);
+}
+
+/* ARGSUSED */
+static void
+pmdoc_Vt(MDOC_ARGS)
+{
+	uint32_t	 fl;
+	const char	*start;
+	size_t		 sz;
+	
+	if (SEC_SYNOPSIS != n->sec)
+		return;
+	if (MDOC_Vt == n->tok && MDOC_BODY != n->type)
+		return;
+	if (NULL == n->last || MDOC_TEXT != n->last->type)
+		return;
+
+	/*
+	 * Strip away leading pointer symbol '*' and trailing ';'.
+	 */
+
+	start = n->last->string;
+
+	while ('*' == *start)
+		start++;
+
+	if (0 == (sz = strlen(start)))
+		return;
+
+	if (';' == start[(int)sz - 1])
+		sz--;
+
+	if (0 == sz)
+		return;
+
+	dbt_appendb(key, ksz, start, sz);
+	dbt_appendb(key, ksz, "", 1);
+
+	fl = (uint32_t)MANDOC_VARIABLE;
+	memcpy(val->data, &fl, 4);
+}
+
+/* ARGSUSED */
+static void
+pmdoc_Fo(MDOC_ARGS)
+{
+	uint32_t	 fl;
+	
+	if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
+		return;
+	if (NULL == n->child || MDOC_TEXT != n->child->type)
+		return;
+
+	dbt_append(key, ksz, n->child->string);
+	fl = (uint32_t)MANDOC_FUNCTION;
+	memcpy(val->data, &fl, 4);
+}
+
+
+/* ARGSUSED */
+static void
+pmdoc_Nd(MDOC_ARGS)
+{
+	int		 first;
+	
+	for (first = 1, n = n->child; n; n = n->next) {
+		if (MDOC_TEXT != n->type)
+			continue;
+		if (first) 
+			dbt_appendb(rval, rsz, n->string, strlen(n->string) + 1);
+		else
+			dbt_append(rval, rsz, n->string);
+		first = 0;
+	}
+}
+
+/* ARGSUSED */
+static void
+pmdoc_Nm(MDOC_ARGS)
+{
+	uint32_t	 fl;
+	
+	if (SEC_NAME == n->sec) {
+		for (n = n->child; n; n = n->next) {
+			if (MDOC_TEXT != n->type)
+				continue;
+			dbt_append(key, ksz, n->string);
+		}
+		fl = (uint32_t)MANDOC_NAME;
+		memcpy(val->data, &fl, 4);
+		return;
+	} else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
+		return;
+
+	for (n = n->child; n; n = n->next) {
+		if (MDOC_TEXT != n->type)
+			continue;
+		dbt_append(key, ksz, n->string);
+	}
+
+	fl = (uint32_t)MANDOC_UTILITY;
+	memcpy(val->data, &fl, 4);
+}
+
+static void
+dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
+{
+
+	if (0 == key->size)
+		return;
+
+	assert(key->data);
+	assert(val->size);
+	assert(val->data);
+
+	if (0 == (*db->put)(db, key, val, 0))
+		return;
+	
+	perror(dbn);
+	exit((int)MANDOCLEVEL_SYSERR);
+	/* NOTREACHED */
+}
+
+/*
+ * Call out to per-macro handlers after clearing the persistent database
+ * key.  If the macro sets the database key, flush it to the database.
+ */
+static void
+pmdoc_node(MDOC_ARGS)
+{
+
+	if (NULL == n)
+		return;
+
+	switch (n->type) {
+	case (MDOC_HEAD):
+		/* FALLTHROUGH */
+	case (MDOC_BODY):
+		/* FALLTHROUGH */
+	case (MDOC_TAIL):
+		/* FALLTHROUGH */
+	case (MDOC_BLOCK):
+		/* FALLTHROUGH */
+	case (MDOC_ELEM):
+		if (NULL == mdocs[n->tok])
+			break;
+
+		dbt_init(key, ksz);
+
+		(*mdocs[n->tok])(db, dbn, key, ksz, val, rval, rsz, n);
+		dbt_put(db, dbn, key, val);
+		break;
+	default:
+		break;
+	}
+
+	pmdoc_node(db, dbn, key, ksz, val, rval, rsz, n->child);
+	pmdoc_node(db, dbn, key, ksz, val, rval, rsz, n->next);
+}
+
+static int
+pman_node(MAN_ARGS)
+{
+	const struct man_node *head, *body;
+	const char	*start, *sv;
+	size_t		 sz;
+	uint32_t	 fl;
+
+	if (NULL == n)
+		return(0);
+
+	/*
+	 * We're only searching for one thing: the first text child in
+	 * the BODY of a NAME section.  Since we don't keep track of
+	 * sections in -man, run some hoops to find out whether we're in
+	 * the correct section or not.
+	 */
+
+	if (MAN_BODY == n->type && MAN_SH == n->tok) {
+		body = n;
+		assert(body->parent);
+		if (NULL != (head = body->parent->head) &&
+				1 == head->nchild &&
+				NULL != (head = (head->child)) &&
+				MAN_TEXT == head->type &&
+				0 == strcmp(head->string, "NAME") &&
+				NULL != (body = body->child) &&
+				MAN_TEXT == body->type) {
+
+			fl = (uint32_t)MANDOC_NAME;
+			memcpy(val->data, &fl, 4);
+
+			assert(body->string);
+			start = sv = body->string;
+
+			/* 
+			 * Go through a special heuristic dance here.
+			 * This is why -man manuals are great!
+			 * (I'm being sarcastic: my eyes are bleeding.)
+			 * Conventionally, one or more manual names are
+			 * comma-specified prior to a whitespace, then a
+			 * dash, then a description.  Try to puzzle out
+			 * the name parts here.
+			 */
+
+			for ( ;; ) {
+				sz = strcspn(start, " ,");
+				if ('\0' == start[(int)sz])
+					break;
+
+				dbt_init(key, ksz);
+				dbt_appendb(key, ksz, start, sz);
+				dbt_appendb(key, ksz, "", 1);
+
+				dbt_put(db, dbn, key, val);
+
+				if (' ' == start[(int)sz]) {
+					start += (int)sz + 1;
+					break;
+				}
+
+				assert(',' == start[(int)sz]);
+				start += (int)sz + 1;
+				while (' ' == *start)
+					start++;
+			}
+
+			if (sv == start) {
+				dbt_init(key, ksz);
+				dbt_append(key, ksz, start);
+				return(1);
+			}
+
+			while (' ' == *start)
+				start++;
+
+			if (0 == strncmp(start, "-", 1))
+				start += 1;
+			else if (0 == strncmp(start, "\\-", 2))
+				start += 2;
+			else if (0 == strncmp(start, "\\(en", 4))
+				start += 4;
+			else if (0 == strncmp(start, "\\(em", 4))
+				start += 4;
+
+			while (' ' == *start)
+				start++;
+
+			dbt_appendb(rval, rsz, start, strlen(start) + 1);
+		}
+	}
+
+	if (pman_node(db, dbn, key, ksz, val, rval, rsz, n->child))
+		return(1);
+	if (pman_node(db, dbn, key, ksz, val, rval, rsz, n->next))
+		return(1);
+
+	return(0);
+}
+
+static void
+pman(DB *db, const char *dbn, DBT *key, size_t *ksz, 
+		DBT *val, DBT *rval, size_t *rsz, struct man *m)
+{
+
+	pman_node(db, dbn, key, ksz, val, rval, rsz, man_node(m));
+}
+
+
+static void
+pmdoc(DB *db, const char *dbn, DBT *key, size_t *ksz, 
+		DBT *val, DBT *rval, size_t *rsz, struct mdoc *m)
+{
+
+	pmdoc_node(db, dbn, key, ksz, val, rval, rsz, mdoc_node(m));
+}
+
+static void
+usage(void)
+{
+
+	fprintf(stderr, "usage: %s "
+			"[-d path] "
+			"[file...]\n", 
+			progname);
+}
diff --git a/contrib/mdocml/man.7 b/contrib/mdocml/man.7
index 876f32a312..8aa1f2c0ec 100644
--- a/contrib/mdocml/man.7
+++ b/contrib/mdocml/man.7
@@ -1,4 +1,4 @@
-.\"	$Id: man.7,v 1.99 2011/03/07 01:35:51 schwarze Exp $
+.\"	$Id: man.7,v 1.100 2011/05/26 09:26:16 kristaps Exp $
 .\"
 .\" Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
 .\"
@@ -14,7 +14,7 @@
 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 .\"
-.Dd $Mdocdate: March 7 2011 $
+.Dd $Mdocdate: May 26 2011 $
 .Dt MAN 7
 .Os
 .Sh NAME
@@ -205,16 +205,20 @@ appears as the first macro.
 Beyond
 .Sx \&TH ,
 at least one macro or text node must appear in the document.
-Documents are generally structured as follows:
+.Pp
+The following is a well-formed skeleton
+.Nm
+file for a utility
+.Qq progname :
 .Bd -literal -offset indent
-\&.TH FOO 1 2009-10-10
+\&.TH PROGNAME 1 2009-10-10
 \&.SH NAME
-\efBfoo\efR \e(en a description goes here
+\efBprogname\efR \e(en a description goes here
 \&.\e\*q .SH LIBRARY
 \&.\e\*q For sections 2 & 3 only.
 \&.\e\*q Not used in OpenBSD.
 \&.SH SYNOPSIS
-\efBfoo\efR [\efB\e-options\efR] arguments...
+\efBprogname\efR [\efB\e-options\efR] arguments...
 \&.SH DESCRIPTION
 The \efBfoo\efR utility processes files...
 \&.\e\*q .SH IMPLEMENTATION NOTES
diff --git a/contrib/mdocml/man_html.c b/contrib/mdocml/man_html.c
index 610e58fdbd..73953ecdd2 100644
--- a/contrib/mdocml/man_html.c
+++ b/contrib/mdocml/man_html.c
@@ -1,4 +1,4 @@
-/*	$Id: man_html.c,v 1.70 2011/03/07 01:35:51 schwarze Exp $ */
+/*	$Id: man_html.c,v 1.72 2011/05/17 11:34:31 kristaps Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -157,9 +157,7 @@ print_man_head(MAN_ARGS)
 {
 
 	print_gen_head(h);
-	bufinit(h);
-	buffmt(h, "%s(%s)", m->title, m->msec);
-
+	bufcat_fmt(h, "%s(%s)", m->title, m->msec);
 	print_otag(h, TAG_TITLE, 0, NULL);
 	print_text(h, h->buf);
 }
@@ -184,7 +182,6 @@ print_man_node(MAN_ARGS)
 
 	child = 1;
 	t = h->tags.head;
-	bufinit(h);
 
 	switch (n->type) {
 	case (MAN_ROOT):
@@ -259,8 +256,6 @@ print_man_node(MAN_ARGS)
 	/* This will automatically close out any font scope. */
 	print_stagq(h, t);
 
-	bufinit(h);
-
 	switch (n->type) {
 	case (MAN_ROOT):
 		man_root_post(m, n, mh, h);
@@ -401,6 +396,7 @@ man_br_pre(MAN_ARGS)
 	} else
 		su.scale = 0;
 
+	bufinit(h);
 	bufcat_su(h, "height", &su);
 	PAIR_STYLE_INIT(&tag, h);
 	print_otag(h, TAG_DIV, 1, &tag);
@@ -569,6 +565,7 @@ man_IP_pre(MAN_ARGS)
 	if (MAN_BLOCK == n->type) {
 		print_otag(h, TAG_P, 0, NULL);
 		print_otag(h, TAG_TABLE, 0, NULL);
+		bufinit(h);
 		bufcat_su(h, "width", &su);
 		PAIR_STYLE_INIT(&tag, h);
 		print_otag(h, TAG_COL, 1, &tag);
@@ -604,6 +601,8 @@ man_HP_pre(MAN_ARGS)
 	struct roffsu	 su;
 	const struct man_node *np;
 
+	bufinit(h);
+
 	np = MAN_BLOCK == n->type ? 
 		n->head->child : 
 		n->parent->head->child;
@@ -704,6 +703,7 @@ man_RS_pre(MAN_ARGS)
 	if (n->head->child)
 		a2width(n->head->child, &su);
 
+	bufinit(h);
 	bufcat_su(h, "margin-left", &su);
 	PAIR_STYLE_INIT(&tag, h);
 	print_otag(h, TAG_DIV, 1, &tag);
diff --git a/contrib/mdocml/man_macro.c b/contrib/mdocml/man_macro.c
index b3212e6806..915648b430 100644
--- a/contrib/mdocml/man_macro.c
+++ b/contrib/mdocml/man_macro.c
@@ -1,4 +1,4 @@
-/*	$Id: man_macro.c,v 1.60 2011/03/23 15:33:57 kristaps Exp $ */
+/*	$Id: man_macro.c,v 1.62 2011/04/19 16:38:48 kristaps Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
  *
diff --git a/contrib/mdocml/man_term.c b/contrib/mdocml/man_term.c
index cb0b08d7e6..38ceeabdbd 100644
--- a/contrib/mdocml/man_term.c
+++ b/contrib/mdocml/man_term.c
@@ -1,4 +1,4 @@
-/*	$Id: man_term.c,v 1.105 2011/03/22 10:13:01 kristaps Exp $ */
+/*	$Id: man_term.c,v 1.109 2011/05/17 14:38:34 kristaps Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -156,14 +156,7 @@ terminal_man(void *arg, const struct man *man)
 	p->tabwidth = term_len(p, 5);
 
 	if (NULL == p->symtab)
-		switch (p->enc) {
-		case (TERMENC_ASCII):
-			p->symtab = chars_init(CHARS_ASCII);
-			break;
-		default:
-			abort();
-			/* NOTREACHED */
-		}
+		p->symtab = mchars_alloc();
 
 	n = man_node(man);
 	m = man_meta(man);
diff --git a/contrib/mdocml/man_validate.c b/contrib/mdocml/man_validate.c
index 03bb120f56..e0c882d49b 100644
--- a/contrib/mdocml/man_validate.c
+++ b/contrib/mdocml/man_validate.c
@@ -1,4 +1,4 @@
-/*	$Id: man_validate.c,v 1.67 2011/03/22 15:30:30 kristaps Exp $ */
+/*	$Id: man_validate.c,v 1.69 2011/04/13 09:57:08 kristaps Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
@@ -54,7 +54,7 @@ static	int	  check_par(CHKARGS);
 static	int	  check_part(CHKARGS);
 static	int	  check_root(CHKARGS);
 static	int	  check_sec(CHKARGS);
-static	int	  check_text(CHKARGS);
+static	void	  check_text(CHKARGS);
 
 static	int	  post_AT(CHKARGS);
 static	int	  post_fi(CHKARGS);
@@ -151,7 +151,8 @@ man_valid_post(struct man *m)
 
 	switch (m->last->type) {
 	case (MAN_TEXT): 
-		return(check_text(m, m->last));
+		check_text(m, m->last);
+		return(1);
 	case (MAN_ROOT):
 		return(check_root(m, m->last));
 	case (MAN_EQN):
@@ -204,43 +205,48 @@ check_root(CHKARGS)
 	return(1);
 }
 
-
-static int
+static void
 check_text(CHKARGS) 
 {
-	char		*p;
-	int		 pos, c;
+	char		*p, *pp, *cpp;
+	int		 pos;
 	size_t		 sz;
 
-	for (p = n->string, pos = n->pos + 1; *p; p++, pos++) {
-		sz = strcspn(p, "\t\\");
-		p += (int)sz;
+	p = n->string;
+	pos = n->pos + 1;
 
-		if ('\0' == *p)
-			break;
+	while ('\0' != *p) {
+		sz = strcspn(p, "\t\\");
 
+		p += (int)sz;
 		pos += (int)sz;
 
 		if ('\t' == *p) {
-			if (MAN_LITERAL & m->flags)
-				continue;
-			man_pmsg(m, n->line, pos, MANDOCERR_BADTAB);
+			if ( ! (MAN_LITERAL & m->flags))
+				man_pmsg(m, n->line, pos, MANDOCERR_BADTAB);
+			p++;
+			pos++;
 			continue;
-		}
+		} else if ('\0' == *p)
+			break;
 
-		/* Check the special character. */
+		pos++;
+		pp = ++p;
 
-		c = mandoc_special(p);
-		if (c) {
-			p += c - 1;
-			pos += c - 1;
-		} else
+		if (ESCAPE_ERROR == mandoc_escape
+				((const char **)&pp, NULL, NULL)) {
 			man_pmsg(m, n->line, pos, MANDOCERR_BADESCAPE);
-	}
+			break;
+		}
 
-	return(1);
-}
+		cpp = p;
+		while (NULL != (cpp = memchr(cpp, ASCII_HYPH, pp - cpp)))
+			*cpp = '-';
 
+		pos += pp - p;
+		p = pp;
+	}
+}
 
 #define	INEQ_DEFINE(x, ineq, name) \
 static int \
@@ -319,14 +325,11 @@ static int
 check_sec(CHKARGS)
 {
 
-	if (MAN_HEAD == n->type && 0 == n->nchild) {
-		man_nmsg(m, n, MANDOCERR_SYNTARGCOUNT);
-		return(0);
-	} else if (MAN_BODY == n->type && 0 == n->nchild)
-		mandoc_msg(MANDOCERR_ARGCWARN, m->parse, n->line, 
-				n->pos, "want children (have none)");
+	if ( ! (MAN_HEAD == n->type && 0 == n->nchild)) 
+		return(1);
 
-	return(1);
+	man_nmsg(m, n, MANDOCERR_SYNTARGCOUNT);
+	return(0);
 }
 
 
diff --git a/contrib/mdocml/mandoc.1 b/contrib/mdocml/mandoc.1
index 91cb8fe890..7cf9ca5c9b 100644
--- a/contrib/mdocml/mandoc.1
+++ b/contrib/mdocml/mandoc.1
@@ -1,4 +1,4 @@
-.\"	$Id: mandoc.1,v 1.85 2011/02/09 10:03:02 kristaps Exp $
+.\"	$Id: mandoc.1,v 1.88 2011/05/20 15:51:18 kristaps Exp $
 .\"
 .\" Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
 .\"
@@ -14,7 +14,7 @@
 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 .\"
-.Dd $Mdocdate: February 9 2011 $
+.Dd $Mdocdate: May 20 2011 $
 .Dt MANDOC 1
 .Os
 .Sh NAME
@@ -158,6 +158,15 @@ utility accepts the following
 .Fl T
 arguments, which correspond to output modes:
 .Bl -tag -width Ds
+.It Fl T Ns Cm utf8
+Encode output in the UTF-8 multi-byte format.
+See
+.Sx UTF-8 Output .
+.It Fl T Ns Cm locale
+Encode output using the current
+.Xr locale 1 .
+See
+.Sx Locale Output .
 .It Fl T Ns Cm ascii
 Produce 7-bit ASCII output.
 This is the default.
@@ -189,6 +198,23 @@ See
 .Pp
 If multiple input files are specified, these will be processed by the
 corresponding filter in-order.
+.Ss UTF-8 Output
+Use
+.Fl T Ns Cm utf8
+to force a UTF-8 locale.
+See
+.Sx Locale Output
+for details and options.
+.Ss Locale Output
+Locale-dependent output encoding is triggered with
+.Fl T Ns Cm locale .
+This option is not available on all systems: systems without locale
+support, or those whose internal representation is not natively UCS-4,
+will fall back to
+.Fl T Ns Cm ascii .
+See
+.Sx ASCII Output
+for font style specification and available command-line arguments.
 .Ss ASCII Output
 Output produced by
 .Fl T Ns Cm ascii ,
@@ -209,6 +235,9 @@ Emboldened characters are rendered as
 The special characters documented in
 .Xr mandoc_char 7
 are rendered best-effort in an ASCII equivalent.
+If no equivalent is found,
+.Sq \&?
+is used instead.
 .Pp
 Output width is limited to 78 visible columns unless literal input lines
 exceed this limit.
@@ -460,6 +489,13 @@ Each input and output format is separately noted.
 .Ss ASCII Compatibility
 .Bl -bullet -compact
 .It
+Unrenderable Unicode codepoints specified with
+.Sq \e[uNNNN]
+escapes are printed as
+.Sq \&?
+in mandoc.
+In GNU troff, these raise an error.
+.It
 The
 .Sq \&Bd \-literal
 and
@@ -470,7 +506,7 @@ in
 .Fl T Ns Cm ascii
 are synonyms, as are \-filled and \-ragged.
 .It
-In GNU troff, the
+In historic GNU troff, the
 .Sq \&Pa
 .Xr mdoc 7
 macro does not underline when scoped under an
@@ -495,8 +531,6 @@ macro in
 has no effect.
 .It
 Words aren't hyphenated.
-.It
-Sentences are unilaterally monospaced.
 .El
 .Ss HTML/XHTML Compatibility
 .Bl -bullet -compact
diff --git a/contrib/mdocml/mandoc.3 b/contrib/mdocml/mandoc.3
index 2fd887b8f0..300f2981bb 100644
--- a/contrib/mdocml/mandoc.3
+++ b/contrib/mdocml/mandoc.3
@@ -1,4 +1,4 @@
-.\"	$Id: mandoc.3,v 1.2 2011/03/28 21:49:42 kristaps Exp $
+.\"	$Id: mandoc.3,v 1.10 2011/05/24 21:41:11 kristaps Exp $
 .\"
 .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 .\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
@@ -15,13 +15,20 @@
 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 .\"
-.Dd $Mdocdate: March 28 2011 $
+.Dd $Mdocdate: May 24 2011 $
 .Dt MANDOC 3
 .Os
 .Sh NAME
 .Nm mandoc ,
+.Nm mandoc_escape ,
 .Nm man_meta ,
 .Nm man_node ,
+.Nm mchars_alloc ,
+.Nm mchars_free ,
+.Nm mchars_num2char ,
+.Nm mchars_num2uc ,
+.Nm mchars_spec2cp ,
+.Nm mchars_spec2str ,
 .Nm mdoc_meta ,
 .Nm mdoc_node ,
 .Nm mparse_alloc ,
@@ -32,10 +39,18 @@
 .Nm mparse_strerror ,
 .Nm mparse_strlevel
 .Nd mandoc macro compiler library
+.Sh LIBRARY
+.Lb mandoc
 .Sh SYNOPSIS
 .In man.h
 .In mdoc.h
 .In mandoc.h
+.Ft "enum mandoc_esc"
+.Fo mandoc_escape
+.Fa "const char **end"
+.Fa "const char **start"
+.Fa "int *sz"
+.Fc
 .Ft "const struct man_meta *"
 .Fo man_meta
 .Fa "const struct man *man"
@@ -44,6 +59,27 @@
 .Fo man_node
 .Fa "const struct man *man"
 .Fc
+.Ft "struct mchars *"
+.Fn mchars_alloc
+.Ft void
+.Fn mchars_free "struct mchars *p"
+.Ft char
+.Fn mchars_num2char "const char *cp" "size_t sz"
+.Ft int
+.Fn mchars_num2uc "const char *cp" "size_t sz"
+.Ft "const char *"
+.Fo mchars_spec2str
+.Fa "struct mchars *p"
+.Fa "const char *cp"
+.Fa "size_t sz"
+.Fa "size_t *rsz"
+.Fc
+.Ft int
+.Fo mchars_spec2cp
+.Fa "struct mchars *p"
+.Fa "const char *cp"
+.Fa "size_t sz"
+.Fc
 .Ft "const struct mdoc_meta *"
 .Fo mdoc_meta
 .Fa "const struct mdoc *mdoc"
@@ -90,6 +127,8 @@
 .Vt extern const char * const * man_macronames;
 .Vt extern const char * const * mdoc_argnames;
 .Vt extern const char * const * mdoc_macronames;
+.Fd "#define ASCII_NBRSP"
+.Fd "#define ASCII_HYPH"
 .Sh DESCRIPTION
 The
 .Nm mandoc
@@ -131,6 +170,151 @@ or invoke
 .Fn mparse_reset
 and parse new files.
 .El
+.Pp
+The
+.Nm
+library also contains routines for translating character strings into glyphs
+.Pq see Fn mchars_alloc
+and parsing escape sequences from strings
+.Pq see Fn mandoc_escape .
+.Pp
+This library is
+.Ud
+.Sh REFERENCE
+This section documents the functions, types, and variables available
+via
+.In mandoc.h .
+.Ss Types
+.Bl -ohang
+.It Vt "enum mandoc_esc"
+.It Vt "enum mandocerr"
+.It Vt "enum mandoclevel"
+.It Vt "struct mchars"
+An opaque pointer to an object allowing for translation between
+character strings and glyphs.
+See
+.Fn mchars_alloc .
+.It Vt "enum mparset"
+.It Vt "struct mparse"
+.It Vt "mandocmsg"
+.El
+.Ss Functions
+.Bl -ohang
+.It Fn mandoc_escape
+Scan an escape sequence, i.e., a character string beginning with
+.Sq \e .
+Pass a pointer to this string as
+.Va end ;
+it will be set to the supremum of the parsed escape sequence unless
+returning ESCAPE_ERROR, in which case the string is bogus and should be
+thrown away.
+If not ESCAPE_ERROR or ESCAPE_IGNORE,
+.Va start
+is set to the first relevant character of the substring (font, glyph,
+whatever) of length
+.Va sz .
+Both
+.Va start
+and
+.Va sz
+may be NULL.
+.It Fn man_meta
+Obtain the meta-data of a successful parse.
+This may only be used on a pointer returned by
+.Fn mparse_result .
+.It Fn man_node
+Obtain the root node of a successful parse.
+This may only be used on a pointer returned by
+.Fn mparse_result .
+.It Fn mchars_alloc
+Allocate an
+.Vt "struct mchars *"
+object for translating special characters into glyphs.
+See
+.Xr mandoc_char 7
+for an overview of special characters.
+The object must be freed with
+.Fn mchars_free .
+.It Fn mchars_free
+Free an object created with
+.Fn mchars_alloc .
+.It Fn mchars_num2char
+Convert a character index (e.g., the \eN\(aq\(aq escape) into a
+printable ASCII character.
+Returns \e0 (the nil character) if the input sequence is malformed.
+.It Fn mchars_num2uc
+Convert a hexadecimal character index (e.g., the \e[uNNNN] escape) into
+a Unicode codepoint.
+Returns \e0 (the nil character) if the input sequence is malformed.
+.It Fn mchars_spec2cp
+Convert a special character into a valid Unicode codepoint.
+Returns \-1 on failure or a non-zero Unicode codepoint on success.
+.It Fn mchars_spec2str
+Convert a special character into an ASCII string.
+Returns NULL on failure.
+.It Fn mdoc_meta
+Obtain the meta-data of a successful parse.
+This may only be used on a pointer returned by
+.Fn mparse_result .
+.It Fn mdoc_node
+Obtain the root node of a successful parse.
+This may only be used on a pointer returned by
+.Fn mparse_result .
+.It Fn mparse_alloc
+Allocate a parser.
+The same parser may be used for multiple files so long as
+.Fn mparse_reset
+is called between parses.
+.Fn mparse_free
+must be called to free the memory allocated by this function.
+.It Fn mparse_free
+Free all memory allocated by
+.Fn mparse_alloc .
+.It Fn mparse_readfd
+Parse a file or file descriptor.
+If
+.Va fd
+is \-1,
+.Va fname
+is opened for reading.
+Otherwise,
+.Va fname
+is assumed to be the name associated with
+.Va fd .
+This may be called multiple times with different parameters; however,
+.Fn mparse_reset
+should be invoked between parses.
+.It Fn mparse_reset
+Reset a parser so that
+.Fn mparse_readfd
+may be used again.
+.It Fn mparse_result
+Obtain the result of a parse.
+Only successful parses
+.Po
+i.e., those where
+.Fn mparse_readfd
+returned less than MANDOCLEVEL_FATAL
+.Pc
+should invoke this function, in which case one of the two pointers will
+be filled in.
+.It Fn mparse_strerror
+Return a statically-allocated string representation of an error code.
+.It Fn mparse_strlevel
+Return a statically-allocated string representation of a level code.
+.El
+.Ss Variables
+.Bl -ohang
+.It Va man_macronames
+The string representation of a man macro as indexed by
+.Vt "enum mant" .
+.It Va mdoc_argnames
+The string representation of a mdoc macro argument as indexed by
+.Vt "enum mdocargt" .
+.It Va mdoc_macronames
+The string representation of a mdoc macro as indexed by
+.Vt "enum mdoct" .
+.El
 .Sh IMPLEMENTATION NOTES
 This section consists of structural documentation for
 .Xr mdoc 7
@@ -251,7 +435,7 @@ where a new body introduces a new phrase.
 .Pp
 The
 .Xr mdoc 7
-syntax tree accomodates for broken block structures as well.
+syntax tree accommodates for broken block structures as well.
 The ENDBODY node is available to end the formatting associated
 with a given block before the physical end of that block.
 It has a non-null
@@ -323,6 +507,7 @@ levels of badly-nested blocks.
 .Xr mandoc 1 ,
 .Xr eqn 7 ,
 .Xr man 7 ,
+.Xr mandoc_char 7 ,
 .Xr mdoc 7 ,
 .Xr roff 7 ,
 .Xr tbl 7
diff --git a/contrib/mdocml/mandoc.c b/contrib/mdocml/mandoc.c
index da4a16067c..465965a469 100644
--- a/contrib/mdocml/mandoc.c
+++ b/contrib/mdocml/mandoc.c
@@ -1,4 +1,4 @@
-/*	$Id: mandoc.c,v 1.44 2011/03/28 23:52:13 kristaps Exp $ */
+/*	$Id: mandoc.c,v 1.53 2011/05/24 21:31:23 kristaps Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -23,6 +23,8 @@
 
 #include <assert.h>
 #include <ctype.h>
+#include <errno.h>
+#include <limits.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
@@ -35,199 +37,358 @@
 
 static	int	 a2time(time_t *, const char *, const char *);
 static	char	*time2a(time_t);
+static	int	 numescape(const char *);
 
-int
-mandoc_special(char *p)
+/*
+ * Pass over recursive numerical expressions.  The context of this
+ * function is important: it's only called within character-terminating
+ * escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial
+ * recursion: we don't care about what's in these blocks. 
+ * This returns the number of characters skipped or -1 if an error
+ * occurs (the caller should bail).
+ */
+static int
+numescape(const char *start)
 {
-	int		 len, i;
-	char		 term;
-	char		*sv;
-	
-	len = 0;
-	term = '\0';
-	sv = p;
-
-	assert('\\' == *p);
-	p++;
-
-	switch (*p++) {
-#if 0
-	case ('Z'):
+	int		 i;
+	size_t		 sz;
+	const char	*cp;
+
+	i = 0;
+
+	/* The expression consists of a subexpression. */
+
+	if ('\\' == start[i]) {
+		cp = &start[++i];
+		/*
+		 * Read past the end of the subexpression.
+		 * Bail immediately on errors.
+		 */
+		if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
+			return(-1);
+		return(i + cp - &start[i]);
+	} 
+
+	if ('(' != start[i++])
+		return(0);
+
+	/*
+	 * A parenthesised subexpression.  Read until the closing
+	 * parenthesis, making sure to handle any nested subexpressions
+	 * that might ruin our parse.
+	 */
+
+	while (')' != start[i]) {
+		sz = strcspn(&start[i], ")\\");
+		i += (int)sz;
+
+		if ('\0' == start[i])
+			return(-1);
+		else if ('\\' != start[i])
+			continue;
+
+		cp = &start[++i];
+		if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
+			return(-1);
+		i += cp - &start[i];
+	}
+
+	/* Read past the terminating ')'. */
+	return(++i);
+}
+
+enum mandoc_esc
+mandoc_escape(const char **end, const char **start, int *sz)
+{
+	char		 c, term, numeric;
+	int		 i, lim, ssz, rlim;
+	const char	*cp, *rstart;
+	enum mandoc_esc	 gly; 
+
+	cp = *end;
+	rstart = cp;
+	if (start)
+		*start = rstart;
+	i = lim = 0;
+	gly = ESCAPE_ERROR;
+	term = numeric = '\0';
+
+	switch ((c = cp[i++])) {
+	/*
+	 * First the glyphs.  There are several different forms of
+	 * these, but each eventually returns a substring of the glyph
+	 * name.
+	 */
+	case ('('):
+		gly = ESCAPE_SPECIAL;
+		lim = 2;
+		break;
+	case ('['):
+		gly = ESCAPE_SPECIAL;
+		/*
+		 * Unicode escapes are defined in groff as \[uXXXX] to
+		 * \[u10FFFF], where the contained value must be a valid
+		 * Unicode codepoint.  Here, however, only check whether
+		 * it's not a zero-width escape.
+		 */
+		if ('u' == cp[i] && ']' != cp[i + 1])
+			gly = ESCAPE_UNICODE;
+		term = ']';
+		break;
+	case ('C'):
+		if ('\'' != cp[i])
+			return(ESCAPE_ERROR);
+		gly = ESCAPE_SPECIAL;
+		term = '\'';
+		break;
+
+	/*
+	 * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
+	 * 'X' is the trigger.  These have opaque sub-strings.
+	 */
+	case ('F'):
 		/* FALLTHROUGH */
-	case ('X'):
+	case ('g'):
 		/* FALLTHROUGH */
-	case ('x'):
+	case ('k'):
 		/* FALLTHROUGH */
-	case ('S'):
+	case ('M'):
 		/* FALLTHROUGH */
-	case ('R'):
+	case ('m'):
 		/* FALLTHROUGH */
-	case ('N'):
+	case ('n'):
 		/* FALLTHROUGH */
-	case ('l'):
+	case ('V'):
 		/* FALLTHROUGH */
-	case ('L'):
+	case ('Y'):
+		if (ESCAPE_ERROR == gly)
+			gly = ESCAPE_IGNORE;
 		/* FALLTHROUGH */
-	case ('H'):
+	case ('f'):
+		if (ESCAPE_ERROR == gly)
+			gly = ESCAPE_FONT;
+
+		rstart= &cp[i];
+		if (start) 
+			*start = rstart;
+
+		switch (cp[i++]) {
+		case ('('):
+			lim = 2;
+			break;
+		case ('['):
+			term = ']';
+			break;
+		default:
+			lim = 1;
+			i--;
+			break;
+		}
+		break;
+
+	/*
+	 * These escapes are of the form \X'Y', where 'X' is the trigger
+	 * and 'Y' is any string.  These have opaque sub-strings.
+	 */
+	case ('A'):
 		/* FALLTHROUGH */
-	case ('h'):
+	case ('b'):
 		/* FALLTHROUGH */
 	case ('D'):
 		/* FALLTHROUGH */
-	case ('C'):
-		/* FALLTHROUGH */
-	case ('b'):
+	case ('o'):
 		/* FALLTHROUGH */
-	case ('B'):
+	case ('R'):
 		/* FALLTHROUGH */
-	case ('a'):
+	case ('X'):
 		/* FALLTHROUGH */
-	case ('A'):
-		if (*p++ != '\'')
-			return(0);
+	case ('Z'):
+		if ('\'' != cp[i++])
+			return(ESCAPE_ERROR);
+		gly = ESCAPE_IGNORE;
 		term = '\'';
 		break;
-#endif
+
+	/*
+	 * These escapes are of the form \X'N', where 'X' is the trigger
+	 * and 'N' resolves to a numerical expression.
+	 */
+	case ('B'):
+		/* FALLTHROUGH */
 	case ('h'):
 		/* FALLTHROUGH */
+	case ('H'):
+		/* FALLTHROUGH */
+	case ('L'):
+		/* FALLTHROUGH */
+	case ('l'):
+		/* FALLTHROUGH */
+	case ('N'):
+		if (ESCAPE_ERROR == gly)
+			gly = ESCAPE_NUMBERED;
+		/* FALLTHROUGH */
+	case ('S'):
+		/* FALLTHROUGH */
 	case ('v'):
 		/* FALLTHROUGH */
+	case ('w'):
+		/* FALLTHROUGH */
+	case ('x'):
+		if (ESCAPE_ERROR == gly)
+			gly = ESCAPE_IGNORE;
+		if ('\'' != cp[i++])
+			return(ESCAPE_ERROR);
+		term = numeric = '\'';
+		break;
+
+	/* 
+	 * Sizes get a special category of their own.
+	 */
 	case ('s'):
-		if (ASCII_HYPH == *p)
-			*p = '-';
+		gly = ESCAPE_IGNORE;
 
-		i = 0;
-		if ('+' == *p || '-' == *p) {
-			p++;
-			i = 1;
-		}
+		rstart = &cp[i];
+		if (start) 
+			*start = rstart;
 
-		switch (*p++) {
+		/* Skip a +/-, which counts as a sign. */
+		c = cp[i];
+		if ('+' == c || '-' == c || ASCII_HYPH == c)
+			++i;
+
+		switch (cp[i++]) {
 		case ('('):
-			len = 2;
+			lim = 2;
 			break;
 		case ('['):
-			term = ']';
+			term = numeric = ']';
 			break;
 		case ('\''):
-			term = '\'';
+			term = numeric = '\'';
 			break;
-		case ('0'):
-			i = 1;
-			/* FALLTHROUGH */
 		default:
-			len = 1;
-			p--;
+			lim = 1;
+			i--;
 			break;
 		}
 
-		if (ASCII_HYPH == *p)
-			*p = '-';
-		if ('+' == *p || '-' == *p) {
-			if (i)
-				return(0);
-			p++;
-		} 
-		
-		/* Handle embedded numerical subexp or escape. */
-
-		if ('(' == *p) {
-			while (*p && ')' != *p)
-				if ('\\' == *p++) {
-					i = mandoc_special(--p);
-					if (0 == i)
-						return(0);
-					p += i;
-				}
-
-			if (')' == *p++)
-				break;
+		/* Skip a +/-, which counts as a sign. */
+		c = cp[i];
+		if ('+' == c || '-' == c || ASCII_HYPH == c)
+			++i;
 
-			return(0);
-		} else if ('\\' == *p) {
-			if (0 == (i = mandoc_special(p)))
-				return(0);
-			p += i;
-		}
+		break;
 
+	/*
+	 * Anything else is assumed to be a glyph.
+	 */
+	default:
+		gly = ESCAPE_SPECIAL;
+		lim = 1;
+		i--;
 		break;
-#if 0
-	case ('Y'):
-		/* FALLTHROUGH */
-	case ('V'):
-		/* FALLTHROUGH */
-	case ('$'):
-		/* FALLTHROUGH */
-	case ('n'):
-		/* FALLTHROUGH */
-#endif
-	case ('k'):
-		/* FALLTHROUGH */
-	case ('M'):
-		/* FALLTHROUGH */
-	case ('m'):
-		/* FALLTHROUGH */
-	case ('f'):
-		/* FALLTHROUGH */
-	case ('F'):
-		/* FALLTHROUGH */
-	case ('*'):
-		switch (*p++) {
-		case ('('):
-			len = 2;
+	}
+
+	assert(ESCAPE_ERROR != gly);
+
+	rstart = &cp[i];
+	if (start)
+		*start = rstart;
+
+	/*
+	 * If a terminating block has been specified, we need to
+	 * handle the case of recursion, which could have their
+	 * own terminating blocks that mess up our parse.  This, by the
+	 * way, means that the "start" and "size" values will be
+	 * effectively meaningless.
+	 */
+
+	ssz = 0;
+	if (numeric && -1 == (ssz = numescape(&cp[i])))
+		return(ESCAPE_ERROR);
+
+	i += ssz;
+	rlim = -1;
+
+	/*
+	 * We have a character terminator.  Try to read up to that
+	 * character.  If we can't (i.e., we hit the nil), then return
+	 * an error; if we can, calculate our length, read past the
+	 * terminating character, and exit.
+	 */
+
+	if ('\0' != term) {
+		*end = strchr(&cp[i], term);
+		if ('\0' == *end)
+			return(ESCAPE_ERROR);
+
+		rlim = *end - &cp[i];
+		if (sz)
+			*sz = rlim;
+		(*end)++;
+		goto out;
+	}
+
+	assert(lim > 0);
+
+	/*
+	 * We have a numeric limit.  If the string is shorter than that,
+	 * stop and return an error.  Else adjust our endpoint, length,
+	 * and return the current glyph.
+	 */
+
+	if ((size_t)lim > strlen(&cp[i]))
+		return(ESCAPE_ERROR);
+
+	rlim = lim;
+	if (sz)
+		*sz = rlim;
+
+	*end = &cp[i] + lim;
+
+out:
+	assert(rlim >= 0 && rstart);
+
+	/* Run post-processors. */
+
+	switch (gly) {
+	case (ESCAPE_FONT):
+		if (1 != rlim)
 			break;
-		case ('['):
-			term = ']';
+		switch (*rstart) {
+		case ('3'):
+			/* FALLTHROUGH */
+		case ('B'):
+			gly = ESCAPE_FONTBOLD;
 			break;
-		default:
-			len = 1;
-			p--;
+		case ('2'):
+			/* FALLTHROUGH */
+		case ('I'):
+			gly = ESCAPE_FONTITALIC;
+			break;
+		case ('P'):
+			gly = ESCAPE_FONTPREV;
+			break;
+		case ('1'):
+			/* FALLTHROUGH */
+		case ('R'):
+			gly = ESCAPE_FONTROMAN;
 			break;
 		}
 		break;
-	case ('('):
-		len = 2;
-		break;
-	case ('['):
-		term = ']';
-		break;
-	case ('z'):
-		len = 1;
-		if ('\\' == *p) {
-			if (0 == (i = mandoc_special(p)))
-				return(0);
-			p += i;
-			return(*p ? (int)(p - sv) : 0);
-		}
-		break;
-	case ('o'):
-		/* FALLTHROUGH */
-	case ('w'):
-		if ('\'' == *p++) {
-			term = '\'';
+	case (ESCAPE_SPECIAL):
+		if (1 != rlim)
 			break;
-		}
-		/* FALLTHROUGH */
+		if ('c' == *rstart)
+			gly = ESCAPE_NOSPACE;
+		break;
 	default:
-		len = 1;
-		p--;
 		break;
 	}
 
-	if (term) {
-		for ( ; *p && term != *p; p++)
-			if (ASCII_HYPH == *p)
-				*p = '-';
-		return(*p ? (int)(p - sv) : 0);
-	}
-
-	for (i = 0; *p && i < len; i++, p++)
-		if (ASCII_HYPH == *p)
-			*p = '-';
-	return(i == len ? (int)(p - sv) : 0);
+	return(gly);
 }
 
-
 void *
 mandoc_calloc(size_t num, size_t size)
 {
@@ -303,11 +464,11 @@ mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
 
 	/* Quoting can only start with a new word. */
 	start = *cpp;
+	quoted = 0;
 	if ('"' == *start) {
 		quoted = 1;
 		start++;
-	} else
-		quoted = 0;
+	} 
 
 	pairs = 0;
 	white = 0;
@@ -448,7 +609,7 @@ mandoc_eos(const char *p, size_t sz, int enclosed)
 	/*
 	 * End-of-sentence recognition must include situations where
 	 * some symbols, such as `)', allow prior EOS punctuation to
-	 * propogate outward.
+	 * propagate outward.
 	 */
 
 	found = 0;
@@ -531,3 +692,35 @@ mandoc_getcontrol(const char *cp, int *ppos)
 	*ppos = pos;
 	return(1);
 }
+
+/*
+ * Convert a string to a non-negative int in the given base.
+ * If the string is invalid, negative, or exceeds INT_MAX, return -1.
+ */
+int
+mandoc_strntou(const char *p, size_t sz, int base)
+{
+	char		 buf[32];
+	char		*ep;
+	long		 v;
+
+	if (sz > 31)
+		return(-1);
+
+	memcpy(buf, p, sz);
+	buf[(int)sz] = '\0';
+
+	errno = 0;
+	v = strtol(buf, &ep, base);
+
+	if (buf[0] == '\0' || *ep != '\0')
+		return(-1);
+
+	if ((errno == ERANGE && 
+			(v == LONG_MAX || v == LONG_MIN)) ||
+			(v > INT_MAX || v < 0))
+		return(-1);
+
+	return((int)v);
+}
+
diff --git a/contrib/mdocml/mandoc.h b/contrib/mdocml/mandoc.h
index 185c10bf47..20ab87a7b9 100644
--- a/contrib/mdocml/mandoc.h
+++ b/contrib/mdocml/mandoc.h
@@ -1,4 +1,4 @@
-/*	$Id: mandoc.h,v 1.69 2011/03/28 21:49:42 kristaps Exp $ */
+/*	$Id: mandoc.h,v 1.77 2011/05/24 21:31:23 kristaps Exp $ */
 /*
  * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -288,10 +288,25 @@ enum	mparset {
 	MPARSE_MAN /* assume -man */
 };
 
+enum	mandoc_esc {
+	ESCAPE_ERROR = 0, /* bail! unparsable escape */
+	ESCAPE_IGNORE, /* escape to be ignored */
+	ESCAPE_SPECIAL, /* a regular special character */
+	ESCAPE_FONT, /* a generic font mode */
+	ESCAPE_FONTBOLD, /* bold font mode */
+	ESCAPE_FONTITALIC, /* italic font mode */
+	ESCAPE_FONTROMAN, /* roman font mode */
+	ESCAPE_FONTPREV, /* previous font mode */
+	ESCAPE_NUMBERED, /* a numbered glyph */
+	ESCAPE_UNICODE, /* a unicode codepoint */
+	ESCAPE_NOSPACE /* suppress space if the last on a line */
+};
+
 typedef	void	(*mandocmsg)(enum mandocerr, enum mandoclevel,
 			const char *, int, int, const char *);
 
 struct	mparse;
+struct	mchars;
 struct	mdoc;
 struct	man;
 
@@ -310,6 +325,16 @@ void		 *mandoc_calloc(size_t, size_t);
 void		 *mandoc_malloc(size_t);
 void		 *mandoc_realloc(void *, size_t);
 
+enum mandoc_esc	  mandoc_escape(const char **, const char **, int *);
+
+struct mchars	 *mchars_alloc(void);
+char	 	  mchars_num2char(const char *, size_t);
+int		  mchars_num2uc(const char *, size_t);
+const char	 *mchars_spec2str(struct mchars *, const char *, size_t, size_t *);
+int		  mchars_spec2cp(struct mchars *, const char *, size_t);
+void		  mchars_free(struct mchars *);
+
+
 __END_DECLS
 
 #endif /*!MANDOC_H*/
diff --git a/contrib/mdocml/mandoc_char.7 b/contrib/mdocml/mandoc_char.7
index ec478e09d5..d0c5dd7f80 100644
--- a/contrib/mdocml/mandoc_char.7
+++ b/contrib/mdocml/mandoc_char.7
@@ -1,4 +1,4 @@
-.\"	$Id: mandoc_char.7,v 1.42 2011/02/09 22:53:20 schwarze Exp $
+.\"	$Id: mandoc_char.7,v 1.45 2011/05/15 15:30:33 kristaps Exp $
 .\"
 .\" Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv>
 .\"
@@ -14,7 +14,7 @@
 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 .\"
-.Dd $Mdocdate: February 9 2011 $
+.Dd $Mdocdate: May 15 2011 $
 .Dt MANDOC_CHAR 7
 .Os
 .Sh NAME
@@ -481,8 +481,13 @@ Greek letters:
 .It \e(ts    Ta \(ts        Ta sigma terminal
 .El
 .Sh PREDEFINED STRINGS
-These are not recommended for use, as they differ across
-implementations:
+Predefined strings are inherited from the macro packages of historical
+troff implementations.
+They are
+.Em not recommended
+for use, as they differ across implementations.
+Manuals using these predefined strings are almost certainly not
+portable.
 .Pp
 .Bl -column -compact -offset indent "Input" "Rendered" "Description"
 .It Em Input Ta Em Rendered Ta Em Description
@@ -512,7 +517,23 @@ implementations:
 .It \e*(>=   Ta \*(>=       Ta greater-than-equal
 .It \e*(aa   Ta \*(aa       Ta acute
 .It \e*(ga   Ta \*(ga       Ta grave
+.It \e*(Px   Ta \*(Px       Ta POSIX standard name
+.It \e*(Ai   Ta \*(Ai       Ta ANSI standard name
 .El
+.Sh UNICODE CHARACTERS
+The escape sequence
+.Pp
+.Dl \e[uXXXX]
+.Pp
+is interpreted as a Unicode codepoint.
+The codepoint must be in the range above U+0080 and less than U+10FFFF.
+For compatibility, points must be zero-padded to four characters; if
+greater than four characters, no zero padding is allowed.
+Unicode surrogates are not allowed.
+.\" .Pp
+.\" Unicode glyphs attenuate to the
+.\" .Sq \&?
+.\" character if invalid or not rendered by current output media.
 .Sh NUMBERED CHARACTERS
 For backward compatibility with existing manuals,
 .Xr mandoc 1
@@ -535,6 +556,9 @@ troff implementations, at this time limited to GNU troff
 .Pp
 .Bl -dash -compact
 .It
+The \eN\(aq\(aq escape sequence is limited to printable characters; in
+groff, it accepts arbitrary character numbers.
+.It
 In
 .Fl T Ns Cm ascii ,
 the
diff --git a/contrib/mdocml/mdoc.7 b/contrib/mdocml/mdoc.7
index 3a68ca5498..5bd8aa109f 100644
--- a/contrib/mdocml/mdoc.7
+++ b/contrib/mdocml/mdoc.7
@@ -1,4 +1,4 @@
-.\"	$Id: mdoc.7,v 1.184 2011/04/01 19:50:49 kristaps Exp $
+.\"	$Id: mdoc.7,v 1.188 2011/05/26 09:26:16 kristaps Exp $
 .\"
 .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 .\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
@@ -15,7 +15,7 @@
 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 .\"
-.Dd $Mdocdate: April 1 2011 $
+.Dd $Mdocdate: May 26 2011 $
 .Dt MDOC 7
 .Os
 .Sh NAME
@@ -40,25 +40,25 @@ An
 .Nm
 document follows simple rules: lines beginning with the control
 character
-.Sq \.
+.Sq \&.
 are parsed for macros.
-Other lines are interpreted within the scope of
-prior macros:
+Text lines, those not beginning with the control character, are
+interpreted within the scope of prior macros:
 .Bd -literal -offset indent
 \&.Sh Macro lines change control state.
-Other lines are interpreted within the current state.
+Text lines are interpreted within the current state.
 .Ed
 .Sh LANGUAGE SYNTAX
 .Nm
 documents may contain only graphable 7-bit ASCII characters, the space
 character, and, in certain circumstances, the tab character.
 .Pp
-If the first character of a line is a space, that line is printed
+If the first character of a text line is a space, that line is printed
 with a leading newline.
 .Ss Comments
 Text following a
 .Sq \e\*q ,
-whether in a macro or free-form text line, is ignored to the end of
+whether in a macro or text line, is ignored to the end of
 line.
 A macro line with only a control character and comment escape,
 .Sq \&.\e\*q ,
@@ -97,13 +97,12 @@ Within a macro line, the following terms are reserved:
 .Pq reserved-word vertical bar
 .El
 .Pp
-Use of reserved terms is described in
-.Sx MACRO SYNTAX .
 For general use in macro lines, these can be escaped with a non-breaking
 space
 .Pq Sq \e& .
+In text lines, these may be used as normal punctuation.
 .Ss Special Characters
-Special characters may occur in both macro and free-form lines.
+Special characters may occur in both macro and text lines.
 Sequences begin with the escape character
 .Sq \e
 followed by either an open-parenthesis
@@ -132,18 +131,15 @@ escape followed by an indicator: B (bold), I (italic), R (Roman), or P
 .Pp
 A numerical representation 3, 2, or 1 (bold, italic, and Roman,
 respectively) may be used instead.
-A text decoration is valid within
-the current font scope only: if a macro opens a font scope alongside
-its own scope, such as
-.Sx \&Bf
-.Cm \&Sy ,
-in-scope invocations of
-.Sq \ef
-are only valid within the font scope of the macro.
-If
+If a macro opens a font scope after calling
+.Sq \ef ,
+such as with
+.Sx \&Bf ,
+the
 .Sq \ef
-is specified outside of any font scope, such as in unenclosed, free-form
-text, it will affect the remainder of the document.
+mode will be restored upon exiting the
+.Sx \&Bf
+scope.
 .Pp
 Note this form is
 .Em not
@@ -177,9 +173,9 @@ and
 .Pq vertical bar .
 .Ss Whitespace
 Whitespace consists of the space character.
-In free-form lines, whitespace is preserved within a line; unescaped
+In text lines, whitespace is preserved within a line; unescaped
 trailing spaces are stripped from input (unless in a literal context).
-Blank free-form lines, which may include whitespace, are only permitted
+Blank text lines, which may include whitespace, are only permitted
 within literal contexts.
 .Pp
 In macro lines, whitespace delimits arguments and is discarded.
@@ -199,7 +195,7 @@ Thus, the following produces
 \&.Op "Fl a"
 .Ed
 .Pp
-In free-form mode, quotes are regarded as opaque text.
+In text lines, quotes are regarded as opaque text.
 .Ss Scaling Widths
 Many macros support scaled widths for their arguments, such as
 stipulating a two-inch list indentation with the following:
@@ -270,8 +266,8 @@ The proper spacing is also intelligently preserved if a sentence ends at
 the boundary of a macro line.
 For example:
 .Pp
-.Dl \&Xr mandoc 1 \.
-.Dl \&Fl T \&Ns \&Cm ascii \.
+.Dl \&.Xr mandoc 1 \&.
+.Dl \&.Fl T \&Ns \&Cm ascii \&.
 .Sh MANUAL STRUCTURE
 A well-formed
 .Nm
@@ -300,19 +296,20 @@ sections, although this varies between manual sections.
 .Pp
 The following is a well-formed skeleton
 .Nm
-file:
+file for a utility
+.Qq progname :
 .Bd -literal -offset indent
 \&.Dd $\&Mdocdate$
-\&.Dt mdoc 7
+\&.Dt PROGNAME section
 \&.Os
 \&.Sh NAME
-\&.Nm foo
+\&.Nm progname
 \&.Nd a description goes here
 \&.\e\*q .Sh LIBRARY
 \&.\e\*q For sections 2, 3, & 9 only.
 \&.\e\*q Not used in OpenBSD.
 \&.Sh SYNOPSIS
-\&.Nm foo
+\&.Nm progname
 \&.Op Fl options
 \&.Ar
 \&.Sh DESCRIPTION
@@ -359,6 +356,10 @@ The syntax for this as follows:
 \&.Nd a one line description
 .Ed
 .Pp
+Multiple
+.Sq \&Nm
+names should be separated by commas.
+.Pp
 The
 .Sx \&Nm
 macro(s) must precede the
@@ -386,16 +387,18 @@ configuration.
 For the first, utilities (sections 1, 6, and 8), this is
 generally structured as follows:
 .Bd -literal -offset indent
-\&.Nm foo
+\&.Nm bar
 \&.Op Fl v
 \&.Op Fl o Ar file
 \&.Op Ar
-\&.Nm bar
+\&.Nm foo
 \&.Op Fl v
 \&.Op Fl o Ar file
 \&.Op Ar
 .Ed
 .Pp
+Commands should be ordered alphabetically.
+.Pp
 For the second, function calls (sections 2, 3, 9):
 .Bd -literal -offset indent
 \&.In header.h
@@ -406,6 +409,14 @@ For the second, function calls (sections 2, 3, 9):
 \&.Fn bar "const char *src"
 .Ed
 .Pp
+Ordering of
+.Sx \&In ,
+.Sx \&Vt ,
+.Sx \&Fn ,
+and
+.Sx \&Fo
+macros should follow C header-file conventions.
+.Pp
 And for the third, configurations (section 4):
 .Bd -literal -offset indent
 \&.Cd \*qit* at isa? port 0x2e\*q
@@ -454,9 +465,15 @@ or
 .Sx \&Ss
 macro or the end of an enclosing block, whichever comes first.
 .It Em DESCRIPTION
-This expands upon the brief, one line description in
-.Em NAME .
-It usually contains a breakdown of the options (if documenting a
+This begins with an expansion of the brief, one line description in
+.Em NAME :
+.Bd -literal -offset indent
+The
+\&.Nm
+utility does this, that, and the other.
+.Ed
+.Pp
+It usually follows with a breakdown of the options (if documenting a
 command), such as:
 .Bd -literal -offset indent
 The arguments are as follows:
@@ -604,7 +621,10 @@ column, if applicable, describes closure rules.
 Multi-line scope closed by an explicit closing macro.
 All macros contains bodies; only
 .Sx \&Bf
-contains a head.
+and
+.Pq optionally
+.Sx \&Bl
+contain a head.
 .Bd -literal -offset indent
 \&.Yo \(lB\-arg \(lBparm...\(rB\(rB \(lBhead...\(rB
 \(lBbody...\(rB
@@ -1040,7 +1060,7 @@ Its syntax is as follows:
 .Pp
 Display blocks are used to select a different indentation and
 justification than the one used by the surrounding text.
-They may contain both macro lines and free-form text lines.
+They may contain both macro lines and text lines.
 By default, a display block is preceded by a vertical space.
 .Pp
 The
@@ -1155,9 +1175,10 @@ See also
 and
 .Sx \&Sy .
 .Ss \&Bk
-Keep the output generated from each macro input line together
-on one single output line.
-Line breaks in free-form text lines are unaffected.
+For each macro, keep its output together on the same output line,
+until the end of the macro or the end of the input line is reached,
+whichever comes first.
+Line breaks in text lines are unaffected.
 The syntax is as follows:
 .Pp
 .D1 Pf \. Sx \&Bk Fl words
@@ -1851,9 +1872,9 @@ A function name.
 Its syntax is as follows:
 .Bd -ragged -offset indent
 .Pf \. Ns Sx \&Fn
-.Op Cm functype
-.Cm funcname
-.Op Oo Cm argtype Oc Cm argname
+.Op Ar functype
+.Ar funcname
+.Op Oo Ar argtype Oc Ar argname
 .Ed
 .Pp
 Function arguments are surrounded in parenthesis and
@@ -1882,15 +1903,15 @@ This is a multi-line version of
 .Sx \&Fn .
 Its syntax is as follows:
 .Pp
-.D1 Pf \. Sx \&Fo Cm funcname
+.D1 Pf \. Sx \&Fo Ar funcname
 .Pp
 Invocations usually occur in the following context:
 .Bd -ragged -offset indent
-.Pf \. Sx \&Ft Cm functype
+.Pf \. Sx \&Ft Ar functype
 .br
-.Pf \. Sx \&Fo Cm funcname
+.Pf \. Sx \&Fo Ar funcname
 .br
-.Pf \. Sx \&Fa Oo Cm argtype Oc Cm argname
+.Pf \. Sx \&Fa Oo Ar argtype Oc Ar argname
 .br
 \&.\.\.
 .br
@@ -1911,7 +1932,7 @@ and
 A function type.
 Its syntax is as follows:
 .Pp
-.D1 Pf \. Sx \&Ft Cm functype
+.D1 Pf \. Sx \&Ft Ar functype
 .Pp
 Examples:
 .Dl \&.Ft int
@@ -1992,7 +2013,7 @@ and
 .Fl diag
 have the following syntax:
 .Pp
-.D1 Pf \. Sx \&It Cm args
+.D1 Pf \. Sx \&It Ar args
 .Pp
 Lists of type
 .Fl bullet ,
@@ -2065,14 +2086,14 @@ See also
 Specify a library.
 The syntax is as follows:
 .Pp
-.D1 Pf \. Sx \&Lb Cm library
+.D1 Pf \. Sx \&Lb Ar library
 .Pp
 The
-.Cm library
+.Ar library
 parameter may be a system library, such as
-.Cm libz
+.Ar libz
 or
-.Cm libpam ,
+.Ar libpam ,
 in which case a small library description is printed next to the linker
 invocation; or a custom library, in which case the library name is
 printed in quotes.
@@ -2098,7 +2119,7 @@ and
 Format a hyperlink.
 Its syntax is as follows:
 .Pp
-.D1 Pf \. Sx \&Lk Cm uri Op Cm name
+.D1 Pf \. Sx \&Lk Ar uri Op Ar name
 .Pp
 Examples:
 .Dl \&.Lk http://bsd.lv \*qThe BSD.lv Project\*q
@@ -2113,7 +2134,7 @@ Synonym for
 Display a mathematical symbol.
 Its syntax is as follows:
 .Pp
-.D1 Pf \. Sx \&Ms Cm symbol
+.D1 Pf \. Sx \&Ms Ar symbol
 .Pp
 Examples:
 .Dl \&.Ms sigma
@@ -2124,7 +2145,7 @@ Format a
 hyperlink.
 Its syntax is as follows:
 .Pp
-.D1 Pf \. Sx \&Mt Cm address
+.D1 Pf \. Sx \&Mt Ar address
 .Pp
 Examples:
 .Dl \&.Mt discuss@manpages.bsd.lv
@@ -2262,10 +2283,10 @@ any
 file.
 Its syntax is as follows:
 .Pp
-.D1 Pf \. Sx \&Os Op Cm system Op Cm version
+.D1 Pf \. Sx \&Os Op Ar system Op Ar version
 .Pp
 The optional
-.Cm system
+.Ar system
 parameter specifies the relevant operating system or environment.
 Left unspecified, it defaults to the local operating system version.
 This is the suggested form.
@@ -2324,14 +2345,14 @@ Removes the space
 between its arguments.
 Its syntax is as follows:
 .Pp
-.D1 Pf \. \&Pf Cm prefix suffix
+.D1 Pf \. \&Pf Ar prefix suffix
 .Pp
 The
-.Cm suffix
+.Ar suffix
 argument may be a macro.
 .Pp
 Examples:
-.Dl \&.Pf \e. \&Sx \&Pf \&Cm prefix suffix
+.Dl \&.Pf \e. \&Sx \&Pf \&Ar prefix suffix
 .Ss \&Po
 Multi-line version of
 .Sx \&Pq .
@@ -2452,11 +2473,11 @@ Its syntax is as follows:
 .D1 Pf \. Sx \&Sm Cm on | off
 .Pp
 By default, spacing is
-.Cm on .
+.Cm on .
 When switched
-.Cm off ,
+.Cm off ,
 no white space is inserted between macro arguments and between the
-output generated from adjacent macros, but free-form text lines
+output generated from adjacent macros, but text lines
 still get normal spacing between words and sentences.
 .Ss \&So
 Multi-line version of
@@ -2679,15 +2700,15 @@ Link to another manual
 .Pq Qq cross-reference .
 Its syntax is as follows:
 .Pp
-.D1 Pf \. Sx \&Xr Cm name section
+.D1 Pf \. Sx \&Xr Ar name section
 .Pp
 The
-.Cm name
+.Ar name
 and
-.Cm section
+.Ar section
 are the name and section of the linked manual.
 If
-.Cm section
+.Ar section
 is followed by non-punctuation, an
 .Sx \&Ns
 is inserted into the token stream.
@@ -2712,10 +2733,10 @@ This macro should not be used; it is implemented for compatibility with
 historical manuals.
 Its syntax is as follows:
 .Pp
-.D1 Pf \. Sx \&sp Op Cm height
+.D1 Pf \. Sx \&sp Op Ar height
 .Pp
 The
-.Cm height
+.Ar height
 argument must be formatted as described in
 .Sx Scaling Widths .
 If unspecified,
diff --git a/contrib/mdocml/mdoc_argv.c b/contrib/mdocml/mdoc_argv.c
index c3fd74b0f3..38909f94b9 100644
--- a/contrib/mdocml/mdoc_argv.c
+++ b/contrib/mdocml/mdoc_argv.c
@@ -1,4 +1,4 @@
-/*	$Id: mdoc_argv.c,v 1.73 2011/03/23 15:46:02 kristaps Exp $ */
+/*	$Id: mdoc_argv.c,v 1.77 2011/05/12 23:44:01 kristaps Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -32,12 +32,25 @@
 #include "libmandoc.h"
 
 #define	MULTI_STEP	 5 /* pre-allocate argument values */
+#define	DELIMSZ	  	 6 /* max possible size of a delimiter */
+
+enum	argsflag {
+	ARGSFL_NONE = 0, /* no delimiter or tab-separator handling */
+	ARGSFL_DELIM, /* handle delimiters of [[::delim::][ ]+]+ */
+	ARGSFL_TABSEP /* handle tab/`Ta' separated phrases */
+};
+
+enum	argvflag {
+	ARGV_NONE, /* no args to flag (e.g., -split) */
+	ARGV_SINGLE, /* one arg to flag (e.g., -file xxx)  */
+	ARGV_MULTI, /* multiple args (e.g., -column xxx yyy) */
+	ARGV_OPT_SINGLE /* optional arg (e.g., -offset [xxx]) */
+};
 
 static	enum mdocargt	 argv_a2arg(enum mdoct, const char *);
 static	enum margserr	 args(struct mdoc *, int, int *, 
-				char *, int, char **);
-static	int		 args_checkpunct(struct mdoc *,
-				const char *, int, int, int);
+				char *, enum argsflag, char **);
+static	int		 args_checkpunct(const char *, int);
 static	int		 argv(struct mdoc *, int, 
 				struct mdoc_argv *, int *, char *);
 static	int		 argv_single(struct mdoc *, int, 
@@ -48,13 +61,6 @@ static	int		 argv_multi(struct mdoc *, int,
 				struct mdoc_argv *, int *, char *);
 static	void		 argn_free(struct mdoc_arg *, int);
 
-enum	argvflag {
-	ARGV_NONE, /* no args to flag (e.g., -split) */
-	ARGV_SINGLE, /* one arg to flag (e.g., -file xxx)  */
-	ARGV_MULTI, /* multiple args (e.g., -column xxx yyy) */
-	ARGV_OPT_SINGLE /* optional arg (e.g., -offset [xxx]) */
-};
-
 static	const enum argvflag argvflags[MDOC_ARG_MAX] = {
 	ARGV_NONE,	/* MDOC_Split */
 	ARGV_NONE,	/* MDOC_Nosplit */
@@ -84,129 +90,129 @@ static	const enum argvflag argvflags[MDOC_ARG_MAX] = {
 	ARGV_NONE	/* MDOC_Symbolic */
 };
 
-static	const int argflags[MDOC_MAX] = {
-	0, /* Ap */
-	0, /* Dd */
-	0, /* Dt */
-	0, /* Os */
-	0, /* Sh */
-	0, /* Ss */ 
-	0, /* Pp */ 
-	ARGS_DELIM, /* D1 */
-	ARGS_DELIM, /* Dl */
-	0, /* Bd */
-	0, /* Ed */
-	0, /* Bl */
-	0, /* El */
-	0, /* It */
-	ARGS_DELIM, /* Ad */ 
-	ARGS_DELIM, /* An */
-	ARGS_DELIM, /* Ar */
-	0, /* Cd */
-	ARGS_DELIM, /* Cm */
-	ARGS_DELIM, /* Dv */ 
-	ARGS_DELIM, /* Er */ 
-	ARGS_DELIM, /* Ev */ 
-	0, /* Ex */
-	ARGS_DELIM, /* Fa */ 
-	0, /* Fd */ 
-	ARGS_DELIM, /* Fl */
-	ARGS_DELIM, /* Fn */ 
-	ARGS_DELIM, /* Ft */ 
-	ARGS_DELIM, /* Ic */ 
-	0, /* In */ 
-	ARGS_DELIM, /* Li */
-	0, /* Nd */ 
-	ARGS_DELIM, /* Nm */ 
-	ARGS_DELIM, /* Op */
-	0, /* Ot */
-	ARGS_DELIM, /* Pa */
-	0, /* Rv */
-	ARGS_DELIM, /* St */ 
-	ARGS_DELIM, /* Va */
-	ARGS_DELIM, /* Vt */ 
-	ARGS_DELIM, /* Xr */
-	0, /* %A */
-	0, /* %B */
-	0, /* %D */
-	0, /* %I */
-	0, /* %J */
-	0, /* %N */
-	0, /* %O */
-	0, /* %P */
-	0, /* %R */
-	0, /* %T */
-	0, /* %V */
-	ARGS_DELIM, /* Ac */
-	0, /* Ao */
-	ARGS_DELIM, /* Aq */
-	ARGS_DELIM, /* At */
-	ARGS_DELIM, /* Bc */
-	0, /* Bf */ 
-	0, /* Bo */
-	ARGS_DELIM, /* Bq */
-	ARGS_DELIM, /* Bsx */
-	ARGS_DELIM, /* Bx */
-	0, /* Db */
-	ARGS_DELIM, /* Dc */
-	0, /* Do */
-	ARGS_DELIM, /* Dq */
-	ARGS_DELIM, /* Ec */
-	0, /* Ef */
-	ARGS_DELIM, /* Em */ 
-	0, /* Eo */
-	ARGS_DELIM, /* Fx */
-	ARGS_DELIM, /* Ms */
-	ARGS_DELIM, /* No */
-	ARGS_DELIM, /* Ns */
-	ARGS_DELIM, /* Nx */
-	ARGS_DELIM, /* Ox */
-	ARGS_DELIM, /* Pc */
-	ARGS_DELIM, /* Pf */
-	0, /* Po */
-	ARGS_DELIM, /* Pq */
-	ARGS_DELIM, /* Qc */
-	ARGS_DELIM, /* Ql */
-	0, /* Qo */
-	ARGS_DELIM, /* Qq */
-	0, /* Re */
-	0, /* Rs */
-	ARGS_DELIM, /* Sc */
-	0, /* So */
-	ARGS_DELIM, /* Sq */
-	0, /* Sm */
-	ARGS_DELIM, /* Sx */
-	ARGS_DELIM, /* Sy */
-	ARGS_DELIM, /* Tn */
-	ARGS_DELIM, /* Ux */
-	ARGS_DELIM, /* Xc */
-	0, /* Xo */
-	0, /* Fo */ 
-	0, /* Fc */ 
-	0, /* Oo */
-	ARGS_DELIM, /* Oc */
-	0, /* Bk */
-	0, /* Ek */
-	0, /* Bt */
-	0, /* Hf */
-	0, /* Fr */
-	0, /* Ud */
-	0, /* Lb */
-	0, /* Lp */
-	ARGS_DELIM, /* Lk */
-	ARGS_DELIM, /* Mt */
-	ARGS_DELIM, /* Brq */
-	0, /* Bro */
-	ARGS_DELIM, /* Brc */
-	0, /* %C */
-	0, /* Es */
-	0, /* En */
-	0, /* Dx */
-	0, /* %Q */
-	0, /* br */
-	0, /* sp */
-	0, /* %U */
-	0, /* Ta */
+static	const enum argsflag argflags[MDOC_MAX] = {
+	ARGSFL_NONE, /* Ap */
+	ARGSFL_NONE, /* Dd */
+	ARGSFL_NONE, /* Dt */
+	ARGSFL_NONE, /* Os */
+	ARGSFL_NONE, /* Sh */
+	ARGSFL_NONE, /* Ss */ 
+	ARGSFL_NONE, /* Pp */ 
+	ARGSFL_DELIM, /* D1 */
+	ARGSFL_DELIM, /* Dl */
+	ARGSFL_NONE, /* Bd */
+	ARGSFL_NONE, /* Ed */
+	ARGSFL_NONE, /* Bl */
+	ARGSFL_NONE, /* El */
+	ARGSFL_NONE, /* It */
+	ARGSFL_DELIM, /* Ad */ 
+	ARGSFL_DELIM, /* An */
+	ARGSFL_DELIM, /* Ar */
+	ARGSFL_NONE, /* Cd */
+	ARGSFL_DELIM, /* Cm */
+	ARGSFL_DELIM, /* Dv */ 
+	ARGSFL_DELIM, /* Er */ 
+	ARGSFL_DELIM, /* Ev */ 
+	ARGSFL_NONE, /* Ex */
+	ARGSFL_DELIM, /* Fa */ 
+	ARGSFL_NONE, /* Fd */ 
+	ARGSFL_DELIM, /* Fl */
+	ARGSFL_DELIM, /* Fn */ 
+	ARGSFL_DELIM, /* Ft */ 
+	ARGSFL_DELIM, /* Ic */ 
+	ARGSFL_NONE, /* In */ 
+	ARGSFL_DELIM, /* Li */
+	ARGSFL_NONE, /* Nd */ 
+	ARGSFL_DELIM, /* Nm */ 
+	ARGSFL_DELIM, /* Op */
+	ARGSFL_NONE, /* Ot */
+	ARGSFL_DELIM, /* Pa */
+	ARGSFL_NONE, /* Rv */
+	ARGSFL_DELIM, /* St */ 
+	ARGSFL_DELIM, /* Va */
+	ARGSFL_DELIM, /* Vt */ 
+	ARGSFL_DELIM, /* Xr */
+	ARGSFL_NONE, /* %A */
+	ARGSFL_NONE, /* %B */
+	ARGSFL_NONE, /* %D */
+	ARGSFL_NONE, /* %I */
+	ARGSFL_NONE, /* %J */
+	ARGSFL_NONE, /* %N */
+	ARGSFL_NONE, /* %O */
+	ARGSFL_NONE, /* %P */
+	ARGSFL_NONE, /* %R */
+	ARGSFL_NONE, /* %T */
+	ARGSFL_NONE, /* %V */
+	ARGSFL_DELIM, /* Ac */
+	ARGSFL_NONE, /* Ao */
+	ARGSFL_DELIM, /* Aq */
+	ARGSFL_DELIM, /* At */
+	ARGSFL_DELIM, /* Bc */
+	ARGSFL_NONE, /* Bf */ 
+	ARGSFL_NONE, /* Bo */
+	ARGSFL_DELIM, /* Bq */
+	ARGSFL_DELIM, /* Bsx */
+	ARGSFL_DELIM, /* Bx */
+	ARGSFL_NONE, /* Db */
+	ARGSFL_DELIM, /* Dc */
+	ARGSFL_NONE, /* Do */
+	ARGSFL_DELIM, /* Dq */
+	ARGSFL_DELIM, /* Ec */
+	ARGSFL_NONE, /* Ef */
+	ARGSFL_DELIM, /* Em */ 
+	ARGSFL_NONE, /* Eo */
+	ARGSFL_DELIM, /* Fx */
+	ARGSFL_DELIM, /* Ms */
+	ARGSFL_DELIM, /* No */
+	ARGSFL_DELIM, /* Ns */
+	ARGSFL_DELIM, /* Nx */
+	ARGSFL_DELIM, /* Ox */
+	ARGSFL_DELIM, /* Pc */
+	ARGSFL_DELIM, /* Pf */
+	ARGSFL_NONE, /* Po */
+	ARGSFL_DELIM, /* Pq */
+	ARGSFL_DELIM, /* Qc */
+	ARGSFL_DELIM, /* Ql */
+	ARGSFL_NONE, /* Qo */
+	ARGSFL_DELIM, /* Qq */
+	ARGSFL_NONE, /* Re */
+	ARGSFL_NONE, /* Rs */
+	ARGSFL_DELIM, /* Sc */
+	ARGSFL_NONE, /* So */
+	ARGSFL_DELIM, /* Sq */
+	ARGSFL_NONE, /* Sm */
+	ARGSFL_DELIM, /* Sx */
+	ARGSFL_DELIM, /* Sy */
+	ARGSFL_DELIM, /* Tn */
+	ARGSFL_DELIM, /* Ux */
+	ARGSFL_DELIM, /* Xc */
+	ARGSFL_NONE, /* Xo */
+	ARGSFL_NONE, /* Fo */ 
+	ARGSFL_NONE, /* Fc */ 
+	ARGSFL_NONE, /* Oo */
+	ARGSFL_DELIM, /* Oc */
+	ARGSFL_NONE, /* Bk */
+	ARGSFL_NONE, /* Ek */
+	ARGSFL_NONE, /* Bt */
+	ARGSFL_NONE, /* Hf */
+	ARGSFL_NONE, /* Fr */
+	ARGSFL_NONE, /* Ud */
+	ARGSFL_NONE, /* Lb */
+	ARGSFL_NONE, /* Lp */
+	ARGSFL_DELIM, /* Lk */
+	ARGSFL_DELIM, /* Mt */
+	ARGSFL_DELIM, /* Brq */
+	ARGSFL_NONE, /* Bro */
+	ARGSFL_DELIM, /* Brc */
+	ARGSFL_NONE, /* %C */
+	ARGSFL_NONE, /* Es */
+	ARGSFL_NONE, /* En */
+	ARGSFL_NONE, /* Dx */
+	ARGSFL_NONE, /* %Q */
+	ARGSFL_NONE, /* br */
+	ARGSFL_NONE, /* sp */
+	ARGSFL_NONE, /* %U */
+	ARGSFL_NONE, /* Ta */
 };
 
 static	const enum mdocargt args_Ex[] = {
@@ -376,18 +382,17 @@ argn_free(struct mdoc_arg *p, int iarg)
 }
 
 enum margserr
-mdoc_zargs(struct mdoc *m, int line, int *pos, 
-		char *buf, int flags, char **v)
+mdoc_zargs(struct mdoc *m, int line, int *pos, char *buf, char **v)
 {
 
-	return(args(m, line, pos, buf, flags, v));
+	return(args(m, line, pos, buf, ARGSFL_NONE, v));
 }
 
 enum margserr
 mdoc_args(struct mdoc *m, int line, int *pos, 
 		char *buf, enum mdoct tok, char **v)
 {
-	int		  fl;
+	enum argsflag	  fl;
 	struct mdoc_node *n;
 
 	fl = argflags[tok];
@@ -404,38 +409,21 @@ mdoc_args(struct mdoc *m, int line, int *pos,
 
 	for (n = m->last; n; n = n->parent)
 		if (MDOC_Bl == n->tok)
-			break;
-
-	if (n && LIST_column == n->norm->Bl.type) {
-		fl |= ARGS_TABSEP;
-		fl &= ~ARGS_DELIM;
-	}
+			if (LIST_column == n->norm->Bl.type) {
+				fl = ARGSFL_TABSEP;
+				break;
+			}
 
 	return(args(m, line, pos, buf, fl, v));
 }
 
 static enum margserr
 args(struct mdoc *m, int line, int *pos, 
-		char *buf, int fl, char **v)
+		char *buf, enum argsflag fl, char **v)
 {
 	char		*p, *pp;
 	enum margserr	 rc;
 
-	/*
-	 * Parse out the terms (like `val' in `.Xx -arg val' or simply
-	 * `.Xx val'), which can have all sorts of properties:
-	 *
-	 *   ARGS_DELIM: use special handling if encountering trailing
-	 *   delimiters in the form of [[::delim::][ ]+]+.
-	 *
-	 *   ARGS_NOWARN: don't post warnings.  This is only used when
-	 *   re-parsing delimiters, as the warnings have already been
-	 *   posted.
-	 *
-	 *   ARGS_TABSEP: use special handling for tab/`Ta' separated
-	 *   phrases like in `Bl -column'.
-	 */
-
 	assert(' ' != buf[*pos]);
 
 	if ('\0' == buf[*pos]) {
@@ -455,8 +443,9 @@ args(struct mdoc *m, int line, int *pos,
 
 	*v = &buf[*pos];
 
-	if (ARGS_DELIM & fl && args_checkpunct(m, buf, *pos, line, fl))
-		return(ARGS_PUNCT);
+	if (ARGSFL_DELIM == fl)
+		if (args_checkpunct(buf, *pos))
+			return(ARGS_PUNCT);
 
 	/*
 	 * First handle TABSEP items, restricted to `Bl -column'.  This
@@ -465,7 +454,7 @@ args(struct mdoc *m, int line, int *pos,
 	 * for arguments at a later phase.
 	 */
 
-	if (ARGS_TABSEP & fl) {
+	if (ARGSFL_TABSEP == fl) {
 		/* Scan ahead to tab (can't be escaped). */
 		p = strchr(*v, '\t');
 		pp = NULL;
@@ -504,7 +493,7 @@ args(struct mdoc *m, int line, int *pos,
 		}
 
 		/* Whitespace check for eoln case... */
-		if ('\0' == *p && ' ' == *(p - 1) && ! (ARGS_NOWARN & fl))
+		if ('\0' == *p && ' ' == *(p - 1))
 			mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE);
 
 		*pos += (int)(p - *v);
@@ -547,7 +536,7 @@ args(struct mdoc *m, int line, int *pos,
 		}
 
 		if ('\0' == buf[*pos]) {
-			if (ARGS_NOWARN & fl || MDOC_PPHRASE & m->flags)
+			if (MDOC_PPHRASE & m->flags)
 				return(ARGS_QWORD);
 			mdoc_pmsg(m, line, *pos, MANDOCERR_BADQUOTE);
 			return(ARGS_QWORD);
@@ -562,31 +551,14 @@ args(struct mdoc *m, int line, int *pos,
 		while (' ' == buf[*pos])
 			(*pos)++;
 
-		if (0 == buf[*pos] && ! (ARGS_NOWARN & fl))
+		if ('\0' == buf[*pos])
 			mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE);
 
 		return(ARGS_QWORD);
 	}
 
-	/* 
-	 * A non-quoted term progresses until either the end of line or
-	 * a non-escaped whitespace.
-	 */
-
-	for ( ; buf[*pos]; (*pos)++)
-		if (*pos && ' ' == buf[*pos] && '\\' != buf[*pos - 1])
-			break;
-
-	if ('\0' == buf[*pos])
-		return(ARGS_WORD);
-
-	buf[(*pos)++] = '\0';
-
-	while (' ' == buf[*pos])
-		(*pos)++;
-
-	if ('\0' == buf[*pos] && ! (ARGS_NOWARN & fl))
-		mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE);
+	p = &buf[*pos];
+	*v = mandoc_getarg(m->parse, &p, line, pos);
 
 	return(ARGS_WORD);
 }
@@ -598,7 +570,7 @@ args(struct mdoc *m, int line, int *pos,
  * whitespace may separate these tokens.
  */
 static int
-args_checkpunct(struct mdoc *m, const char *buf, int i, int ln, int fl)
+args_checkpunct(const char *buf, int i)
 {
 	int		 j;
 	char		 dbuf[DELIMSZ];
@@ -638,9 +610,6 @@ args_checkpunct(struct mdoc *m, const char *buf, int i, int ln, int fl)
 			i++;
 	}
 
-	if ( ! (ARGS_NOWARN & fl) && i && ' ' == buf[i - 1])
-		mdoc_pmsg(m, ln, i - 1, MANDOCERR_EOLNSPACE);
-
 	return('\0' == buf[i]);
 }
 
@@ -652,40 +621,40 @@ args_checkpunct(struct mdoc *m, const char *buf, int i, int ln, int fl)
 static enum mdocargt
 argv_a2arg(enum mdoct tok, const char *p)
 {
-	const enum mdocargt *args;
+	const enum mdocargt *argsp;
 
-	args = NULL;
+	argsp = NULL;
 
 	switch (tok) {
 	case (MDOC_An):
-		args = args_An;
+		argsp = args_An;
 		break;
 	case (MDOC_Bd):
-		args = args_Bd;
+		argsp = args_Bd;
 		break;
 	case (MDOC_Bf):
-		args = args_Bf;
+		argsp = args_Bf;
 		break;
 	case (MDOC_Bk):
-		args = args_Bk;
+		argsp = args_Bk;
 		break;
 	case (MDOC_Bl):
-		args = args_Bl;
+		argsp = args_Bl;
 		break;
 	case (MDOC_Rv):
 		/* FALLTHROUGH */
 	case (MDOC_Ex):
-		args = args_Ex;
+		argsp = args_Ex;
 		break;
 	default:
 		return(MDOC_ARG_MAX);
 	}
 
-	assert(args);
+	assert(argsp);
 
-	for ( ; MDOC_ARG_MAX != *args ; args++)
-		if (0 == strcmp(p, mdoc_argnames[*args]))
-			return(*args);
+	for ( ; MDOC_ARG_MAX != *argsp ; argsp++)
+		if (0 == strcmp(p, mdoc_argnames[*argsp]))
+			return(*argsp);
 
 	return(MDOC_ARG_MAX);
 }
@@ -700,7 +669,7 @@ argv_multi(struct mdoc *m, int line,
 	for (v->sz = 0; ; v->sz++) {
 		if ('-' == buf[*pos])
 			break;
-		ac = args(m, line, pos, buf, 0, &p);
+		ac = args(m, line, pos, buf, ARGSFL_NONE, &p);
 		if (ARGS_ERROR == ac)
 			return(0);
 		else if (ARGS_EOLN == ac)
@@ -726,7 +695,7 @@ argv_opt_single(struct mdoc *m, int line,
 	if ('-' == buf[*pos])
 		return(1);
 
-	ac = args(m, line, pos, buf, 0, &p);
+	ac = args(m, line, pos, buf, ARGSFL_NONE, &p);
 	if (ARGS_ERROR == ac)
 		return(0);
 	if (ARGS_EOLN == ac)
@@ -752,7 +721,7 @@ argv_single(struct mdoc *m, int line,
 
 	ppos = *pos;
 
-	ac = args(m, line, pos, buf, 0, &p);
+	ac = args(m, line, pos, buf, ARGSFL_NONE, &p);
 	if (ARGS_EOLN == ac) {
 		mdoc_pmsg(m, line, ppos, MANDOCERR_SYNTARGVCOUNT);
 		return(0);
diff --git a/contrib/mdocml/mdoc_html.c b/contrib/mdocml/mdoc_html.c
index 49782a39f9..57ebc34804 100644
--- a/contrib/mdocml/mdoc_html.c
+++ b/contrib/mdocml/mdoc_html.c
@@ -1,4 +1,4 @@
-/*	$Id: mdoc_html.c,v 1.162 2011/04/04 16:48:18 kristaps Exp $ */
+/*	$Id: mdoc_html.c,v 1.169 2011/05/17 11:38:18 kristaps Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -288,7 +288,7 @@ a2width(const char *p, struct roffsu *su)
 
 	if ( ! a2roffsu(p, su, SCALE_MAX)) {
 		su->unit = SCALE_BU;
-		su->scale = (int)strlen(p);
+		su->scale = html_strlen(p);
 	}
 }
 
@@ -355,7 +355,7 @@ a2offs(const char *p, struct roffsu *su)
 		SCALE_HS_INIT(su, INDENT * 2);
 	else if ( ! a2roffsu(p, su, SCALE_MAX)) {
 		su->unit = SCALE_BU;
-		su->scale = (int)strlen(p);
+		su->scale = html_strlen(p);
 	}
 }
 
@@ -382,13 +382,10 @@ print_mdoc_head(MDOC_ARGS)
 
 	print_gen_head(h);
 	bufinit(h);
-	buffmt(h, "%s(%s)", m->title, m->msec);
+	bufcat_fmt(h, "%s(%s)", m->title, m->msec);
 
-	if (m->arch) {
-		bufcat(h, " (");
-		bufcat(h, m->arch);
-		bufcat(h, ")");
-	}
+	if (m->arch)
+		bufcat_fmt(h, " (%s)", m->arch);
 
 	print_otag(h, TAG_TITLE, 0, NULL);
 	print_text(h, h->buf);
@@ -415,7 +412,6 @@ print_mdoc_node(MDOC_ARGS)
 	child = 1;
 	t = h->tags.head;
 
-	bufinit(h);
 	switch (n->type) {
 	case (MDOC_ROOT):
 		child = mdoc_root_pre(m, n, h);
@@ -484,7 +480,6 @@ print_mdoc_node(MDOC_ARGS)
 
 	print_stagq(h, t);
 
-	bufinit(h);
 	switch (n->type) {
 	case (MDOC_ROOT):
 		mdoc_root_post(m, n, h);
@@ -606,7 +601,6 @@ static int
 mdoc_sh_pre(MDOC_ARGS)
 {
 	struct htmlpair	 tag;
-	char		 buf[BUFSIZ];
 
 	if (MDOC_BLOCK == n->type) {
 		PAIR_CLASS_INIT(&tag, "section");
@@ -615,14 +609,14 @@ mdoc_sh_pre(MDOC_ARGS)
 	} else if (MDOC_BODY == n->type)
 		return(1);
 
-	buf[0] = '\0';
+	bufinit(h);
 	for (n = n->child; n; n = n->next) {
-		html_idcat(buf, n->string, BUFSIZ);
+		bufcat_id(h, n->string);
 		if (n->next)
-			html_idcat(buf, " ", BUFSIZ);
+			bufcat_id(h, " ");
 	}
 
-	PAIR_ID_INIT(&tag, buf);
+	PAIR_ID_INIT(&tag, h->buf);
 	print_otag(h, TAG_H1, 1, &tag);
 	return(1);
 }
@@ -633,7 +627,6 @@ static int
 mdoc_ss_pre(MDOC_ARGS)
 {
 	struct htmlpair	 tag;
-	char		 buf[BUFSIZ];
 
 	if (MDOC_BLOCK == n->type) {
 		PAIR_CLASS_INIT(&tag, "subsection");
@@ -642,14 +635,14 @@ mdoc_ss_pre(MDOC_ARGS)
 	} else if (MDOC_BODY == n->type)
 		return(1);
 
-	buf[0] = '\0';
+	bufinit(h);
 	for (n = n->child; n; n = n->next) {
-		html_idcat(buf, n->string, BUFSIZ);
+		bufcat_id(h, n->string);
 		if (n->next)
-			html_idcat(buf, " ", BUFSIZ);
+			bufcat_id(h, " ");
 	}
 
-	PAIR_ID_INIT(&tag, buf);
+	PAIR_ID_INIT(&tag, h->buf);
 	print_otag(h, TAG_H2, 1, &tag);
 	return(1);
 }
@@ -703,7 +696,7 @@ mdoc_nm_pre(MDOC_ARGS)
 {
 	struct htmlpair	 tag;
 	struct roffsu	 su;
-	size_t		 len;
+	int		 len;
 
 	switch (n->type) {
 	case (MDOC_ELEM):
@@ -731,12 +724,13 @@ mdoc_nm_pre(MDOC_ARGS)
 
 	for (len = 0, n = n->child; n; n = n->next)
 		if (MDOC_TEXT == n->type)
-			len += strlen(n->string);
+			len += html_strlen(n->string);
 
 	if (0 == len && m->name)
-		len = strlen(m->name);
+		len = html_strlen(m->name);
 
 	SCALE_HS_INIT(&su, (double)len);
+	bufinit(h);
 	bufcat_su(h, "width", &su);
 	PAIR_STYLE_INIT(&tag, h);
 	print_otag(h, TAG_COL, 1, &tag);
@@ -899,6 +893,8 @@ mdoc_it_pre(MDOC_ARGS)
 	assert(lists[type]);
 	PAIR_CLASS_INIT(&tag[0], lists[type]);
 
+	bufinit(h);
+
 	if (MDOC_HEAD == n->type) {
 		switch (type) {
 		case(LIST_bullet):
@@ -999,6 +995,8 @@ mdoc_bl_pre(MDOC_ARGS)
 	struct roffsu	 su;
 	char		 buf[BUFSIZ];
 
+	bufinit(h);
+
 	if (MDOC_BODY == n->type) {
 		if (LIST_column == n->norm->Bl.type)
 			print_otag(h, TAG_TBODY, 0, NULL);
@@ -1018,7 +1016,6 @@ mdoc_bl_pre(MDOC_ARGS)
 
 		for (i = 0; i < (int)n->norm->Bl.ncols; i++) {
 			a2width(n->norm->Bl.cols[i], &su);
-			bufinit(h);
 			if (i < (int)n->norm->Bl.ncols - 1)
 				bufcat_su(h, "width", &su);
 			else
@@ -1147,6 +1144,7 @@ mdoc_d1_pre(MDOC_ARGS)
 		return(1);
 
 	SCALE_VS_INIT(&su, 0);
+	bufinit(h);
 	bufcat_su(h, "margin-top", &su);
 	bufcat_su(h, "margin-bottom", &su);
 	PAIR_STYLE_INIT(&tag[0], h);
@@ -1171,17 +1169,17 @@ static int
 mdoc_sx_pre(MDOC_ARGS)
 {
 	struct htmlpair	 tag[2];
-	char		 buf[BUFSIZ];
 
-	strlcpy(buf, "#", BUFSIZ);
+	bufinit(h);
+	bufcat(h, "#x");
 	for (n = n->child; n; n = n->next) {
-		html_idcat(buf, n->string, BUFSIZ);
+		bufcat_id(h, n->string);
 		if (n->next)
-			html_idcat(buf, " ", BUFSIZ);
+			bufcat_id(h, " ");
 	}
 
 	PAIR_CLASS_INIT(&tag[0], "link-sec");
-	PAIR_HREF_INIT(&tag[1], buf);
+	PAIR_HREF_INIT(&tag[1], h->buf);
 
 	print_otag(h, TAG_I, 1, tag);
 	print_otag(h, TAG_A, 2, tag);
@@ -1219,7 +1217,8 @@ mdoc_bd_pre(MDOC_ARGS)
 	SCALE_HS_INIT(&su, 0);
 	if (n->norm->Bd.offs)
 		a2offs(n->norm->Bd.offs, &su);
-
+	
+	bufinit(h);
 	bufcat_su(h, "margin-left", &su);
 	PAIR_STYLE_INIT(&tag[0], h);
 
@@ -1438,7 +1437,6 @@ mdoc_fd_pre(MDOC_ARGS)
 			buf[sz - 1] = '\0';
 
 		PAIR_CLASS_INIT(&tag[0], "link-includes");
-		bufinit(h);
 		
 		i = 1;
 		if (h->base_includes) {
@@ -1558,9 +1556,10 @@ mdoc_fn_pre(MDOC_ARGS)
 
 	h->flags |= HTML_NOSPACE;
 	print_text(h, "(");
+	h->flags |= HTML_NOSPACE;
 
-	bufinit(h);
 	PAIR_CLASS_INIT(&tag[0], "farg");
+	bufinit(h);
 	bufcat_style(h, "white-space", "nowrap");
 	PAIR_STYLE_INIT(&tag[1], h);
 
@@ -1639,6 +1638,7 @@ mdoc_sp_pre(MDOC_ARGS)
 	} else
 		su.scale = 0;
 
+	bufinit(h);
 	bufcat_su(h, "height", &su);
 	PAIR_STYLE_INIT(&tag, h);
 	print_otag(h, TAG_DIV, 1, &tag);
@@ -1775,10 +1775,8 @@ mdoc_in_pre(MDOC_ARGS)
 		assert(MDOC_TEXT == n->type);
 
 		PAIR_CLASS_INIT(&tag[0], "link-includes");
-		bufinit(h);
 
 		i = 1;
-
 		if (h->base_includes) {
 			buffmt_includes(h, n->string);
 			PAIR_HREF_INIT(&tag[i], h->buf);
@@ -1917,6 +1915,7 @@ mdoc_bf_pre(MDOC_ARGS)
 	 * We want this to be inline-formatted, but needs to be div to
 	 * accept block children. 
 	 */
+	bufinit(h);
 	bufcat_style(h, "display", "inline");
 	SCALE_HS_INIT(&su, 1);
 	/* Needs a left-margin for spacing. */
diff --git a/contrib/mdocml/mdoc_macro.c b/contrib/mdocml/mdoc_macro.c
index b334b4e402..03d1b91cb7 100644
--- a/contrib/mdocml/mdoc_macro.c
+++ b/contrib/mdocml/mdoc_macro.c
@@ -1,4 +1,4 @@
-/*	$Id: mdoc_macro.c,v 1.106 2011/03/22 14:33:05 kristaps Exp $ */
+/*	$Id: mdoc_macro.c,v 1.109 2011/04/30 10:18:24 kristaps Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
@@ -600,7 +600,19 @@ dword(struct mdoc *m, int line,
 
 	if (DELIM_OPEN == d)
 		m->last->flags |= MDOC_DELIMO;
-	else if (DELIM_CLOSE == d)
+
+	/*
+	 * Closing delimiters only suppress the preceding space
+	 * when they follow something, not when they start a new
+	 * block or element, and not when they follow `No'.
+	 *
+	 * XXX	Explicitly special-casing MDOC_No here feels
+	 *	like a layering violation.  Find a better way
+	 *	and solve this in the code related to `No'!
+	 */
+
+	else if (DELIM_CLOSE == d && m->last->prev &&
+			m->last->prev->tok != MDOC_No)
 		m->last->flags |= MDOC_DELIMC;
 
 	return(1);
@@ -618,7 +630,7 @@ append_delims(struct mdoc *m, int line, int *pos, char *buf)
 
 	for (;;) {
 		la = *pos;
-		ac = mdoc_zargs(m, line, pos, buf, ARGS_NOWARN, &p);
+		ac = mdoc_zargs(m, line, pos, buf, &p);
 
 		if (ARGS_ERROR == ac)
 			return(0);
@@ -631,12 +643,12 @@ append_delims(struct mdoc *m, int line, int *pos, char *buf)
 		 * If we encounter end-of-sentence symbols, then trigger
 		 * the double-space.
 		 *
-		 * XXX: it's easy to allow this to propogate outward to
+		 * XXX: it's easy to allow this to propagate outward to
 		 * the last symbol, such that `. )' will cause the
 		 * correct double-spacing.  However, (1) groff isn't
 		 * smart enough to do this and (2) it would require
 		 * knowing which symbols break this behaviour, for
-		 * example, `.  ;' shouldn't propogate the double-space.
+		 * example, `.  ;' shouldn't propagate the double-space.
 		 */
 		if (mandoc_eos(p, strlen(p), 0))
 			m->last->flags |= MDOC_EOS;
@@ -995,7 +1007,7 @@ blk_full(MACRO_PROT_ARGS)
 	}
 
 	/*
-	 * This routine accomodates implicitly- and explicitly-scoped
+	 * This routine accommodates implicitly- and explicitly-scoped
 	 * macro openings.  Implicit ones first close out prior scope
 	 * (seen above).  Delay opening the head until necessary to
 	 * allow leading punctuation to print.  Special consideration
@@ -1292,7 +1304,7 @@ blk_part_imp(MACRO_PROT_ARGS)
 		if (mandoc_eos(n->string, strlen(n->string), 1))
 			n->flags |= MDOC_EOS;
 
-	/* Up-propogate the end-of-space flag. */
+	/* Up-propagate the end-of-space flag. */
 
 	if (n && (MDOC_EOS & n->flags)) {
 		body->flags |= MDOC_EOS;
@@ -1717,7 +1729,7 @@ phrase(struct mdoc *m, int line, int ppos, char *buf)
 	for (pos = ppos; ; ) {
 		la = pos;
 
-		ac = mdoc_zargs(m, line, &pos, buf, 0, &p);
+		ac = mdoc_zargs(m, line, &pos, buf, &p);
 
 		if (ARGS_ERROR == ac)
 			return(0);
@@ -1762,7 +1774,7 @@ phrase_ta(MACRO_PROT_ARGS)
 
 	for (;;) {
 		la = *pos;
-		ac = mdoc_zargs(m, line, pos, buf, 0, &p);
+		ac = mdoc_zargs(m, line, pos, buf, &p);
 
 		if (ARGS_ERROR == ac)
 			return(0);
diff --git a/contrib/mdocml/mdoc_term.c b/contrib/mdocml/mdoc_term.c
index 47c212489d..1a5ce4c214 100644
--- a/contrib/mdocml/mdoc_term.c
+++ b/contrib/mdocml/mdoc_term.c
@@ -1,4 +1,4 @@
-/*	$Id: mdoc_term.c,v 1.226 2011/04/04 16:27:03 kristaps Exp $ */
+/*	$Id: mdoc_term.c,v 1.230 2011/05/17 14:38:34 kristaps Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
@@ -264,14 +264,7 @@ terminal_mdoc(void *arg, const struct mdoc *mdoc)
 	p->tabwidth = term_len(p, 5);
 
 	if (NULL == p->symtab)
-		switch (p->enc) {
-		case (TERMENC_ASCII):
-			p->symtab = chars_init(CHARS_ASCII);
-			break;
-		default:
-			abort();
-			/* NOTREACHED */
-		}
+		p->symtab = mchars_alloc();
 
 	n = mdoc_node(mdoc);
 	m = mdoc_meta(mdoc);
diff --git a/contrib/mdocml/mdoc_validate.c b/contrib/mdocml/mdoc_validate.c
index 707864441c..a34a221d69 100644
--- a/contrib/mdocml/mdoc_validate.c
+++ b/contrib/mdocml/mdoc_validate.c
@@ -1,4 +1,4 @@
-/*	$Id: mdoc_validate.c,v 1.166 2011/04/03 09:53:50 kristaps Exp $ */
+/*	$Id: mdoc_validate.c,v 1.169 2011/04/30 10:18:24 kristaps Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -155,9 +155,9 @@ static	v_post	 posts_notext[] = { ewarn_eq0, NULL };
 static	v_post	 posts_ns[] = { post_ns, NULL };
 static	v_post	 posts_os[] = { post_os, post_prol, NULL };
 static	v_post	 posts_rs[] = { post_rs, NULL };
-static	v_post	 posts_sh[] = { post_ignpar, hwarn_ge1, bwarn_ge1, post_sh, NULL };
+static	v_post	 posts_sh[] = { post_ignpar, hwarn_ge1, post_sh, NULL };
 static	v_post	 posts_sp[] = { ewarn_le1, NULL };
-static	v_post	 posts_ss[] = { post_ignpar, hwarn_ge1, bwarn_ge1, NULL };
+static	v_post	 posts_ss[] = { post_ignpar, hwarn_ge1, NULL };
 static	v_post	 posts_st[] = { post_st, NULL };
 static	v_post	 posts_std[] = { post_std, NULL };
 static	v_post	 posts_text[] = { ewarn_ge1, NULL };
@@ -545,31 +545,39 @@ check_argv(struct mdoc *m, struct mdoc_node *n, struct mdoc_argv *v)
 static void
 check_text(struct mdoc *m, int ln, int pos, char *p)
 {
-	int		 c;
+	char		*cpp, *pp;
 	size_t		 sz;
 
-	for ( ; *p; p++, pos++) {
+	while ('\0' != *p) {
 		sz = strcspn(p, "\t\\");
-		p += (int)sz;
-
-		if ('\0' == *p)
-			break;
 
+		p += (int)sz;
 		pos += (int)sz;
 
 		if ('\t' == *p) {
 			if ( ! (MDOC_LITERAL & m->flags))
 				mdoc_pmsg(m, ln, pos, MANDOCERR_BADTAB);
+			p++;
+			pos++;
 			continue;
-		}
+		} else if ('\0' == *p)
+			break;
+
+		pos++;
+		pp = ++p;
 
-		if (0 == (c = mandoc_special(p))) {
+		if (ESCAPE_ERROR == mandoc_escape
+				((const char **)&pp, NULL, NULL)) {
 			mdoc_pmsg(m, ln, pos, MANDOCERR_BADESCAPE);
-			continue;
+			break;
 		}
 
-		p += c - 1;
-		pos += c - 1;
+		cpp = p;
+		while (NULL != (cpp = memchr(cpp, ASCII_HYPH, pp - cpp)))
+			*cpp = '-';
+
+		pos += pp - p;
+		p = pp;
 	}
 }
 
@@ -1527,7 +1535,7 @@ post_bl_head(POST_ARGS)
 	assert(0 == np->args->argv[j].sz);
 
 	/*
-	 * Accomodate for new-style groff column syntax.  Shuffle the
+	 * Accommodate for new-style groff column syntax.  Shuffle the
 	 * child nodes, all of which must be TEXT, as arguments for the
 	 * column field.  Then, delete the head children.
 	 */
diff --git a/contrib/mdocml/out.c b/contrib/mdocml/out.c
index eb303d5194..225d4639d8 100644
--- a/contrib/mdocml/out.c
+++ b/contrib/mdocml/out.c
@@ -1,4 +1,4 @@
-/*	$Id: out.c,v 1.39 2011/03/17 08:49:34 kristaps Exp $ */
+/*	$Id: out.c,v 1.40 2011/04/09 15:29:40 kristaps Exp $ */
 /*
  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -174,243 +174,6 @@ time2a(time_t t, char *dst, size_t sz)
 	(void)strftime(p, sz, "%Y", &tm);
 }
 
-
-int
-a2roffdeco(enum roffdeco *d, const char **word, size_t *sz)
-{
-	int		 i, j, lim;
-	char		 term, c;
-	const char	*wp;
-	enum roffdeco	 dd;
-
-	*d = DECO_NONE;
-	lim = i = 0;
-	term = '\0';
-	wp = *word;
-
-	switch ((c = wp[i++])) {
-	case ('('):
-		*d = DECO_SPECIAL;
-		lim = 2;
-		break;
-	case ('F'):
-		/* FALLTHROUGH */
-	case ('f'):
-		*d = 'F' == c ? DECO_FFONT : DECO_FONT;
-
-		switch (wp[i++]) {
-		case ('('):
-			lim = 2;
-			break;
-		case ('['):
-			term = ']';
-			break;
-		case ('3'):
-			/* FALLTHROUGH */
-		case ('B'):
-			*d = DECO_BOLD;
-			return(i);
-		case ('2'):
-			/* FALLTHROUGH */
-		case ('I'):
-			*d = DECO_ITALIC;
-			return(i);
-		case ('P'):
-			*d = DECO_PREVIOUS;
-			return(i);
-		case ('1'):
-			/* FALLTHROUGH */
-		case ('R'):
-			*d = DECO_ROMAN;
-			return(i);
-		default:
-			i--;
-			lim = 1;
-			break;
-		}
-		break;
-	case ('k'):
-		/* FALLTHROUGH */
-	case ('M'):
-		/* FALLTHROUGH */
-	case ('m'):
-		/* FALLTHROUGH */
-	case ('*'):
-		if ('*' == c)
-			*d = DECO_RESERVED;
-
-		switch (wp[i++]) {
-		case ('('):
-			lim = 2;
-			break;
-		case ('['):
-			term = ']';
-			break;
-		default:
-			i--;
-			lim = 1;
-			break;
-		}
-		break;
-
-	case ('N'):
-
-		/*
-		 * Sequence of characters:  backslash,  'N' (i = 0),
-		 * starting delimiter (i = 1), character number (i = 2).
-		 */
-
-		*word = wp + 2;
-		*sz = 0;
-
-		/*
-		 * Cannot use a digit as a starting delimiter;
-		 * but skip the digit anyway.
-		 */
-
-		if (isdigit((int)wp[1]))
-			return(2);
-
-		/*
-		 * Any non-digit terminates the character number.
-		 * That is, the terminating delimiter need not
-		 * match the starting delimiter.
-		 */
-
-		for (i = 2; isdigit((int)wp[i]); i++)
-			(*sz)++;
-
-		/*
-		 * This is only a numbered character
-		 * if the character number has at least one digit.
-		 */
-
-		if (*sz)
-			*d = DECO_NUMBERED;
-
-		/*
-		 * Skip the terminating delimiter, even if it does not
-		 * match, and even if there is no character number.
-		 */
-
-		return(++i);
-
-	case ('h'):
-		/* FALLTHROUGH */
-	case ('v'):
-		/* FALLTHROUGH */
-	case ('s'):
-		j = 0;
-		if ('+' == wp[i] || '-' == wp[i]) {
-			i++;
-			j = 1;
-		}
-
-		switch (wp[i++]) {
-		case ('('):
-			lim = 2;
-			break;
-		case ('['):
-			term = ']';
-			break;
-		case ('\''):
-			term = '\'';
-			break;
-		case ('0'):
-			j = 1;
-			/* FALLTHROUGH */
-		default:
-			i--;
-			lim = 1;
-			break;
-		}
-
-		if ('+' == wp[i] || '-' == wp[i]) {
-			if (j)
-				return(i);
-			i++;
-		} 
-
-		/* Handle embedded numerical subexp or escape. */
-
-		if ('(' == wp[i]) {
-			while (wp[i] && ')' != wp[i])
-				if ('\\' == wp[i++]) {
-					/* Handle embedded escape. */
-					*word = &wp[i];
-					i += a2roffdeco(&dd, word, sz);
-				}
-
-			if (')' == wp[i++])
-				break;
-
-			*d = DECO_NONE;
-			return(i - 1);
-		} else if ('\\' == wp[i]) {
-			*word = &wp[++i];
-			i += a2roffdeco(&dd, word, sz);
-		}
-
-		break;
-	case ('['):
-		*d = DECO_SPECIAL;
-		term = ']';
-		break;
-	case ('c'):
-		*d = DECO_NOSPACE;
-		return(i);
-	case ('z'):
-		*d = DECO_NONE;
-		if ('\\' == wp[i]) {
-			*word = &wp[++i];
-			return(i + a2roffdeco(&dd, word, sz));
-		} else
-			lim = 1;
-		break;
-	case ('o'):
-		/* FALLTHROUGH */
-	case ('w'):
-		if ('\'' == wp[i++]) {
-			term = '\'';
-			break;
-		} 
-		/* FALLTHROUGH */
-	default:
-		*d = DECO_SSPECIAL;
-		i--;
-		lim = 1;
-		break;
-	}
-
-	assert(term || lim);
-	*word = &wp[i];
-
-	if (term) {
-		j = i;
-		while (wp[i] && wp[i] != term)
-			i++;
-		if ('\0' == wp[i]) {
-			*d = DECO_NONE;
-			return(i);
-		}
-
-		assert(i >= j);
-		*sz = (size_t)(i - j);
-
-		return(i + 1);
-	}
-
-	assert(lim > 0);
-	*sz = (size_t)lim;
-
-	for (j = 0; wp[i] && j < lim; j++)
-		i++;
-	if (j < lim)
-		*d = DECO_NONE;
-
-	return(i);
-}
-
 /*
  * Calculate the abstract widths and decimal positions of columns in a
  * table.  This routine allocates the columns structures then runs over
diff --git a/contrib/mdocml/out.h b/contrib/mdocml/out.h
index 9bf5357b30..63f10c28ff 100644
--- a/contrib/mdocml/out.h
+++ b/contrib/mdocml/out.h
@@ -1,4 +1,4 @@
-/*	$Id: out.h,v 1.18 2011/03/22 10:13:01 kristaps Exp $ */
+/*	$Id: out.h,v 1.20 2011/04/29 22:18:12 kristaps Exp $ */
 /*
  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -31,27 +31,6 @@ enum	roffscale {
 	SCALE_MAX
 };
 
-enum	roffdeco {
-	DECO_NONE,
-	DECO_NUMBERED, /* numbered character */
-	DECO_SPECIAL, /* special character */
-	DECO_SSPECIAL, /* single-char special */
-	DECO_RESERVED, /* reserved word */
-	DECO_BOLD, /* bold font */
-	DECO_ITALIC, /* italic font */
-	DECO_ROMAN, /* "normal" undecorated font */
-	DECO_PREVIOUS, /* revert to previous font */
-	DECO_NOSPACE, /* suppress spacing */
-	DECO_FONT, /* font */
-	DECO_FFONT, /* font family */
-	DECO_MAX
-};
-
-enum	chars {
-	CHARS_ASCII, /* 7-bit ascii representation */
-	CHARS_HTML /* unicode values */
-};
-
 struct	roffcol {
 	size_t		 width; /* width of cell */
 	size_t		 decimal; /* decimal position in cell */
@@ -85,18 +64,9 @@ __BEGIN_DECLS
 	while (/* CONSTCOND */ 0)
 
 int	  	  a2roffsu(const char *, struct roffsu *, enum roffscale);
-int	  	  a2roffdeco(enum roffdeco *, const char **, size_t *);
 void	  	  time2a(time_t, char *, size_t);
 void	  	  tblcalc(struct rofftbl *tbl, const struct tbl_span *);
 
-void		 *chars_init(enum chars);
-const char	 *chars_num2char(const char *, size_t);
-const char	 *chars_spec2str(void *, const char *, size_t, size_t *);
-int		  chars_spec2cp(void *, const char *, size_t);
-const char	 *chars_res2str(void *, const char *, size_t, size_t *);
-int		  chars_res2cp(void *, const char *, size_t);
-void		  chars_free(void *);
-
 __END_DECLS
 
 #endif /*!OUT_H*/
diff --git a/contrib/mdocml/preconv.1 b/contrib/mdocml/preconv.1
new file mode 100644
index 0000000000..96fcaeb12e
--- /dev/null
+++ b/contrib/mdocml/preconv.1
@@ -0,0 +1,161 @@
+.\"	$Id: preconv.1,v 1.4 2011/05/26 14:45:04 kristaps Exp $
+.\"
+.\" Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: May 26 2011 $
+.Dt PRECONV 1
+.Os
+.Sh NAME
+.Nm preconv
+.Nd recode multibyte UNIX manuals
+.Sh SYNOPSIS
+.Nm preconv
+.Op Fl D Ar enc
+.Op Fl e Ar enc
+.Op Ar file
+.Sh DESCRIPTION
+The
+.Nm
+utility recodes multibyte
+.Ux
+manual files into
+.Xr mandoc 1
+.Po
+or other troff system supporting the
+.Sq \e[uNNNN]
+escape sequence
+.Pc
+input.
+Its arguments are as follows:
+.Bl -tag -width Ds
+.It Fl D Ar enc
+The default encoding.
+.It Fl e Ar enc
+The document's encoding.
+.It Ar file
+The input file.
+.El
+.Pp
+If
+.Ar file
+is not provided,
+.Nm
+accepts standard input.
+See
+.Sx Algorithm
+for encoding choice.
+.Pp
+The recoded input is written to standard output: Unicode characters in
+the ASCII range are printed as regular ASCII characters, while those
+above this range are printed using the
+.Sq \e[uNNNN]
+format documented in
+.Xr mandoc_char 7 .
+.Pp
+If input bytes are improperly formed in the current encoding, they're
+passed unmodified to standard output.
+For some encodings, such as UTF-8, unrecoverable input sequences will
+cause
+.Nm
+to stop processing and exit.
+.Ss Algorithm
+An encoding is chosen according to the following steps:
+.Bl -enum
+.It
+From the argument passed to
+.Fl e Ar enc .
+.It
+If a BOM exists, UTF\-8 encoding is selected.
+.It
+From the coding tags parsed from
+.Qq File Variables
+on the first two lines of input.
+A file variable is an input line of the form
+.Pp
+.Dl \%.\e\(dq -*- key: val [; key: val ]* -*-
+.Pp
+A coding tag variable is where
+.Cm key
+is
+.Qq coding
+and
+.Cm val
+is the name of the encoding.
+A typical file variable with a coding tag is
+.Pp
+.Dl \%.\e\(dq -*- mode: troff; coding: utf-8 -*-
+.It
+From the argument passed to
+.Fl D Ar enc .
+.It
+If all else fails, Latin\-1 is used.
+.El
+.Pp
+The
+.Nm
+utility recognises the UTF\-8, us\-ascii, and latin\-1 encodings as
+passed to the
+.Fl e
+and
+.Fl D
+arguments, or as coding tags.
+Encodings are matched case-insensitively.
+.\" .Sh IMPLEMENTATION NOTES
+.\" Not used in OpenBSD.
+.\" .Sh RETURN VALUES
+.\" For sections 2, 3, & 9 only.
+.\" .Sh ENVIRONMENT
+.\" For sections 1, 6, 7, & 8 only.
+.\" .Sh FILES
+.Sh EXIT STATUS
+.Ex -std
+.Sh EXAMPLES
+Explicitly page a UTF\-8 manual
+.Pa foo.1
+in the current locale:
+.Pp
+.Dl $ preconv \-e utf\-8 foo.1 | mandoc \-Tlocale | less
+.\" .Sh DIAGNOSTICS
+.\" For sections 1, 4, 6, 7, & 8 only.
+.\" .Sh ERRORS
+.\" For sections 2, 3, & 9 only.
+.Sh SEE ALSO
+.Xr mandoc 1 ,
+.Xr mandoc_char 7
+.Sh STANDARDS
+The
+.Nm
+utility references the US-ASCII character set standard, ANSI_X3.4\-1968;
+the Latin\-1 character set standard, ISO/IEC 8859\-1:1998; the UTF\-8
+character set standard; and UCS (Unicode), ISO/IEC 10646.
+.Sh HISTORY
+The
+.Nm
+utility first appeared in the GNU troff
+.Pq Dq groff
+system in December 2005, authored by Tomohiro Kubota and Werner
+Lemberg.
+The implementation that is part of the
+.Xr mandoc 1
+utility appeared in May 2011.
+.Sh AUTHORS
+The
+.Nm
+utility was written by
+.An Kristaps Dzonsons Aq kristaps@bsd.lv .
+.\" .Sh CAVEATS
+.\" .Sh BUGS
+.\" .Sh SECURITY CONSIDERATIONS
+.\" Not used in OpenBSD.
diff --git a/contrib/mdocml/preconv.c b/contrib/mdocml/preconv.c
new file mode 100644
index 0000000000..a53504ece9
--- /dev/null
+++ b/contrib/mdocml/preconv.c
@@ -0,0 +1,522 @@
+/*	$Id: preconv.c,v 1.4 2011/05/26 21:13:07 kristaps Exp $ */
+/*
+ * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#include <assert.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/* 
+ * The read_whole_file() and resize_buf() functions are copied from
+ * read.c, including all dependency code (MAP_FILE, etc.).
+ */
+
+#ifndef MAP_FILE
+#define	MAP_FILE	0
+#endif
+
+enum	enc { /* supported input encodings; values index the encs[] table */
+	ENC_UTF_8, /* UTF-8 */
+	ENC_US_ASCII, /* US-ASCII */
+	ENC_LATIN_1, /* Latin-1 */
+	ENC__MAX /* number of encodings; also means "not yet determined" */
+};
+
+struct	buf { /* the whole input, either mmap()ed or heap-allocated */
+	char		 *buf; /* binary input buffer */
+	size_t	 	  sz; /* size of binary buffer in bytes */
+	size_t		  offs; /* offset where conversion starts (past a BOM) */
+};
+
+struct	encode { /* one supported encoding and its converter */
+	const char	 *name; /* name given to -D/-e or in a coding tag */
+	int		(*conv)(const struct buf *); /* recode to stdout; 0 if bad */
+};
+
+static	int	 cue_enc(const struct buf *, size_t *, enum enc *);
+static	int	 conv_latin_1(const struct buf *);
+static	int	 conv_us_ascii(const struct buf *);
+static	int	 conv_utf_8(const struct buf *);
+static	int	 read_whole_file(const char *, int, 
+			struct buf *, int *);
+static	void	 resize_buf(struct buf *, size_t);
+static	void	 usage(void);
+
+static	const struct encode encs[ENC__MAX] = { /* order follows enum enc */
+	{ "utf-8", conv_utf_8 }, /* ENC_UTF_8 */
+	{ "us-ascii", conv_us_ascii }, /* ENC_US_ASCII */
+	{ "latin-1", conv_latin_1 }, /* ENC_LATIN_1 */
+};
+
+static	const char	 *progname;
+
+/*
+ * Print the command-line synopsis to standard error.
+ */
+static void
+usage(void)
+{
+	fprintf(stderr, "usage: %s [-D enc] [-e ENC] [file]\n",
+			progname);
+}
+
+/*
+ * Recode Latin-1 input, starting at b->offs, to standard output.
+ * Latin-1 falls into the first 256 code-points of Unicode, so no
+ * translation is needed: bytes of 160 and above are written as
+ * \[uNNNN] escapes, while all bytes below 160, including the binary
+ * values 128 to 159, are passed through unmodified.  Returns 1.
+ */
+static int
+conv_latin_1(const struct buf *b)
+{
+	const unsigned char	*bytes;
+	size_t			 pos;
+
+	bytes = (const unsigned char *)b->buf;
+
+	for (pos = b->offs; pos < b->sz; pos++) {
+		if (bytes[pos] < 160U)
+			putchar(bytes[pos]);
+		else
+			printf("\\[u%.4X]", bytes[pos]);
+	}
+
+	return(1);
+}
+
+/*
+ * US-ASCII needs no conversion since it coincides with the first 128
+ * code points of Unicode: copy the input to standard output verbatim,
+ * starting at b->offs like the other converters.  Always returns 1.
+ */
+static int
+conv_us_ascii(const struct buf *b)
+{
+	if (b->offs < b->sz)
+		fwrite(b->buf + b->offs, 1, b->sz - b->offs, stdout);
+	return(1);
+}
+
+/*
+ * Recode UTF-8 input, starting at b->offs, to standard output.
+ * Plain ASCII bytes are copied through unchanged; every multibyte
+ * sequence is decoded and printed as a \[uNNNN] escape holding its
+ * code point in hexadecimal.
+ *
+ * Only two- to four-byte sequences with a lead byte in the ranges
+ * 194..223, 224..239 and 240..244 are accepted.  Returns 1 on
+ * success, or 0 as soon as a bad lead byte, a bad continuation byte
+ * or a sequence truncated by the end of input is found; whatever
+ * was printed before the offending byte is not retracted.
+ */
+static int
+conv_utf_8(const struct buf *b)
+{
+	int		 state;
+	unsigned int	 accum;
+	size_t		 i;
+	unsigned char	 cu;
+	const char	*cp;
+
+	cp = b->buf + (int)b->offs;
+	state = 0; /* continuation bytes still expected */
+	accum = 0U; /* code point assembled so far */
+
+	for (i = b->offs; i < b->sz; i++) {
+		cu = (unsigned char)*cp++;
+		if (state) {
+			/*
+			 * Inside a sequence: accept only continuation
+			 * bytes, which have the bit pattern 10xxxxxx.
+			 */
+
+			if (cu > 191 || cu < 128) {
+				/* Bad in-sequence bits. */
+				return(0);
+			}
+
+			accum |= (cu & 63) << --state * 6;
+
+			/*
+			 * Accum is assembled arithmetically, so it
+			 * already holds the code point as a native
+			 * integer on every architecture: it must not
+			 * be byte-swapped on big-endian hosts, which
+			 * would corrupt the value that gets printed.
+			 */
+
+			if (0 == state) {
+				if (accum < 128U)
+					putchar(accum);
+				else
+					printf("\\[u%.4X]", accum);
+				accum = 0U;
+			}
+		} else if (cu & (1 << 7)) {
+			/*
+			 * Entering a UTF-8 state:  if we encounter a
+			 * UTF-8 bitmask, calculate the expected UTF-8
+			 * state from it.
+			 */
+			for (state = 0; state < 7; state++)
+				if ( ! (cu & (1 << (7 - state))))
+					break;
+
+			/* Accept only legitimate bit patterns. */
+
+			switch (state) {
+			case (4):
+				if (cu <= 244 && cu >= 240) {
+					accum = (cu & 7) << 18;
+					break;
+				}
+				/* Bad 4-sequence start bits. */
+				return(0);
+			case (3):
+				if (cu <= 239 && cu >= 224) {
+					accum = (cu & 15) << 12;
+					break;
+				}
+				/* Bad 3-sequence start bits. */
+				return(0);
+			case (2):
+				if (cu <= 223 && cu >= 194) {
+					accum = (cu & 31) << 6;
+					break;
+				}
+				/* Bad 2-sequence start bits. */
+				return(0);
+			default:
+				/* Bad sequence bit mask. */
+				return(0);
+			}
+			state--;
+		} else
+			putchar(cu);
+	}
+
+	if (0 != state) {
+		/* Bad trailing bits. */
+		return(0);
+	}
+
+	return(1);
+}
+
+/* Grow buf: double it if above initial/2, else make it `initial' bytes. */
+static void
+resize_buf(struct buf *buf, size_t initial)
+{
+	if (buf->sz > initial / 2)
+		buf->sz *= 2;
+	else
+		buf->sz = initial;
+	if (NULL != (buf->buf = realloc(buf->buf, buf->sz)))
+		return;
+	perror(NULL);
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Load all of descriptor `fd' (called `f' in diagnostics) into `fb'.
+ * Regular files are mapped with mmap(); any other input, or a file
+ * that cannot be mapped, is read into a growing heap buffer.
+ * Returns 1 with *with_mmap recording which method was used, or 0
+ * after a diagnostic if the input is unreadable or 2GB or larger.
+ */
+static int
+read_whole_file(const char *f, int fd,
+		struct buf *fb, int *with_mmap)
+{
+	struct stat	 sb;
+	size_t		 used;
+	ssize_t		 got;
+
+	if (-1 == fstat(fd, &sb)) {
+		perror(f);
+		return(0);
+	}
+
+	if (S_ISREG(sb.st_mode)) {
+		if (sb.st_size >= (1U << 31)) {
+			fprintf(stderr, "%s: input too large\n", f);
+			return(0);
+		}
+
+		/* Map the file; fall back to read() if that fails. */
+		*with_mmap = 1;
+		fb->sz = (size_t)sb.st_size;
+		fb->buf = mmap(NULL, fb->sz, PROT_READ,
+				MAP_FILE|MAP_SHARED, fd, 0);
+		if (MAP_FAILED != fb->buf)
+			return(1);
+	}
+
+	/*
+	 * Not a regular file (standard input, say) or not mappable:
+	 * read block by block, growing the buffer whenever it is full.
+	 */
+	*with_mmap = 0;
+	fb->buf = NULL;
+	fb->sz = 0;
+	used = 0;
+
+	for (;;) {
+		if (used == fb->sz) {
+			if ((1U << 31) == fb->sz) {
+				fprintf(stderr, "%s: input too large\n", f);
+				break;
+			}
+			resize_buf(fb, 65536);
+		}
+
+		got = read(fd, fb->buf + (int)used, fb->sz - used);
+		if (0 == got) {
+			fb->sz = used;
+			return(1);
+		} else if (-1 == got) {
+			perror(f);
+			break;
+		}
+		used += (size_t)got;
+	}
+
+	free(fb->buf);
+	fb->buf = NULL;
+	return(0);
+}
+
+/*
+ * Look for a coding tag on the line starting at offset *offs, i.e. a
+ * line like `.\" -*- key: val [; key: val ]* -*-' with a "coding" key,
+ * and advance *offs to the next line.  Returns -1 if no complete line
+ * is left, 0 if the line has no coding tag, or 1 if it has one: *enc
+ * is then the encoding named, or ENC__MAX if the name is unknown.
+ */
+static int
+cue_enc(const struct buf *b, size_t *offs, enum enc *enc)
+{
+	const char	*ln, *eoln, *eoph;
+	size_t		 sz, phsz, nsz;
+	int		 i;
+
+	ln = b->buf + (int)*offs;
+	sz = b->sz - *offs;
+
+	/* Look for the end-of-line. */
+	if (NULL == (eoln = memchr(ln, '\n', sz)))
+		return(-1);
+
+	/* Set next-line marker. */
+	*offs = (size_t)((eoln + 1) - b->buf);
+
+	/* Check if we have the correct header/trailer. */
+	if ((sz = (size_t)(eoln - ln)) < 10 ||
+			memcmp(ln, ".\\\" -*-", 7) ||
+			memcmp(eoln - 3, "-*-", 3))
+		return(0);
+
+	/* Move after the header and adjust for the trailer. */
+	ln += 7;
+	sz -= 10;
+
+	while (sz > 0) {
+		while (sz > 0 && ' ' == *ln) {
+			ln++;
+			sz--;
+		}
+		if (0 == sz)
+			break;
+
+		/* Find the end-of-phrase marker (or eoln). */
+		if (NULL == (eoph = memchr(ln, ';', sz)))
+			eoph = eoln - 3;
+		else
+			eoph++;
+
+		/* Only account for the "coding" phrase. */
+		if ((phsz = (size_t)(eoph - ln)) < 7 ||
+				strncasecmp(ln, "coding:", 7)) {
+			sz -= phsz;
+			ln += phsz;
+			continue;
+		}
+
+		sz -= 7;
+		ln += 7;
+
+		while (sz > 0 && ' ' == *ln) {
+			ln++;
+			sz--;
+		}
+		if (0 == sz)
+			break;
+
+		/* Check known encodings against the rest of this phrase. */
+		phsz = (size_t)(eoph - ln);
+		for (i = 0; i < (int)ENC__MAX; i++) {
+			nsz = strlen(encs[i].name);
+			if (phsz < nsz)
+				continue;
+			if (strncasecmp(ln, encs[i].name, nsz))
+				continue;
+
+			*enc = (enum enc)i;
+			return(1);
+		}
+
+		/* Unknown encoding. */
+		*enc = ENC__MAX;
+		return(1);
+	}
+
+	return(0);
+}
+
+/*
+ * Recode the manual named on the command line (or standard input)
+ * to standard output.  The input encoding is taken from -e, else a
+ * UTF-8 BOM, else a coding tag on the first two lines, else -D,
+ * else Latin-1.  Exits with EXIT_SUCCESS only if conversion succeeds.
+ */
+int
+main(int argc, char *argv[])
+{
+	int		 i, ch, map, fd, rc;
+	struct buf	 b;
+	const char	*fn;
+	enum enc	 enc, def;
+	unsigned char	 bom[3] = { 0xEF, 0xBB, 0xBF };
+	size_t		 offs;
+	extern int	 optind;
+	extern char	*optarg;
+
+	progname = strrchr(argv[0], '/');
+	if (progname == NULL)
+		progname = argv[0];
+	else
+		++progname;
+
+	fn = "<stdin>";
+	fd = STDIN_FILENO;
+	rc = EXIT_FAILURE;
+	enc = def = ENC__MAX;
+	map = 0;
+	memset(&b, 0, sizeof(struct buf));
+
+	while (-1 != (ch = getopt(argc, argv, "D:e:rdvh")))
+		switch (ch) {
+		case ('D'):
+			/* FALLTHROUGH */
+		case ('e'):
+			for (i = 0; i < (int)ENC__MAX; i++) {
+				if (strcasecmp(optarg, encs[i].name))
+					continue;
+				break;
+			}
+			if (i < (int)ENC__MAX) {
+				if ('D' == ch)
+					def = (enum enc)i;
+				else
+					enc = (enum enc)i;
+				break;
+			}
+			fprintf(stderr, "%s: Bad encoding\n", optarg);
+			return(EXIT_FAILURE);
+		case ('r'):
+			/* FALLTHROUGH */
+		case ('d'):
+			/* FALLTHROUGH */
+		case ('v'):
+			/* Compatibility with GNU preconv. */
+			break;
+		case ('h'):
+			/* Compatibility with GNU preconv. */
+			/* FALLTHROUGH */
+		default:
+			usage();
+			return(EXIT_FAILURE);
+		}
+
+	argc -= optind;
+	argv += optind;
+
+	/*
+	 * Open and read the first argument on the command-line.
+	 * If we don't have one, we default to stdin.
+	 */
+	if (argc > 0) {
+		fn = *argv;
+		fd = open(fn, O_RDONLY, 0);
+		if (-1 == fd) {
+			perror(fn);
+			return(EXIT_FAILURE);
+		}
+	}
+
+	if ( ! read_whole_file(fn, fd, &b, &map))
+		goto out;
+
+	/* Try to read the UTF-8 BOM, even if nothing follows it. */
+	if (ENC__MAX == enc)
+		if (b.sz >= 3 && 0 == memcmp(b.buf, bom, 3)) {
+			b.offs = 3;
+			enc = ENC_UTF_8;
+		}
+
+	/* Try reading from the "-*-" cue. */
+	if (ENC__MAX == enc) {
+		offs = b.offs;
+		ch = cue_enc(&b, &offs, &enc);
+		if (0 == ch)
+			ch = cue_enc(&b, &offs, &enc);
+	}
+
+	/*
+	 * No encoding has been detected.
+	 * Thus, we either fall into our default encoder, if specified,
+	 * or use Latin-1 if all else fails.
+	 */
+	if (ENC__MAX == enc)
+		enc = ENC__MAX == def ? ENC_LATIN_1 : def;
+
+	if ( ! (*encs[(int)enc].conv)(&b)) {
+		fprintf(stderr, "%s: Bad encoding\n", fn);
+		goto out;
+	}
+
+	rc = EXIT_SUCCESS;
+out:
+	if (map)
+		munmap(b.buf, b.sz);
+	else
+		free(b.buf);
+
+	if (fd > STDIN_FILENO)
+		close(fd);
+
+	return(rc);
+}
diff --git a/contrib/mdocml/predefs.in b/contrib/mdocml/predefs.in
new file mode 100644
index 0000000000..9c56112715
--- /dev/null
+++ b/contrib/mdocml/predefs.in
@@ -0,0 +1,65 @@
+/*	$Id: predefs.in,v 1.2 2011/05/26 14:30:28 kristaps Exp $ */
+/*
+ * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * The predefined-string translation table.  Each entry corresponds to a
+ * predefined string from (e.g.) tmac/mdoc/doc-nroff.  The left-hand
+ * side corresponds to the input sequence (\*x, \*(xx and so on).  The
+ * right-hand side is what's produced by libroff.
+ *
+ * XXX - C-escape strings!
+ * XXX - update PREDEFS_MAX in roff.c if adding more!
+ */
+
+PREDEF("Am", "&")
+PREDEF("Ba", "|")
+PREDEF("Ge", "\\(>=")
+PREDEF("Gt", ">")
+PREDEF("If", "\\(if")
+PREDEF("Le", "\\(<=")
+PREDEF("Lq", "\\(lq")
+PREDEF("Lt", "<")
+PREDEF("Na", "NaN")
+PREDEF("Ne", "\\(!=")
+PREDEF("Pi", "\\(*p")
+PREDEF("Pm", "\\(+-")
+PREDEF("Rq", "\\(rq")
+PREDEF("left-bracket", "[")
+PREDEF("left-parenthesis", "(")
+PREDEF("lp", "(")
+PREDEF("left-singlequote", "\\(oq")
+PREDEF("q", "\\(dq")
+PREDEF("quote-left", "\\(oq")
+PREDEF("quote-right", "\\(cq")
+PREDEF("R", "\\(rg")
+PREDEF("right-bracket", "]")
+PREDEF("right-parenthesis", ")")
+PREDEF("rp", ")")
+PREDEF("right-singlequote", "\\(cq")
+PREDEF("Tm", "\\(tm")
+PREDEF("Px", "POSIX")
+PREDEF("Ai", "ANSI")
+PREDEF("\'", "\\\'")
+PREDEF("aa", "\\(aa")
+PREDEF("ga", "\\(ga")
+PREDEF("`",  "\\`")
+PREDEF("lq", "\\(lq")
+PREDEF("rq", "\\(rq")
+PREDEF("ua", "\\(ua")
+PREDEF("va", "\\(va")
+PREDEF("<=", "\\(<=")
+PREDEF(">=", "\\(>=")
diff --git a/contrib/mdocml/read.c b/contrib/mdocml/read.c
index 6c240c2bd2..da273ae183 100644
--- a/contrib/mdocml/read.c
+++ b/contrib/mdocml/read.c
@@ -1,4 +1,4 @@
-/*	$Id: read.c,v 1.10 2011/04/03 10:11:25 kristaps Exp $ */
+/*	$Id: read.c,v 1.15 2011/05/26 20:36:21 kristaps Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -15,8 +15,14 @@
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
-#include <sys/stat.h>
-#include <sys/mman.h>
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_MMAP
+# include <sys/stat.h>
+# include <sys/mman.h>
+#endif
 
 #include <assert.h>
 #include <ctype.h>
@@ -138,7 +144,7 @@ static	const char * const	mandocerrs[MANDOCERR_MAX] = {
 	"tab in non-literal context",
 	"end of line whitespace",
 	"bad comment style",
-	"unknown escape sequence",
+	"bad escape sequence",
 	"unterminated quoted string",
 	
 	"generic error",
@@ -350,7 +356,7 @@ mparse_buf_r(struct mparse *curp, struct buf blk, int start)
 				continue;
 			}
 
-			if ('"' == blk.buf[i + 1]) {
+			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
 				i += 2;
 				/* Comment, skip to end of line */
 				for (; i < (int)blk.sz; ++i) {
@@ -441,7 +447,7 @@ rerun:
 
 		/*
 		 * If input parsers have not been allocated, do so now.
-		 * We keep these instanced betwen parsers, but set them
+		 * We keep these instanced between parsers, but set them
 		 * locally per parse routine since we can use different
 		 * parsers with each one.
 		 */
@@ -525,19 +531,22 @@ pdesc(struct mparse *curp, const char *file, int fd)
 
 	mparse_buf_r(curp, blk, 1);
 
+#ifdef	HAVE_MMAP
 	if (with_mmap)
 		munmap(blk.buf, blk.sz);
 	else
+#endif
 		free(blk.buf);
 }
 
 static int
 read_whole_file(const char *file, int fd, struct buf *fb, int *with_mmap)
 {
-	struct stat	 st;
 	size_t		 off;
 	ssize_t		 ssz;
 
+#ifdef	HAVE_MMAP
+	struct stat	 st;
 	if (-1 == fstat(fd, &st)) {
 		perror(file);
 		return(0);
@@ -562,6 +571,7 @@ read_whole_file(const char *file, int fd, struct buf *fb, int *with_mmap)
 		if (fb->buf != MAP_FAILED)
 			return(1);
 	}
+#endif
 
 	/*
 	 * If this isn't a regular file (like, say, stdin), then we must
diff --git a/contrib/mdocml/roff.7 b/contrib/mdocml/roff.7
index 8f40d96cbe..41837a1d3c 100644
--- a/contrib/mdocml/roff.7
+++ b/contrib/mdocml/roff.7
@@ -1,4 +1,4 @@
-.\"	$Id: roff.7,v 1.27 2011/02/09 10:03:02 kristaps Exp $
+.\"	$Id: roff.7,v 1.29 2011/05/24 15:22:14 kristaps Exp $
 .\"
 .\" Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
 .\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
@@ -15,7 +15,7 @@
 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 .\"
-.Dd $Mdocdate: February 9 2011 $
+.Dd $Mdocdate: May 24 2011 $
 .Dt ROFF 7
 .Os
 .Sh NAME
@@ -448,15 +448,20 @@ than having the request or macro follow as
 The scope of a conditional is always parsed, but only executed if the
 conditional evaluates to true.
 .Pp
-Note that text following an
-.Sq \&.\e}
-escape sequence is discarded.
-Furthermore, if an explicit closing sequence
+Note that the
 .Sq \e}
-is specified in a free-form line, the entire line is accepted within the
-scope of the prior request, not only the text preceding the close, with the
+is converted into a zero-width escape sequence if not passed as a
+standalone macro
+.Sq \&.\e} .
+For example,
+.Pp
+.D1 \&.Fl a \e} b
+.Pp
+will result in
 .Sq \e}
-collapsing into a zero-width space.
+being considered an argument of the
+.Sq \&Fl
+macro.
 .Ss \&ig
 Ignore input.
 Its syntax can be either
@@ -567,7 +572,7 @@ The
 will be read and its contents processed as input in place of the
 .Sq \&.so
 request line.
-To avoid inadvertant inclusion of unrelated files,
+To avoid inadvertent inclusion of unrelated files,
 .Xr mandoc 1
 only accepts relative paths not containing the strings
 .Qq ../
diff --git a/contrib/mdocml/roff.c b/contrib/mdocml/roff.c
index 3aa3972b6d..92a4a9b372 100644
--- a/contrib/mdocml/roff.c
+++ b/contrib/mdocml/roff.c
@@ -1,4 +1,4 @@
-/*	$Id: roff.c,v 1.130 2011/03/29 09:00:48 kristaps Exp $ */
+/*	$Id: roff.c,v 1.142 2011/05/26 11:58:25 kristaps Exp $ */
 /*
  * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -20,17 +20,15 @@
 #endif
 
 #include <assert.h>
-#include <errno.h>
 #include <ctype.h>
-#include <limits.h>
 #include <stdlib.h>
 #include <string.h>
-#include <stdio.h>
 
 #include "mandoc.h"
 #include "libroff.h"
 #include "libmandoc.h"
 
+/* Maximum number of nested if-else conditionals. */
 #define	RSTACK_MAX	128
 
 enum	rofft {
@@ -63,7 +61,7 @@ enum	rofft {
 	ROFF_EQ,
 	ROFF_EN,
 	ROFF_cblock,
-	ROFF_ccond, /* FIXME: remove this. */
+	ROFF_ccond,
 	ROFF_USERDEF,
 	ROFF_MAX
 };
@@ -127,6 +125,14 @@ struct	roffmac {
 	struct roffmac	*next;
 };
 
+struct	predef {
+	const char	*name; /* predefined input name */
+	const char	*str; /* replacement symbol */
+};
+
+#define	PREDEF(__name, __str) \
+	{ (__name), (__str) },
+
 static	enum rofferr	 roff_block(ROFF_ARGS);
 static	enum rofferr	 roff_block_text(ROFF_ARGS);
 static	enum rofferr	 roff_block_sub(ROFF_ARGS);
@@ -144,7 +150,7 @@ static	const char	*roff_getstrn(const struct roff *,
 static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
 static	enum rofferr	 roff_nr(ROFF_ARGS);
 static	int		 roff_res(struct roff *, 
-				char **, size_t *, int);
+				char **, size_t *, int, int);
 static	enum rofferr	 roff_rm(ROFF_ARGS);
 static	void		 roff_setstr(struct roff *,
 				const char *, const char *, int);
@@ -198,6 +204,12 @@ static	struct roffmac	 roffs[ROFF_MAX] = {
 	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
 };
 
+/* Array of injected predefined strings. */
+#define	PREDEFS_MAX	 38
+static	const struct predef predefs[PREDEFS_MAX] = {
+#include "predefs.in"
+};
+
 static	void		 roff_free1(struct roff *);
 static	enum rofft	 roff_hash_find(const char *, size_t);
 static	void		 roff_hash_init(void);
@@ -206,7 +218,6 @@ static	void		 roffnode_push(struct roff *, enum rofft,
 				const char *, int, int);
 static	void		 roffnode_pop(struct roff *);
 static	enum rofft	 roff_parse(struct roff *, const char *, int *);
-static	int		 roff_parse_nat(const char *, unsigned int *);
 
 /* See roff_hash_find() */
 #define	ROFF_HASH(p)	(p[0] - ASCII_LO)
@@ -232,7 +243,6 @@ roff_hash_init(void)
 	}
 }
 
-
 /*
  * Look up a roff token by its name.  Returns ROFF_MAX if no macro by
  * the nil-terminated string name could be found.
@@ -277,10 +287,6 @@ roffnode_pop(struct roff *r)
 	assert(r->last);
 	p = r->last; 
 
-	if (ROFF_el == p->tok)
-		if (r->rstackpos > -1)
-			r->rstackpos--;
-
 	r->last = r->last->parent;
 	free(p->name);
 	free(p->end);
@@ -359,6 +365,7 @@ struct roff *
 roff_alloc(struct regset *regs, struct mparse *parse)
 {
 	struct roff	*r;
+	int		 i;
 
 	r = mandoc_calloc(1, sizeof(struct roff));
 	r->regs = regs;
@@ -366,6 +373,10 @@ roff_alloc(struct regset *regs, struct mparse *parse)
 	r->rstackpos = -1;
 	
 	roff_hash_init();
+
+	for (i = 0; i < PREDEFS_MAX; i++) 
+		roff_setstr(r, predefs[i].name, predefs[i].str, 0);
+
 	return(r);
 }
 
@@ -376,7 +387,7 @@ roff_alloc(struct regset *regs, struct mparse *parse)
  * is processed. 
  */
 static int
-roff_res(struct roff *r, char **bufp, size_t *szp, int pos)
+roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
 {
 	const char	*stesc;	/* start of an escape sequence ('\\') */
 	const char	*stnam;	/* start of the name, after "[(*" */
@@ -443,8 +454,9 @@ roff_res(struct roff *r, char **bufp, size_t *szp, int pos)
 		res = roff_getstrn(r, stnam, (size_t)i);
 
 		if (NULL == res) {
-			cp -= maxl ? 1 : 0;
-			continue;
+			/* TODO: keep track of the correct position. */
+			mandoc_msg(MANDOCERR_BADESCAPE, r->parse, ln, pos, NULL);
+			res = "";
 		}
 
 		/* Replace the escape sequence by the string. */
@@ -480,7 +492,7 @@ roff_parseln(struct roff *r, int ln, char **bufp,
 	 * words to fill in.
 	 */
 
-	if (r->first_string && ! roff_res(r, bufp, szp, pos))
+	if (r->first_string && ! roff_res(r, bufp, szp, ln, pos))
 		return(ROFF_REPARSE);
 
 	ppos = pos;
@@ -597,27 +609,6 @@ roff_parse(struct roff *r, const char *buf, int *pos)
 	return(t);
 }
 
-
-static int
-roff_parse_nat(const char *buf, unsigned int *res)
-{
-	char		*ep;
-	long		 lval;
-
-	errno = 0;
-	lval = strtol(buf, &ep, 10);
-	if (buf[0] == '\0' || *ep != '\0')
-		return(0);
-	if ((errno == ERANGE && 
-			(lval == LONG_MAX || lval == LONG_MIN)) ||
-			(lval > INT_MAX || lval < 0))
-		return(0);
-
-	*res = (unsigned int)lval;
-	return(1);
-}
-
-
 /* ARGSUSED */
 static enum rofferr
 roff_cblock(ROFF_ARGS)
@@ -739,10 +730,10 @@ roff_block(ROFF_ARGS)
 			mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
 			    roffs[tok].name);
 
-		while ((*bufp)[pos] && ' ' != (*bufp)[pos])
+		while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
 			pos++;
 
-		while (' ' == (*bufp)[pos])
+		while (isspace((unsigned char)(*bufp)[pos]))
 			(*bufp)[pos++] = '\0';
 	}
 
@@ -763,9 +754,7 @@ roff_block(ROFF_ARGS)
 	/* If present, process the custom end-of-line marker. */
 
 	sv = pos;
-	while ((*bufp)[pos] &&
-			' ' != (*bufp)[pos] && 
-			'\t' != (*bufp)[pos])
+	while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
 		pos++;
 
 	/*
@@ -835,8 +824,7 @@ roff_block_sub(ROFF_ARGS)
 	 * pulling it out of the hashtable.
 	 */
 
-	if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
-		return(ROFF_IGN);
+	t = roff_parse(r, *bufp, &pos);
 
 	/*
 	 * Macros other than block-end are only significant
@@ -872,21 +860,29 @@ roff_cond_sub(ROFF_ARGS)
 {
 	enum rofft	 t;
 	enum roffrule	 rr;
+	char		*ep;
 
 	rr = r->last->rule;
+	roffnode_cleanscope(r);
 
-	/* 
-	 * Clean out scope.  If we've closed ourselves, then don't
-	 * continue. 
+	/*
+	 * If the macro is unknown, first check if it contains a closing
+	 * delimiter `\}'.  If it does, close out our scope and return
+	 * the currently-scoped rule (ignore or continue).  Else, drop
+	 * into the currently-scoped rule.
 	 */
 
-	roffnode_cleanscope(r);
-
 	if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
-		if ('\\' == (*bufp)[pos] && '}' == (*bufp)[pos + 1])
-			return(roff_ccond
-				(r, ROFF_ccond, bufp, szp,
-				 ln, pos, pos + 2, offs));
+		ep = &(*bufp)[pos];
+		for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
+			ep++;
+			if ('}' != *ep)
+				continue;
+			*ep = '&';
+			roff_ccond(r, ROFF_ccond, bufp, szp, 
+					ln, pos, pos + 2, offs);
+			break;
+		}
 		return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
 	}
 
@@ -895,6 +891,7 @@ roff_cond_sub(ROFF_ARGS)
 	 * if they're either structurally required (such as loops and
 	 * conditionals) or a closing macro.
 	 */
+
 	if (ROFFRULE_DENY == rr)
 		if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
 			if (ROFF_ccond != t)
@@ -905,37 +902,28 @@ roff_cond_sub(ROFF_ARGS)
 				ln, ppos, pos, offs));
 }
 
-
 /* ARGSUSED */
 static enum rofferr
 roff_cond_text(ROFF_ARGS)
 {
-	char		*ep, *st;
+	char		*ep;
 	enum roffrule	 rr;
 
 	rr = r->last->rule;
+	roffnode_cleanscope(r);
 
-	/*
-	 * We display the value of the text if out current evaluation
-	 * scope permits us to do so.
-	 */
-
-	/* FIXME: use roff_ccond? */
-
-	st = &(*bufp)[pos];
-	if (NULL == (ep = strstr(st, "\\}"))) {
-		roffnode_cleanscope(r);
-		return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
+	ep = &(*bufp)[pos];
+	for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
+		ep++;
+		if ('}' != *ep)
+			continue;
+		*ep = '&';
+		roff_ccond(r, ROFF_ccond, bufp, szp, 
+				ln, pos, pos + 2, offs);
 	}
-
-	if (ep == st || (ep > st && '\\' != *(ep - 1)))
-		roffnode_pop(r);
-
-	roffnode_cleanscope(r);
 	return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
 }
 
-
 static enum roffrule
 roff_evalcond(const char *v, int *pos)
 {
@@ -978,29 +966,20 @@ roff_cond(ROFF_ARGS)
 	int		 sv;
 	enum roffrule	 rule;
 
-	/* Stack overflow! */
-
-	if (ROFF_ie == tok && r->rstackpos == RSTACK_MAX - 1) {
-		mandoc_msg(MANDOCERR_MEM, r->parse, ln, ppos, NULL);
-		return(ROFF_ERR);
-	}
-
-	/* First, evaluate the conditional. */
+	/* 
+	 * An `.el' has no conditional body: it will consume the value
+	 * of the current rstack entry set in prior `ie' calls or
+	 * defaults to DENY.  
+	 *
+	 * If we're not an `el', however, then evaluate the conditional.
+	 */
 
-	if (ROFF_el == tok) {
-		/* 
-		 * An `.el' will get the value of the current rstack
-		 * entry set in prior `ie' calls or defaults to DENY.
-	 	 */
-		if (r->rstackpos < 0)
-			rule = ROFFRULE_DENY;
-		else
-			rule = r->rstack[r->rstackpos];
-	} else
-		rule = roff_evalcond(*bufp, &pos);
+	rule = ROFF_el == tok ?
+		(r->rstackpos < 0 ? 
+		 ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
+		roff_evalcond(*bufp, &pos);
 
 	sv = pos;
-
 	while (' ' == (*bufp)[pos])
 		pos++;
 
@@ -1020,16 +999,20 @@ roff_cond(ROFF_ARGS)
 
 	r->last->rule = rule;
 
+	/*
+	 * An if-else will put the NEGATION of the currently evaluated
+	 * conditional into the stack of rules.
+	 */
+
 	if (ROFF_ie == tok) {
-		/*
-		 * An if-else will put the NEGATION of the current
-		 * evaluated conditional into the stack.
-		 */
-		r->rstackpos++;
-		if (ROFFRULE_DENY == r->last->rule)
-			r->rstack[r->rstackpos] = ROFFRULE_ALLOW;
-		else
-			r->rstack[r->rstackpos] = ROFFRULE_DENY;
+		if (r->rstackpos == RSTACK_MAX - 1) {
+			mandoc_msg(MANDOCERR_MEM, 
+				r->parse, ln, ppos, NULL);
+			return(ROFF_ERR);
+		}
+		r->rstack[++r->rstackpos] = 
+			ROFFRULE_DENY == r->last->rule ?
+			ROFFRULE_ALLOW : ROFFRULE_DENY;
 	}
 
 	/* If the parent has false as its rule, then so do we. */
@@ -1102,6 +1085,7 @@ roff_nr(ROFF_ARGS)
 {
 	const char	*key;
 	char		*val;
+	int		 iv;
 	struct reg	*rg;
 
 	val = *bufp + pos;
@@ -1110,8 +1094,10 @@ roff_nr(ROFF_ARGS)
 
 	if (0 == strcmp(key, "nS")) {
 		rg[(int)REG_nS].set = 1;
-		if ( ! roff_parse_nat(val, &rg[(int)REG_nS].v.u))
-			rg[(int)REG_nS].v.u = 0;
+		if ((iv = mandoc_strntou(val, strlen(val), 10)) >= 0)
+			rg[REG_nS].v.u = (unsigned)iv;
+		else
+			rg[(int)REG_nS].v.u = 0u;
 	}
 
 	return(ROFF_IGN);
diff --git a/contrib/mdocml/st.in b/contrib/mdocml/st.in
index 2d7d005e61..888e5e44fb 100644
--- a/contrib/mdocml/st.in
+++ b/contrib/mdocml/st.in
@@ -1,4 +1,4 @@
-/*	$Id: st.in,v 1.15 2010/07/31 23:52:58 schwarze Exp $ */
+/*	$Id: st.in,v 1.16 2011/04/24 17:56:44 schwarze Exp $ */
 /*
  * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -22,6 +22,8 @@
  * the formatted output string.
  *
  * Be sure to escape strings.
+ * The non-breaking blanks prevent ending an output line right before 
+ * a number.  Groff prevents line breaks at the same places.
  *
  * REMEMBER TO ADD NEW STANDARDS TO MDOC.7!
  */
@@ -43,32 +45,32 @@ LINE("-p1003.2a-92",	"IEEE Std 1003.2a-1992 (\\(lqPOSIX.2\\(rq)")
 LINE("-p1387.2-95",	"IEEE Std 1387.2-1995 (\\(lqPOSIX.7.2\\(rq)")
 LINE("-p1003.2",	"IEEE Std 1003.2 (\\(lqPOSIX.2\\(rq)")
 LINE("-p1387.2",	"IEEE Std 1387.2 (\\(lqPOSIX.7.2\\(rq)")
-LINE("-isoC",		"ISO/IEC 9899:1990 (\\(lqISO C90\\(rq)")
-LINE("-isoC-90",	"ISO/IEC 9899:1990 (\\(lqISO C90\\(rq)")
-LINE("-isoC-amd1",	"ISO/IEC 9899/AMD1:1995 (\\(lqISO C90\\(rq)")
-LINE("-isoC-tcor1",	"ISO/IEC 9899/TCOR1:1994 (\\(lqISO C90\\(rq)")
-LINE("-isoC-tcor2",	"ISO/IEC 9899/TCOR2:1995 (\\(lqISO C90\\(rq)")
-LINE("-isoC-99",	"ISO/IEC 9899:1999 (\\(lqISO C99\\(rq)")
+LINE("-isoC",		"ISO/IEC 9899:1990 (\\(lqISO\\~C90\\(rq)")
+LINE("-isoC-90",	"ISO/IEC 9899:1990 (\\(lqISO\\~C90\\(rq)")
+LINE("-isoC-amd1",	"ISO/IEC 9899/AMD1:1995 (\\(lqISO\\~C90, Amendment 1\\(rq)")
+LINE("-isoC-tcor1",	"ISO/IEC 9899/TCOR1:1994 (\\(lqISO\\~C90, Technical Corrigendum 1\\(rq)")
+LINE("-isoC-tcor2",	"ISO/IEC 9899/TCOR2:1995 (\\(lqISO\\~C90, Technical Corrigendum 2\\(rq)")
+LINE("-isoC-99",	"ISO/IEC 9899:1999 (\\(lqISO\\~C99\\(rq)")
 LINE("-iso9945-1-90",	"ISO/IEC 9945-1:1990 (\\(lqPOSIX.1\\(rq)")
 LINE("-iso9945-1-96",	"ISO/IEC 9945-1:1996 (\\(lqPOSIX.1\\(rq)")
 LINE("-iso9945-2-93",	"ISO/IEC 9945-2:1993 (\\(lqPOSIX.2\\(rq)")
-LINE("-ansiC",		"ANSI X3.159-1989 (\\(lqANSI C\\(rq)")
-LINE("-ansiC-89",	"ANSI X3.159-1989 (\\(lqANSI C\\(rq)")
-LINE("-ansiC-99",	"ANSI/ISO/IEC 9899-1999 (\\(lqANSI C99\\(rq)")
+LINE("-ansiC",		"ANSI X3.159-1989 (\\(lqANSI\\~C89\\(rq)")
+LINE("-ansiC-89",	"ANSI X3.159-1989 (\\(lqANSI\\~C89\\(rq)")
+LINE("-ansiC-99",	"ANSI/ISO/IEC 9899-1999 (\\(lqANSI\\~C99\\(rq)")
 LINE("-ieee754",	"IEEE Std 754-1985")
 LINE("-iso8802-3",	"ISO 8802-3: 1989")
 LINE("-ieee1275-94",	"IEEE Std 1275-1994 (\\(lqOpen Firmware\\(rq)")
-LINE("-xpg3",		"X/Open Portability Guide Issue 3 (\\(lqXPG3\\(rq)")
-LINE("-xpg4",		"X/Open Portability Guide Issue 4 (\\(lqXPG4\\(rq)")
-LINE("-xpg4.2",		"X/Open Portability Guide Issue 4.2 (\\(lqXPG4.2\\(rq)")
-LINE("-xpg4.3",		"X/Open Portability Guide Issue 4.3 (\\(lqXPG4.3\\(rq)")
-LINE("-xbd5",		"X/Open System Interface Definitions Issue 5 (\\(lqXBD5\\(rq)")
-LINE("-xcu5",		"X/Open Commands and Utilities Issue 5 (\\(lqXCU5\\(rq)")
-LINE("-xsh5",		"X/Open System Interfaces and Headers Issue 5 (\\(lqXSH5\\(rq)")
-LINE("-xns5",		"X/Open Networking Services Issue 5 (\\(lqXNS5\\(rq)")
-LINE("-xns5.2",		"X/Open Networking Services Issue 5.2 (\\(lqXNS5.2\\(rq)")
-LINE("-xns5.2d2.0",	"X/Open Networking Services Issue 5.2 Draft 2.0 (\\(lqXNS5.2D2.0\\(rq)")
-LINE("-xcurses4.2",	"X/Open Curses Issue 4 Version 2 (\\(lqXCURSES4.2\\(rq)")
-LINE("-susv2",		"Version 2 of the Single UNIX Specification")
-LINE("-susv3",		"Version 3 of the Single UNIX Specification")
-LINE("-svid4",		"System V Interface Definition, Fourth Edition (\\(lqSVID4\\(rq)")
+LINE("-xpg3",		"X/Open Portability Guide Issue\\~3 (\\(lqXPG3\\(rq)")
+LINE("-xpg4",		"X/Open Portability Guide Issue\\~4 (\\(lqXPG4\\(rq)")
+LINE("-xpg4.2",		"X/Open Portability Guide Issue\\~4, Version\\~2 (\\(lqXPG4.2\\(rq)")
+LINE("-xpg4.3",		"X/Open Portability Guide Issue\\~4, Version\\~3 (\\(lqXPG4.3\\(rq)")
+LINE("-xbd5",		"X/Open System Interface Definitions Issue\\~5 (\\(lqXBD5\\(rq)")
+LINE("-xcu5",		"X/Open Commands and Utilities Issue\\~5 (\\(lqXCU5\\(rq)")
+LINE("-xsh5",		"X/Open System Interfaces and Headers Issue\\~5 (\\(lqXSH5\\(rq)")
+LINE("-xns5",		"X/Open Networking Services Issue\\~5 (\\(lqXNS5\\(rq)")
+LINE("-xns5.2",		"X/Open Networking Services Issue\\~5.2 (\\(lqXNS5.2\\(rq)")
+LINE("-xns5.2d2.0",	"X/Open Networking Services Issue\\~5.2 Draft\\~2.0 (\\(lqXNS5.2D2.0\\(rq)")
+LINE("-xcurses4.2",	"X/Open Curses Issue\\~4, Version\\~2 (\\(lqXCURSES4.2\\(rq)")
+LINE("-susv2",		"Version\\~2 of the Single UNIX Specification")
+LINE("-susv3",		"Version\\~3 of the Single UNIX Specification")
+LINE("-svid4",		"System\\~V Interface Definition, Fourth Edition (\\(lqSVID4\\(rq)")
diff --git a/contrib/mdocml/style.css b/contrib/mdocml/style.css
index b51c72191c..22abf5115b 100644
--- a/contrib/mdocml/style.css
+++ b/contrib/mdocml/style.css
@@ -1,54 +1,47 @@
-/* $Id: style.css,v 1.21 2011/02/09 09:52:47 kristaps Exp $ */
-
-html		{ max-width: 800px; }
-body		{ color: #333333;
-		  font-size: 0.93em;
-		  font-family: Times, serif; }
+/* $Id: style.css,v 1.22 2011/05/14 23:40:49 kristaps Exp $ */
+
+/*
+ * This is an example style-sheet provided for mandoc(1) and the -Thtml
+ * or -Txhtml output mode.
+ *
+ * It mimics the appearance of the traditional cvsweb output.
+ *
+ * See mdoc(7) and man(7) for macro explanations.
+ */
+
+html		{ max-width: 880px; }
+body		{ font-size: smaller; font-family: Helvetica,Arial,sans-serif; }
+h1		{ margin-bottom: 1ex; font-size: 110%; margin-left: -4ex; } /* Section header (Sh, SH). */
+h2		{ margin-bottom: 1ex; font-size: 105%; margin-left: -2ex; } /* Sub-section header (Ss, SS). */
+table		{ width: 100%; margin-top: 0ex; margin-bottom: 0ex; } /* All tables. */
+td		{ vertical-align: top; } /* All table cells. */
+p		{ } /* Paragraph: Pp, Lp. */
+blockquote	{ margin-top: 0ex; margin-bottom: 0ex; } /* D1. */
+div.section	{ margin-bottom: 2ex; margin-left: 5ex; } /* Sections (Sh, SH). */
+div.subsection	{ } /* Sub-sections (Ss, SS). */
+table.synopsis	{ } /* SYNOPSIS section table. */
 
 /* Preamble structure. */
 
-table.foot	{ width: 100%; 
-		  font-size: 0.8em;
-		  margin-top: 1em;
-		  border-top: 1px dotted #dddddd; 
-		  color: #999999; } /* Document footer. */
+table.foot	{ font-size: smaller; margin-top: 1em; border-top: 1px dotted #dddddd; } /* Document footer. */
 td.foot-date	{ width: 50%; } /* Document footer: date. */
 td.foot-os	{ width: 50%; text-align: right; } /* Document footer: OS/source. */
-table.head	{ width: 100%; 
-		  font-size: 0.8em;
-		  margin-bottom: 1em;
-		  border-bottom: 1px dotted #dddddd; 
-		  color: #999999; } /* Document header. */
+table.head	{ font-size: smaller; margin-bottom: 1em; border-bottom: 1px dotted #dddddd; } /* Document header. */
 td.head-ltitle	{ width: 10%; } /* Document header: left-title. */
 td.head-vol	{ width: 80%; text-align: center; } /* Document header: volume. */
 td.head-rtitle	{ width: 10%; text-align: right; } /* Document header: right-title. */
 
-/* Sections. */
-
-h1		{ margin-bottom: 0px; color: #000000; font-size: 0.93em; margin-left: -4ex; } /* Section header (Sh, SH). */
-h2		{ margin-bottom: 0px; color: #000000; font-size: 0.93em; margin-left: -2ex; } /* Sub-section header (Ss, SS). */
-div.section	{ margin-bottom: 2ex; margin-left: 4ex; } /* Sections (Sh, SH). */
-div.subsection	{ } /* Sub-sections (Ss, SS). */
-table.synopsis	{ } /* SYNOPSIS section table. */
-
-/* Vertical spacing. */
-
-p		{ } /* Paragraph: Pp, Lp. */
-blockquote	{ margin-top: 0px; margin-bottom: 0px; }
-table		{ margin-top: 0px; margin-bottom: 0px; }
-td 		{ vertical-align: top; } /* SYNOPSIS section table. */
-
 /* General font modes. */
 
+i		{ } /* Italic: BI, IB, I, (implicit). */
 .emph		{ font-style: italic; font-weight: normal; } /* Emphasis: Em, Bl -emphasis. */
+b		{ } /* Bold: SB, BI, IB, BR, RB, B, (implicit). */
 .symb		{ font-style: normal; font-weight: bold; } /* Symbolic: Sy, Ms, Bf -symbolic. */
+small		{ } /* Small: SB, SM. */
 
 /* Block modes. */
 
-.display	{ background-color: #EEEEEE; 
-		  margin: 3px;
-		  padding: 3px;
-		  border: 1px solid #339999; } /* Top of all Bd, D1, Dl. */
+.display	{ } /* Top of all Bd, D1, Dl. */
 .list		{ } /* Top of all Bl. */
 
 /* Context-specific modes. */
@@ -73,7 +66,7 @@ span.lib	{ } /* Library (Lb). */
 i.link-sec	{ font-weight: normal; } /* Section links (Sx). */
 code.lit	{ font-style: normal; font-weight: normal; } /* Literal: Dl, Li, Bf -literal, Bl -literal, Bl -unfilled. */
 b.macro		{ font-style: normal; } /* Macro-ish thing (Fd). */
-b.name		{ color: blue; font-style: normal; } /* Name of utility (Nm). */
+b.name		{ font-style: normal; } /* Name of utility (Nm). */
 span.opt	{ } /* Options (Op, Oo/Oc). */
 span.ref	{ } /* Citations (Rs). */
 span.ref-auth	{ } /* Reference author (%A). */
@@ -94,14 +87,14 @@ span.unix	{ } /* Unices (Ux, Ox, Nx, Fx, Bx, Bsx, Dx). */
 b.utility	{ font-style: normal; } /* Name of utility (Ex). */
 b.var		{ font-style: normal; } /* Variables (Rv). */
 
-a.link-ext	{ background: transparent url(external.png) center right no-repeat; padding-right: 12px; }/* Off-site link (Lk). */
+a.link-ext	{ } /* Off-site link (Lk). */
 a.link-includes	{ } /* Include-file link (In). */
-a.link-mail 	{ background: transparent url(external.png) center right no-repeat; padding-right: 12px; }/* Mailto links (Mt). */
+a.link-mail	{ } /* Mailto links (Mt). */
 a.link-man	{ } /* Manual links (Xr). */
 a.link-ref	{ } /* Reference section links (%Q). */
-a.link-sec	{ text-decoration: none; border-bottom: 1px dotted #339999; } /* Section links (Sx). */
+a.link-sec	{ } /* Section links (Sx). */
 
-/* Formatting for lists. */
+/* Formatting for lists.  See mdoc(7). */
 
 dl.list-diag	{ }
 dt.list-diag	{ }
@@ -117,7 +110,7 @@ dd.list-inset	{ }
 
 dl.list-ohang	{ }
 dt.list-ohang	{ }
-dd.list-ohang	{ margin-left: 0em; }
+dd.list-ohang	{ margin-left: 0ex; }
 
 dl.list-tag	{ }
 dt.list-tag	{ }
diff --git a/contrib/mdocml/tbl.c b/contrib/mdocml/tbl.c
index 9b331e86bf..6ef2f735b3 100644
--- a/contrib/mdocml/tbl.c
+++ b/contrib/mdocml/tbl.c
@@ -1,4 +1,4 @@
-/*	$Id: tbl.c,v 1.24 2011/03/22 09:48:13 kristaps Exp $ */
+/*	$Id: tbl.c,v 1.25 2011/04/04 23:04:38 kristaps Exp $ */
 /*
  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -15,6 +15,10 @@
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
 #include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/contrib/mdocml/tbl_layout.c b/contrib/mdocml/tbl_layout.c
index 8245003b1d..0aa18dcbeb 100644
--- a/contrib/mdocml/tbl_layout.c
+++ b/contrib/mdocml/tbl_layout.c
@@ -1,4 +1,4 @@
-/*	$Id: tbl_layout.c,v 1.17 2011/03/20 16:02:05 kristaps Exp $ */
+/*	$Id: tbl_layout.c,v 1.20 2011/05/17 13:11:40 kristaps Exp $ */
 /*
  * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -14,6 +14,10 @@
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
 #include <assert.h>
 #include <ctype.h>
 #include <stdlib.h>
@@ -68,6 +72,23 @@ mods(struct tbl_node *tbl, struct tbl_cell *cp,
 	char		 buf[5];
 	int		 i;
 
+	/* Not all types accept modifiers. */
+
+	switch (cp->pos) {
+	case (TBL_CELL_DOWN):
+		/* FALLTHROUGH */
+	case (TBL_CELL_HORIZ):
+		/* FALLTHROUGH */
+	case (TBL_CELL_DHORIZ):
+		/* FALLTHROUGH */
+	case (TBL_CELL_VERT):
+		/* FALLTHROUGH */
+	case (TBL_CELL_DVERT):
+		return(1);
+	default:
+		break;
+	}
+
 mod:
 	/* 
 	 * XXX: since, at least for now, modifiers are non-conflicting
@@ -423,19 +444,19 @@ cell_alloc(struct tbl_node *tbl, struct tbl_row *rp, enum tbl_cellt pos)
 }
 
 static void
-head_adjust(const struct tbl_cell *cell, struct tbl_head *head)
+head_adjust(const struct tbl_cell *cellp, struct tbl_head *head)
 {
-	if (TBL_CELL_VERT != cell->pos &&
-			TBL_CELL_DVERT != cell->pos) {
+	if (TBL_CELL_VERT != cellp->pos &&
+			TBL_CELL_DVERT != cellp->pos) {
 		head->pos = TBL_HEAD_DATA;
 		return;
 	}
 
-	if (TBL_CELL_VERT == cell->pos)
+	if (TBL_CELL_VERT == cellp->pos)
 		if (TBL_HEAD_DVERT != head->pos)
 			head->pos = TBL_HEAD_VERT;
 
-	if (TBL_CELL_DVERT == cell->pos)
+	if (TBL_CELL_DVERT == cellp->pos)
 		head->pos = TBL_HEAD_DVERT;
 }
 
diff --git a/contrib/mdocml/tbl_opts.c b/contrib/mdocml/tbl_opts.c
index dbdcaa81b7..7b67c13b47 100644
--- a/contrib/mdocml/tbl_opts.c
+++ b/contrib/mdocml/tbl_opts.c
@@ -1,4 +1,4 @@
-/*	$Id: tbl_opts.c,v 1.10 2011/03/20 16:02:05 kristaps Exp $ */
+/*	$Id: tbl_opts.c,v 1.11 2011/04/04 23:04:38 kristaps Exp $ */
 /*
  * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -14,6 +14,10 @@
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
 #include <ctype.h>
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/contrib/mdocml/term.c b/contrib/mdocml/term.c
index b0ddd1ed5f..70260bec31 100644
--- a/contrib/mdocml/term.c
+++ b/contrib/mdocml/term.c
@@ -1,4 +1,4 @@
-/*	$Id: term.c,v 1.183 2011/04/04 21:14:12 kristaps Exp $ */
+/*	$Id: term.c,v 1.197 2011/05/24 21:31:23 kristaps Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -33,13 +33,10 @@
 #include "term.h"
 #include "main.h"
 
-static	void		  spec(struct termp *, enum roffdeco,
-				const char *, size_t);
-static	void		  res(struct termp *, const char *, size_t);
-static	void		  bufferc(struct termp *, char);
-static	void		  adjbuf(struct termp *p, size_t);
-static	void		  encode(struct termp *, const char *, size_t);
-
+static	void		 adjbuf(struct termp *p, int);
+static	void		 bufferc(struct termp *, char);
+static	void		 encode(struct termp *, const char *, size_t);
+static	void		 encode1(struct termp *, int);
 
 void
 term_free(struct termp *p)
@@ -48,7 +45,7 @@ term_free(struct termp *p)
 	if (p->buf)
 		free(p->buf);
 	if (p->symtab)
-		chars_free(p->symtab);
+		mchars_free(p->symtab);
 
 	free(p);
 }
@@ -73,18 +70,6 @@ term_end(struct termp *p)
 	(*p->end)(p);
 }
 
-
-struct termp *
-term_alloc(enum termenc enc)
-{
-	struct termp	*p;
-
-	p = mandoc_calloc(1, sizeof(struct termp));
-	p->enc = enc;
-	return(p);
-}
-
-
 /*
  * Flush a line of text.  A "line" is loosely defined as being something
  * that should be followed by a newline, regardless of whether it's
@@ -156,12 +141,12 @@ term_flushln(struct termp *p)
 	vis = vend = 0;
 	i = 0;
 
-	while (i < (int)p->col) {
+	while (i < p->col) {
 		/*
 		 * Handle literal tab characters: collapse all
 		 * subsequent tabs into a single huge set of spaces.
 		 */
-		while (i < (int)p->col && '\t' == p->buf[i]) {
+		while (i < p->col && '\t' == p->buf[i]) {
 			vend = (vis / p->tabwidth + 1) * p->tabwidth;
 			vbl += vend - vis;
 			vis = vend;
@@ -175,7 +160,7 @@ term_flushln(struct termp *p)
 		 * space is printed according to regular spacing rules).
 		 */
 
-		for (j = i, jhy = 0; j < (int)p->col; j++) {
+		for (j = i, jhy = 0; j < p->col; j++) {
 			if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j])
 				break;
 
@@ -218,7 +203,7 @@ term_flushln(struct termp *p)
 		}
 
 		/* Write out the [remaining] word. */
-		for ( ; i < (int)p->col; i++) {
+		for ( ; i < p->col; i++) {
 			if (vend > bp && jhy > 0 && i > jhy)
 				break;
 			if ('\t' == p->buf[i])
@@ -345,44 +330,6 @@ term_vspace(struct termp *p)
 	(*p->endline)(p);
 }
 
-
-static void
-numbered(struct termp *p, const char *word, size_t len)
-{
-	const char	*rhs;
-
-	rhs = chars_num2char(word, len);
-	if (rhs) 
-		encode(p, rhs, 1);
-}
-
-
-static void
-spec(struct termp *p, enum roffdeco d, const char *word, size_t len)
-{
-	const char	*rhs;
-	size_t		 sz;
-
-	rhs = chars_spec2str(p->symtab, word, len, &sz);
-	if (rhs) 
-		encode(p, rhs, sz);
-	else if (DECO_SSPECIAL == d)
-		encode(p, word, len);
-}
-
-
-static void
-res(struct termp *p, const char *word, size_t len)
-{
-	const char	*rhs;
-	size_t		 sz;
-
-	rhs = chars_res2str(p->symtab, word, len, &sz);
-	if (rhs)
-		encode(p, rhs, sz);
-}
-
-
 void
 term_fontlast(struct termp *p)
 {
@@ -447,7 +394,6 @@ term_fontpop(struct termp *p)
 	p->fonti--;
 }
 
-
 /*
  * Handle pwords, partial words, which may be either a single word or a
  * phrase that cannot be broken down (such as a literal string).  This
@@ -456,9 +402,11 @@ term_fontpop(struct termp *p)
 void
 term_word(struct termp *p, const char *word)
 {
-	const char	*seq;
+	const char	*seq, *cp;
+	char		 c;
+	int		 sz, uc;
 	size_t		 ssz;
-	enum roffdeco	 deco;
+	enum mandoc_esc	 esc;
 
 	if ( ! (TERMP_NOSPACE & p->flags)) {
 		if ( ! (TERMP_KEEP & p->flags)) {
@@ -478,7 +426,7 @@ term_word(struct termp *p, const char *word)
 
 	p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM);
 
-	while (*word) {
+	while ('\0' != *word) {
 		if ((ssz = strcspn(word, "\\")) > 0)
 			encode(p, word, ssz);
 
@@ -486,45 +434,71 @@ term_word(struct termp *p, const char *word)
 		if ('\\' != *word)
 			continue;
 
-		seq = ++word;
-		word += a2roffdeco(&deco, &seq, &ssz);
+		word++;
+		esc = mandoc_escape(&word, &seq, &sz);
+		if (ESCAPE_ERROR == esc)
+			break;
+
+		if (TERMENC_ASCII != p->enc)
+			switch (esc) {
+			case (ESCAPE_UNICODE):
+				uc = mchars_num2uc(seq + 1, sz - 1);
+				if ('\0' == uc)
+					break;
+				encode1(p, uc);
+				continue;
+			case (ESCAPE_SPECIAL):
+				uc = mchars_spec2cp(p->symtab, seq, sz);
+				if (uc <= 0)
+					break;
+				encode1(p, uc);
+				continue;
+			default:
+				break;
+			}
 
-		switch (deco) {
-		case (DECO_NUMBERED):
-			numbered(p, seq, ssz);
+		switch (esc) {
+		case (ESCAPE_UNICODE):
+			encode1(p, '?');
 			break;
-		case (DECO_RESERVED):
-			res(p, seq, ssz);
+		case (ESCAPE_NUMBERED):
+			c = mchars_num2char(seq, sz);
+			if ('\0' != c)
+				encode(p, &c, 1);
 			break;
-		case (DECO_SPECIAL):
-			/* FALLTHROUGH */
-		case (DECO_SSPECIAL):
-			spec(p, deco, seq, ssz);
+		case (ESCAPE_SPECIAL):
+			cp = mchars_spec2str(p->symtab, seq, sz, &ssz);
+			if (NULL != cp) 
+				encode(p, cp, ssz);
+			else if (1 == ssz)
+				encode(p, seq, sz);
 			break;
-		case (DECO_BOLD):
+		case (ESCAPE_FONTBOLD):
 			term_fontrepl(p, TERMFONT_BOLD);
 			break;
-		case (DECO_ITALIC):
+		case (ESCAPE_FONTITALIC):
 			term_fontrepl(p, TERMFONT_UNDER);
 			break;
-		case (DECO_ROMAN):
+		case (ESCAPE_FONT):
+			/* FALLTHROUGH */
+		case (ESCAPE_FONTROMAN):
 			term_fontrepl(p, TERMFONT_NONE);
 			break;
-		case (DECO_PREVIOUS):
+		case (ESCAPE_FONTPREV):
 			term_fontlast(p);
 			break;
+		case (ESCAPE_NOSPACE):
+			if ('\0' == *word)
+				p->flags |= TERMP_NOSPACE;
+			break;
 		default:
 			break;
 		}
-
-		if (DECO_NOSPACE == deco && '\0' == *word)
-			p->flags |= TERMP_NOSPACE;
 	}
 }
 
-
 static void
-adjbuf(struct termp *p, size_t sz)
+adjbuf(struct termp *p, int sz)
 {
 
 	if (0 == p->maxcols)
@@ -532,10 +506,10 @@ adjbuf(struct termp *p, size_t sz)
 	while (sz >= p->maxcols)
 		p->maxcols <<= 2;
 
-	p->buf = mandoc_realloc(p->buf, p->maxcols);
+	p->buf = mandoc_realloc
+		(p->buf, sizeof(int) * (size_t)p->maxcols);
 }
 
-
 static void
 bufferc(struct termp *p, char c)
 {
@@ -543,15 +517,44 @@ bufferc(struct termp *p, char c)
 	if (p->col + 1 >= p->maxcols)
 		adjbuf(p, p->col + 1);
 
-	p->buf[(int)p->col++] = c;
+	p->buf[p->col++] = c;
 }
 
+/*
+ * See encode().
+ * Do this for a single (probably unicode) value.
+ * Does not check for non-decorated glyphs.
+ */
+static void
+encode1(struct termp *p, int c)
+{
+	enum termfont	  f;
+
+	if (p->col + 4 >= p->maxcols)
+		adjbuf(p, p->col + 4);
+
+	f = term_fonttop(p);
+
+	if (TERMFONT_NONE == f) {
+		p->buf[p->col++] = c;
+		return;
+	} else if (TERMFONT_UNDER == f) {
+		p->buf[p->col++] = '_';
+	} else
+		p->buf[p->col++] = c;
+
+	p->buf[p->col++] = 8;
+	p->buf[p->col++] = c;
+}
 
 static void
 encode(struct termp *p, const char *word, size_t sz)
 {
 	enum termfont	  f;
-	int		  i;
+	int		  i, len;
+
+	/* LINTED */
+	len = sz;
 
 	/*
 	 * Encode and buffer a string of characters.  If the current
@@ -560,35 +563,34 @@ encode(struct termp *p, const char *word, size_t sz)
 	 */
 
 	if (TERMFONT_NONE == (f = term_fonttop(p))) {
-		if (p->col + sz >= p->maxcols) 
-			adjbuf(p, p->col + sz);
-		memcpy(&p->buf[(int)p->col], word, sz);
-		p->col += sz;
+		if (p->col + len >= p->maxcols) 
+			adjbuf(p, p->col + len);
+		for (i = 0; i < len; i++)
+			p->buf[p->col++] = word[i];
 		return;
 	}
 
 	/* Pre-buffer, assuming worst-case. */
 
-	if (p->col + 1 + (sz * 3) >= p->maxcols)
-		adjbuf(p, p->col + 1 + (sz * 3));
+	if (p->col + 1 + (len * 3) >= p->maxcols)
+		adjbuf(p, p->col + 1 + (len * 3));
 
-	for (i = 0; i < (int)sz; i++) {
-		if ( ! isgraph((u_char)word[i])) {
-			p->buf[(int)p->col++] = word[i];
+	for (i = 0; i < len; i++) {
+		if ( ! isgraph((unsigned char)word[i])) {
+			p->buf[p->col++] = word[i];
 			continue;
 		}
 
 		if (TERMFONT_UNDER == f)
-			p->buf[(int)p->col++] = '_';
+			p->buf[p->col++] = '_';
 		else
-			p->buf[(int)p->col++] = word[i];
+			p->buf[p->col++] = word[i];
 
-		p->buf[(int)p->col++] = 8;
-		p->buf[(int)p->col++] = word[i];
+		p->buf[p->col++] = 8;
+		p->buf[p->col++] = word[i];
 	}
 }
 
-
 size_t
 term_len(const struct termp *p, size_t sz)
 {
@@ -600,59 +602,99 @@ term_len(const struct termp *p, size_t sz)
 size_t
 term_strlen(const struct termp *p, const char *cp)
 {
-	size_t		 sz, ssz, rsz, i;
-	enum roffdeco	 d;
+	size_t		 sz, rsz, i;
+	int		 ssz, c;
 	const char	*seq, *rhs;
+	enum mandoc_esc	 esc;
+	static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' };
 
-	for (sz = 0; '\0' != *cp; )
-		/*
-		 * Account for escaped sequences within string length
-		 * calculations.  This follows the logic in term_word()
-		 * as we must calculate the width of produced strings.
-		 */
-		if ('\\' == *cp) {
-			seq = ++cp;
-			cp += a2roffdeco(&d, &seq, &ssz);
+	/*
+	 * Account for escaped sequences within string length
+	 * calculations.  This follows the logic in term_word() as we
+	 * must calculate the width of produced strings.
+	 */
 
-			switch (d) {
-			case (DECO_RESERVED):
-				rhs = chars_res2str
-					(p->symtab, seq, ssz, &rsz);
+	sz = 0;
+	while ('\0' != *cp) {
+		rsz = strcspn(cp, rej);
+		for (i = 0; i < rsz; i++)
+			sz += (*p->width)(p, *cp++);
+
+		c = 0;
+		switch (*cp) {
+		case ('\\'):
+			cp++;
+			esc = mandoc_escape(&cp, &seq, &ssz);
+			if (ESCAPE_ERROR == esc)
+				return(sz);
+
+			if (TERMENC_ASCII != p->enc)
+				switch (esc) {
+				case (ESCAPE_UNICODE):
+					c = mchars_num2uc
+						(seq + 1, ssz - 1);
+					if ('\0' == c)
+						break;
+					sz += (*p->width)(p, c);
+					continue;
+				case (ESCAPE_SPECIAL):
+					c = mchars_spec2cp
+						(p->symtab, seq, ssz);
+					if (c <= 0)
+						break;
+					sz += (*p->width)(p, c);
+					continue;
+				default:
+					break;
+				}
+
+			rhs = NULL;
+
+			switch (esc) {
+			case (ESCAPE_UNICODE):
+				sz += (*p->width)(p, '?');
 				break;
-			case (DECO_SPECIAL):
-				/* FALLTHROUGH */
-			case (DECO_SSPECIAL):
-				rhs = chars_spec2str
+			case (ESCAPE_NUMBERED):
+				c = mchars_num2char(seq, ssz);
+				if ('\0' != c)
+					sz += (*p->width)(p, c);
+				break;
+			case (ESCAPE_SPECIAL):
+				rhs = mchars_spec2str
 					(p->symtab, seq, ssz, &rsz);
 
-				/* Allow for one-char escapes. */
-				if (DECO_SSPECIAL != d || rhs)
+				if (ssz != 1 || rhs)
 					break;
 
 				rhs = seq;
 				rsz = ssz;
 				break;
 			default:
-				rhs = NULL;
 				break;
 			}
 
-			if (rhs)
-				for (i = 0; i < rsz; i++)
-					sz += (*p->width)(p, *rhs++);
-		} else if (ASCII_NBRSP == *cp) {
+			if (NULL == rhs)
+				break;
+
+			for (i = 0; i < rsz; i++)
+				sz += (*p->width)(p, *rhs++);
+			break;
+		case (ASCII_NBRSP):
 			sz += (*p->width)(p, ' ');
 			cp++;
-		} else if (ASCII_HYPH == *cp) {
+			break;
+		case (ASCII_HYPH):
 			sz += (*p->width)(p, '-');
 			cp++;
-		} else
-			sz += (*p->width)(p, *cp++);
+			break;
+		default:
+			break;
+		}
+	}
 
 	return(sz);
 }
 
-
 /* ARGSUSED */
 size_t
 term_vspan(const struct termp *p, const struct roffsu *su)
@@ -689,7 +731,6 @@ term_vspan(const struct termp *p, const struct roffsu *su)
 			r);
 }
 
-
 size_t
 term_hspan(const struct termp *p, const struct roffsu *su)
 {
diff --git a/contrib/mdocml/term.h b/contrib/mdocml/term.h
index 79b738cae9..130024de6c 100644
--- a/contrib/mdocml/term.h
+++ b/contrib/mdocml/term.h
@@ -1,4 +1,4 @@
-/*	$Id: term.h,v 1.79 2011/01/05 15:37:23 kristaps Exp $ */
+/*	$Id: term.h,v 1.85 2011/05/20 15:48:22 kristaps Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -22,7 +22,9 @@ __BEGIN_DECLS
 struct	termp;
 
 enum	termenc {
-	TERMENC_ASCII
+	TERMENC_ASCII,
+	TERMENC_LOCALE,
+	TERMENC_UTF8
 };
 
 enum	termtype {
@@ -42,35 +44,6 @@ enum	termfont {
 
 typedef void	(*term_margin)(struct termp *, const void *);
 
-struct	termp_ps {
-	int		  flags;
-#define	PS_INLINE	 (1 << 0)	/* we're in a word */
-#define	PS_MARGINS	 (1 << 1)	/* we're in the margins */
-#define	PS_NEWPAGE	 (1 << 2)	/* new page, no words yet */
-	size_t		  pscol;	/* visible column (AFM units) */
-	size_t		  psrow;	/* visible row (AFM units) */
-	char		 *psmarg;	/* margin buf */
-	size_t		  psmargsz;	/* margin buf size */
-	size_t		  psmargcur;	/* cur index in margin buf */
-	char		  last;		/* character buffer */
-	enum termfont	  lastf;	/* last set font */
-	size_t		  scale;	/* font scaling factor */
-	size_t		  pages;	/* number of pages shown */
-	size_t		  lineheight;	/* line height (AFM units) */
-	size_t		  top;		/* body top (AFM units) */
-	size_t		  bottom;	/* body bottom (AFM units) */
-	size_t		  height;	/* page height (AFM units */
-	size_t		  width;	/* page width (AFM units) */
-	size_t		  left;		/* body left (AFM units) */
-	size_t		  header;	/* header pos (AFM units) */
-	size_t		  footer;	/* footer pos (AFM units) */
-	size_t		  pdfbytes; 	/* current output byte */
-	size_t		  pdflastpg;	/* byte of last page mark */
-	size_t		  pdfbody;	/* start of body object */
-	size_t		 *pdfobjs;	/* table of object offsets */
-	size_t		  pdfobjsz;	/* size of pdfobjs */
-};
-
 struct	termp_tbl {
 	int		  width;	/* width in fixed chars */
 	int		  decimal;	/* decimal point position */
@@ -82,10 +55,10 @@ struct	termp {
 	size_t		  defrmargin;	/* Right margin of the device. */
 	size_t		  rmargin;	/* Current right margin. */
 	size_t		  maxrmargin;	/* Max right margin. */
-	size_t		  maxcols;	/* Max size of buf. */
+	int		  maxcols;	/* Max size of buf. */
 	size_t		  offset;	/* Margin offest. */
 	size_t		  tabwidth;	/* Distance of tab positions. */
-	size_t		  col;		/* Bytes in buf. */
+	int		  col;		/* Bytes in buf. */
 	size_t		  viscol;	/* Chars on current line. */
 	int		  overstep;	/* See termp_flushln(). */
 	int		  flags;
@@ -103,29 +76,26 @@ struct	termp {
 #define	TERMP_ANPREC	 (1 << 13)	/* See termp_an_pre(). */
 #define	TERMP_KEEP	 (1 << 14)	/* Keep words together. */
 #define	TERMP_PREKEEP	 (1 << 15)	/* ...starting with the next one. */
-	char		 *buf;		/* Output buffer. */
+	int		 *buf;		/* Output buffer. */
 	enum termenc	  enc;		/* Type of encoding. */
-	void		 *symtab;	/* Encoded-symbol table. */
+	struct mchars	 *symtab;	/* Encoded-symbol table. */
 	enum termfont	  fontl;	/* Last font set. */
 	enum termfont	  fontq[10];	/* Symmetric fonts. */
 	int		  fonti;	/* Index of font stack. */
 	term_margin	  headf;	/* invoked to print head */
 	term_margin	  footf;	/* invoked to print foot */
-	void		(*letter)(struct termp *, char);
+	void		(*letter)(struct termp *, int);
 	void		(*begin)(struct termp *);
 	void		(*end)(struct termp *);
 	void		(*endline)(struct termp *);
 	void		(*advance)(struct termp *, size_t);
-	size_t		(*width)(const struct termp *, char);
+	size_t		(*width)(const struct termp *, int);
 	double		(*hspan)(const struct termp *,
 				const struct roffsu *);
 	const void	 *argf;		/* arg for headf/footf */
-	union {
-		struct termp_ps ps;
-	} engine;
+	struct termp_ps	 *ps;
 };
 
-struct termp	 *term_alloc(enum termenc);
 void		  term_tbl(struct termp *, const struct tbl_span *);
 void		  term_free(struct termp *);
 void		  term_newln(struct termp *);
diff --git a/contrib/mdocml/term_ascii.c b/contrib/mdocml/term_ascii.c
index 374a2a02e0..e65f590a71 100644
--- a/contrib/mdocml/term_ascii.c
+++ b/contrib/mdocml/term_ascii.c
@@ -1,4 +1,4 @@
-/*	$Id: term_ascii.c,v 1.12 2011/01/25 17:32:04 kristaps Exp $ */
+/*	$Id: term_ascii.c,v 1.17 2011/05/20 15:48:22 kristaps Exp $ */
 /*
  * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -21,47 +21,89 @@
 #include <sys/types.h>
 
 #include <assert.h>
+#ifdef USE_WCHAR
+# include <locale.h>
+#endif
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
+#ifdef USE_WCHAR
+# include <wchar.h>
+#endif
 
 #include "mandoc.h"
 #include "out.h"
 #include "term.h"
 #include "main.h"
 
+/*
+ * Sadly, __STDC_ISO_10646__ doesn't seem to be defined on systems even
+ * when they support it.  For the time being, disable this check and let
+ * those compiling the software decide for themselves what to use.
+ */
+#if 0
+#if ! defined(__STDC_ISO_10646__)
+# undef USE_WCHAR
+#endif
+#endif
+
+static	struct termp	 *ascii_init(enum termenc, char *);
 static	double		  ascii_hspan(const struct termp *,
 				const struct roffsu *);
-static	size_t		  ascii_width(const struct termp *, char);
+static	size_t		  ascii_width(const struct termp *, int);
 static	void		  ascii_advance(struct termp *, size_t);
 static	void		  ascii_begin(struct termp *);
 static	void		  ascii_end(struct termp *);
 static	void		  ascii_endline(struct termp *);
-static	void		  ascii_letter(struct termp *, char);
+static	void		  ascii_letter(struct termp *, int);
 
+#ifdef	USE_WCHAR
+static	void		  locale_advance(struct termp *, size_t);
+static	void		  locale_endline(struct termp *);
+static	void		  locale_letter(struct termp *, int);
+static	size_t		  locale_width(const struct termp *, int);
+#endif
 
-void *
-ascii_alloc(char *outopts)
+static struct termp *
+ascii_init(enum termenc enc, char *outopts)
 {
-	struct termp	*p;
 	const char	*toks[2];
 	char		*v;
+	struct termp	*p;
 
-	p = term_alloc(TERMENC_ASCII);
+	p = mandoc_calloc(1, sizeof(struct termp));
+	p->enc = enc;
 
 	p->tabwidth = 5;
 	p->defrmargin = 78;
 
-	p->advance = ascii_advance;
 	p->begin = ascii_begin;
 	p->end = ascii_end;
-	p->endline = ascii_endline;
 	p->hspan = ascii_hspan;
-	p->letter = ascii_letter;
 	p->type = TERMTYPE_CHAR;
+
+	p->enc = TERMENC_ASCII;
+	p->advance = ascii_advance;
+	p->endline = ascii_endline;
+	p->letter = ascii_letter;
 	p->width = ascii_width;
 
+#ifdef	USE_WCHAR
+	if (TERMENC_ASCII != enc) {
+		v = TERMENC_LOCALE == enc ?
+			setlocale(LC_ALL, "") :
+			setlocale(LC_CTYPE, "UTF-8");
+		if (NULL != v && MB_CUR_MAX > 1) {
+			p->enc = enc;
+			p->advance = locale_advance;
+			p->endline = locale_endline;
+			p->letter = locale_letter;
+			p->width = locale_width;
+		}
+	}
+#endif
+
 	toks[0] = "width";
 	toks[1] = NULL;
 
@@ -81,16 +123,36 @@ ascii_alloc(char *outopts)
 	return(p);
 }
 
+void *
+ascii_alloc(char *outopts)
+{
+
+	return(ascii_init(TERMENC_ASCII, outopts));
+}
+
+void *
+utf8_alloc(char *outopts)
+{
+
+	return(ascii_init(TERMENC_UTF8, outopts));
+}
+
+
+void *
+locale_alloc(char *outopts)
+{
+
+	return(ascii_init(TERMENC_LOCALE, outopts));
+}
 
 /* ARGSUSED */
 static size_t
-ascii_width(const struct termp *p, char c)
+ascii_width(const struct termp *p, int c)
 {
 
 	return(1);
 }
 
-
 void
 ascii_free(void *arg)
 {
@@ -98,17 +160,14 @@ ascii_free(void *arg)
 	term_free((struct termp *)arg);
 }
 
-
 /* ARGSUSED */
 static void
-ascii_letter(struct termp *p, char c)
+ascii_letter(struct termp *p, int c)
 {
 	
-	/* LINTED */
 	putchar(c);
 }
 
-
 static void
 ascii_begin(struct termp *p)
 {
@@ -116,7 +175,6 @@ ascii_begin(struct termp *p)
 	(*p->headf)(p, p->argf);
 }
 
-
 static void
 ascii_end(struct termp *p)
 {
@@ -124,7 +182,6 @@ ascii_end(struct termp *p)
 	(*p->footf)(p, p->argf);
 }
 
-
 /* ARGSUSED */
 static void
 ascii_endline(struct termp *p)
@@ -133,19 +190,16 @@ ascii_endline(struct termp *p)
 	putchar('\n');
 }
 
-
 /* ARGSUSED */
 static void
 ascii_advance(struct termp *p, size_t len)
 {
 	size_t	 	i;
 
-	/* Just print whitespace on the terminal. */
 	for (i = 0; i < len; i++)
 		putchar(' ');
 }
 
-
 /* ARGSUSED */
 static double
 ascii_hspan(const struct termp *p, const struct roffsu *su)
@@ -184,3 +238,39 @@ ascii_hspan(const struct termp *p, const struct roffsu *su)
 	return(r);
 }
 
+#ifdef USE_WCHAR
+/* ARGSUSED */
+static size_t
+locale_width(const struct termp *p, int c)
+{
+	int		rc;
+
+	return((rc = wcwidth(c)) < 0 ? 0 : rc);
+}
+
+/* ARGSUSED */
+static void
+locale_advance(struct termp *p, size_t len)
+{
+	size_t	 	i;
+
+	for (i = 0; i < len; i++)
+		putwchar(L' ');
+}
+
+/* ARGSUSED */
+static void
+locale_endline(struct termp *p)
+{
+
+	putwchar(L'\n');
+}
+
+/* ARGSUSED */
+static void
+locale_letter(struct termp *p, int c)
+{
+	
+	putwchar(c);
+}
+#endif
diff --git a/contrib/mdocml/term_ps.c b/contrib/mdocml/term_ps.c
index 233118b819..44e492a2ea 100644
--- a/contrib/mdocml/term_ps.c
+++ b/contrib/mdocml/term_ps.c
@@ -1,4 +1,4 @@
-/*	$Id: term_ps.c,v 1.48 2011/03/17 08:49:34 kristaps Exp $ */
+/*	$Id: term_ps.c,v 1.51 2011/05/17 14:38:34 kristaps Exp $ */
 /*
  * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -34,13 +34,16 @@
 #include "main.h"
 #include "term.h"
 
+/* Minimum growth step of the buffer used by the header and footer. */
+#define	PS_BUFSLOP	  128
+
 /* Convert PostScript point "x" to an AFM unit. */
 #define	PNT2AFM(p, x) /* LINTED */ \
-	(size_t)((double)(x) * (1000.0 / (double)(p)->engine.ps.scale))
+	(size_t)((double)(x) * (1000.0 / (double)(p)->ps->scale))
 
 /* Convert an AFM unit "x" to a PostScript points */
 #define	AFM2PNT(p, x) /* LINTED */ \
-	((double)(x) / (1000.0 / (double)(p)->engine.ps.scale))
+	((double)(x) / (1000.0 / (double)(p)->ps->scale))
 
 struct	glyph {
 	unsigned short	  wx; /* WX in AFM */
@@ -52,6 +55,54 @@ struct	font {
 	struct glyph	  gly[MAXCHAR]; /* glyph metrics */
 };
 
+struct	termp_ps {
+	int		  flags;
+#define	PS_INLINE	 (1 << 0)	/* we're in a word */
+#define	PS_MARGINS	 (1 << 1)	/* we're in the margins */
+#define	PS_NEWPAGE	 (1 << 2)	/* new page, no words yet */
+	size_t		  pscol;	/* visible column (AFM units) */
+	size_t		  psrow;	/* visible row (AFM units) */
+	char		 *psmarg;	/* margin buf */
+	size_t		  psmargsz;	/* margin buf size */
+	size_t		  psmargcur;	/* cur index in margin buf */
+	char		  last;		/* character buffer */
+	enum termfont	  lastf;	/* last set font */
+	size_t		  scale;	/* font scaling factor */
+	size_t		  pages;	/* number of pages shown */
+	size_t		  lineheight;	/* line height (AFM units) */
+	size_t		  top;		/* body top (AFM units) */
+	size_t		  bottom;	/* body bottom (AFM units) */
+	size_t		  height;	/* page height (AFM units) */
+	size_t		  width;	/* page width (AFM units) */
+	size_t		  left;		/* body left (AFM units) */
+	size_t		  header;	/* header pos (AFM units) */
+	size_t		  footer;	/* footer pos (AFM units) */
+	size_t		  pdfbytes; 	/* current output byte */
+	size_t		  pdflastpg;	/* byte of last page mark */
+	size_t		  pdfbody;	/* start of body object */
+	size_t		 *pdfobjs;	/* table of object offsets */
+	size_t		  pdfobjsz;	/* size of pdfobjs */
+};
+
+static	double		  ps_hspan(const struct termp *,
+				const struct roffsu *);
+static	size_t		  ps_width(const struct termp *, int);
+static	void		  ps_advance(struct termp *, size_t);
+static	void		  ps_begin(struct termp *);
+static	void		  ps_closepage(struct termp *);
+static	void		  ps_end(struct termp *);
+static	void		  ps_endline(struct termp *);
+static	void		  ps_fclose(struct termp *);
+static	void		  ps_growbuf(struct termp *, size_t);
+static	void		  ps_letter(struct termp *, int);
+static	void		  ps_pclose(struct termp *);
+static	void		  ps_pletter(struct termp *, int);
+static	void		  ps_printf(struct termp *, const char *, ...);
+static	void		  ps_putchar(struct termp *, char);
+static	void		  ps_setfont(struct termp *, enum termfont);
+static	struct termp	 *pspdf_alloc(char *);
+static	void		  pdf_obj(struct termp *, size_t);
+
 /*
  * We define, for the time being, three fonts: bold, oblique/italic, and
  * normal (roman).  The following table hard-codes the font metrics for
@@ -352,44 +403,6 @@ static	const struct font fonts[TERMFONT__MAX] = {
 	} },
 };
 
-/* These work the buffer used by the header and footer. */
-#define	PS_BUFSLOP	  128
-
-static void
-ps_growbuf(struct termp *p, size_t sz)
-{
-	if (p->engine.ps.psmargcur + sz <= p->engine.ps.psmargsz)
-		return;
-
-	if (sz < PS_BUFSLOP)
-		sz = PS_BUFSLOP;
-
-	p->engine.ps.psmargsz += sz;
-
-	p->engine.ps.psmarg = mandoc_realloc
-		(p->engine.ps.psmarg,
-		 p->engine.ps.psmargsz);
-}
-
-static	double		  ps_hspan(const struct termp *,
-				const struct roffsu *);
-static	size_t		  ps_width(const struct termp *, char);
-static	void		  ps_advance(struct termp *, size_t);
-static	void		  ps_begin(struct termp *);
-static	void		  ps_closepage(struct termp *);
-static	void		  ps_end(struct termp *);
-static	void		  ps_endline(struct termp *);
-static	void		  ps_fclose(struct termp *);
-static	void		  ps_letter(struct termp *, char);
-static	void		  ps_pclose(struct termp *);
-static	void		  ps_pletter(struct termp *, int);
-static	void		  ps_printf(struct termp *, const char *, ...);
-static	void		  ps_putchar(struct termp *, char);
-static	void		  ps_setfont(struct termp *, enum termfont);
-static	struct termp	 *pspdf_alloc(char *);
-static	void		  pdf_obj(struct termp *, size_t);
-
-
 void *
 pdf_alloc(char *outopts)
 {
@@ -401,7 +414,6 @@ pdf_alloc(char *outopts)
 	return(p);
 }
 
-
 void *
 ps_alloc(char *outopts)
 {
@@ -413,7 +425,6 @@ ps_alloc(char *outopts)
 	return(p);
 }
 
-
 static struct termp *
 pspdf_alloc(char *outopts)
 {
@@ -423,7 +434,9 @@ pspdf_alloc(char *outopts)
 	const char	*pp;
 	char		*v;
 
-	p = term_alloc(TERMENC_ASCII);
+	p = mandoc_calloc(1, sizeof(struct termp));
+	p->enc = TERMENC_ASCII;
+	p->ps = mandoc_calloc(1, sizeof(struct termp_ps));
 
 	p->advance = ps_advance;
 	p->begin = ps_begin;
@@ -482,7 +495,7 @@ pspdf_alloc(char *outopts)
 	 * calculations occur.
 	 */
 
-	p->engine.ps.scale = 11;
+	p->ps->scale = 11;
 
 	/* Remember millimetres -> AFM units. */
 
@@ -498,16 +511,16 @@ pspdf_alloc(char *outopts)
 
 	/* Line-height is 1.4em. */
 
-	lineheight = PNT2AFM(p, ((double)p->engine.ps.scale * 1.4));
+	lineheight = PNT2AFM(p, ((double)p->ps->scale * 1.4));
 
-	p->engine.ps.width = pagex;
-	p->engine.ps.height = pagey;
-	p->engine.ps.header = pagey - (marginy / 2) - (lineheight / 2);
-	p->engine.ps.top = pagey - marginy;
-	p->engine.ps.footer = (marginy / 2) - (lineheight / 2);
-	p->engine.ps.bottom = marginy;
-	p->engine.ps.left = marginx;
-	p->engine.ps.lineheight = lineheight;
+	p->ps->width = pagex;
+	p->ps->height = pagey;
+	p->ps->header = pagey - (marginy / 2) - (lineheight / 2);
+	p->ps->top = pagey - marginy;
+	p->ps->footer = (marginy / 2) - (lineheight / 2);
+	p->ps->bottom = marginy;
+	p->ps->left = marginx;
+	p->ps->lineheight = lineheight;
 
 	p->defrmargin = pagex - (marginx * 2);
 	return(p);
@@ -521,11 +534,12 @@ pspdf_free(void *arg)
 
 	p = (struct termp *)arg;
 
-	if (p->engine.ps.psmarg)
-		free(p->engine.ps.psmarg);
-	if (p->engine.ps.pdfobjs)
-		free(p->engine.ps.pdfobjs);
+	if (p->ps->psmarg)
+		free(p->ps->psmarg);
+	if (p->ps->pdfobjs)
+		free(p->ps->pdfobjs);
 
+	free(p->ps);
 	term_free(p);
 }
 
@@ -544,10 +558,10 @@ ps_printf(struct termp *p, const char *fmt, ...)
 	 * into our growable margin buffer.
 	 */
 
-	if ( ! (PS_MARGINS & p->engine.ps.flags)) {
+	if ( ! (PS_MARGINS & p->ps->flags)) {
 		len = vprintf(fmt, ap);
 		va_end(ap);
-		p->engine.ps.pdfbytes += /* LINTED */
+		p->ps->pdfbytes += /* LINTED */
 			len < 0 ? 0 : (size_t)len;
 		return;
 	}
@@ -560,12 +574,12 @@ ps_printf(struct termp *p, const char *fmt, ...)
 
 	ps_growbuf(p, PS_BUFSLOP);
 
-	pos = (int)p->engine.ps.psmargcur;
-	len = vsnprintf(&p->engine.ps.psmarg[pos], PS_BUFSLOP, fmt, ap);
+	pos = (int)p->ps->psmargcur;
+	len = vsnprintf(&p->ps->psmarg[pos], PS_BUFSLOP, fmt, ap);
 
 	va_end(ap);
 
-	p->engine.ps.psmargcur = strlen(p->engine.ps.psmarg);
+	p->ps->psmargcur = strlen(p->ps->psmarg);
 }
 
 
@@ -576,18 +590,18 @@ ps_putchar(struct termp *p, char c)
 
 	/* See ps_printf(). */
 
-	if ( ! (PS_MARGINS & p->engine.ps.flags)) {
+	if ( ! (PS_MARGINS & p->ps->flags)) {
 		/* LINTED */
 		putchar(c);
-		p->engine.ps.pdfbytes++;
+		p->ps->pdfbytes++;
 		return;
 	}
 
 	ps_growbuf(p, 2);
 
-	pos = (int)p->engine.ps.psmargcur++;
-	p->engine.ps.psmarg[pos++] = c;
-	p->engine.ps.psmarg[pos] = '\0';
+	pos = (int)p->ps->psmargcur++;
+	p->ps->psmarg[pos++] = c;
+	p->ps->psmarg[pos] = '\0';
 }
 
 
@@ -597,18 +611,18 @@ pdf_obj(struct termp *p, size_t obj)
 
 	assert(obj > 0);
 
-	if ((obj - 1) >= p->engine.ps.pdfobjsz) {
-		p->engine.ps.pdfobjsz = obj + 128;
-		p->engine.ps.pdfobjs = realloc
-			(p->engine.ps.pdfobjs, 
-			 p->engine.ps.pdfobjsz * sizeof(size_t));
-		if (NULL == p->engine.ps.pdfobjs) {
+	if ((obj - 1) >= p->ps->pdfobjsz) {
+		p->ps->pdfobjsz = obj + 128;
+		p->ps->pdfobjs = realloc
+			(p->ps->pdfobjs, 
+			 p->ps->pdfobjsz * sizeof(size_t));
+		if (NULL == p->ps->pdfobjs) {
 			perror(NULL);
 			exit((int)MANDOCLEVEL_SYSERR);
 		}
 	}
 
-	p->engine.ps.pdfobjs[(int)obj - 1] = p->engine.ps.pdfbytes;
+	p->ps->pdfobjs[(int)obj - 1] = p->ps->pdfbytes;
 	ps_printf(p, "%zu 0 obj\n", obj);
 }
 
@@ -626,14 +640,14 @@ ps_closepage(struct termp *p)
 	 * for the page contents.
 	 */
 
-	assert(p->engine.ps.psmarg && p->engine.ps.psmarg[0]);
-	ps_printf(p, "%s", p->engine.ps.psmarg);
+	assert(p->ps->psmarg && p->ps->psmarg[0]);
+	ps_printf(p, "%s", p->ps->psmarg);
 
 	if (TERMTYPE_PS != p->type) {
 		ps_printf(p, "ET\n");
 
-		len = p->engine.ps.pdfbytes - p->engine.ps.pdflastpg;
-		base = p->engine.ps.pages * 4 + p->engine.ps.pdfbody;
+		len = p->ps->pdfbytes - p->ps->pdflastpg;
+		base = p->ps->pages * 4 + p->ps->pdfbody;
 
 		ps_printf(p, "endstream\nendobj\n");
 
@@ -660,10 +674,10 @@ ps_closepage(struct termp *p)
 	} else
 		ps_printf(p, "showpage\n");
 
-	p->engine.ps.pages++;
-	p->engine.ps.psrow = p->engine.ps.top;
-	assert( ! (PS_NEWPAGE & p->engine.ps.flags));
-	p->engine.ps.flags |= PS_NEWPAGE;
+	p->ps->pages++;
+	p->ps->psrow = p->ps->top;
+	assert( ! (PS_NEWPAGE & p->ps->flags));
+	p->ps->flags |= PS_NEWPAGE;
 }
 
 
@@ -679,15 +693,15 @@ ps_end(struct termp *p)
 	 * well as just one.
 	 */
 
-	if ( ! (PS_NEWPAGE & p->engine.ps.flags)) {
-		assert(0 == p->engine.ps.flags);
-		assert('\0' == p->engine.ps.last);
+	if ( ! (PS_NEWPAGE & p->ps->flags)) {
+		assert(0 == p->ps->flags);
+		assert('\0' == p->ps->last);
 		ps_closepage(p);
 	}
 
 	if (TERMTYPE_PS == p->type) {
 		ps_printf(p, "%%%%Trailer\n");
-		ps_printf(p, "%%%%Pages: %zu\n", p->engine.ps.pages);
+		ps_printf(p, "%%%%Pages: %zu\n", p->ps->pages);
 		ps_printf(p, "%%%%EOF\n");
 		return;
 	} 
@@ -695,18 +709,18 @@ ps_end(struct termp *p)
 	pdf_obj(p, 2);
 	ps_printf(p, "<<\n/Type /Pages\n");
 	ps_printf(p, "/MediaBox [0 0 %zu %zu]\n",
-			(size_t)AFM2PNT(p, p->engine.ps.width),
-			(size_t)AFM2PNT(p, p->engine.ps.height));
+			(size_t)AFM2PNT(p, p->ps->width),
+			(size_t)AFM2PNT(p, p->ps->height));
 
-	ps_printf(p, "/Count %zu\n", p->engine.ps.pages);
+	ps_printf(p, "/Count %zu\n", p->ps->pages);
 	ps_printf(p, "/Kids [");
 
-	for (i = 0; i < p->engine.ps.pages; i++)
+	for (i = 0; i < p->ps->pages; i++)
 		ps_printf(p, " %zu 0 R", i * 4 +
-				p->engine.ps.pdfbody + 3);
+				p->ps->pdfbody + 3);
 
-	base = (p->engine.ps.pages - 1) * 4 + 
-		p->engine.ps.pdfbody + 4;
+	base = (p->ps->pages - 1) * 4 + 
+		p->ps->pdfbody + 4;
 
 	ps_printf(p, "]\n>>\nendobj\n");
 	pdf_obj(p, base);
@@ -714,14 +728,14 @@ ps_end(struct termp *p)
 	ps_printf(p, "/Type /Catalog\n");
 	ps_printf(p, "/Pages 2 0 R\n");
 	ps_printf(p, ">>\n");
-	xref = p->engine.ps.pdfbytes;
+	xref = p->ps->pdfbytes;
 	ps_printf(p, "xref\n");
 	ps_printf(p, "0 %zu\n", base + 1);
 	ps_printf(p, "0000000000 65535 f \n");
 
 	for (i = 0; i < base; i++)
 		ps_printf(p, "%.10zu 00000 n \n", 
-				p->engine.ps.pdfobjs[(int)i]);
+				p->ps->pdfobjs[(int)i]);
 
 	ps_printf(p, "trailer\n");
 	ps_printf(p, "<<\n");
@@ -746,33 +760,33 @@ ps_begin(struct termp *p)
 	 * screen yet, so we don't need to initialise the primary state.
 	 */
 
-	if (p->engine.ps.psmarg) {
-		assert(p->engine.ps.psmargsz);
-		p->engine.ps.psmarg[0] = '\0';
+	if (p->ps->psmarg) {
+		assert(p->ps->psmargsz);
+		p->ps->psmarg[0] = '\0';
 	}
 
-	/*p->engine.ps.pdfbytes = 0;*/
-	p->engine.ps.psmargcur = 0;
-	p->engine.ps.flags = PS_MARGINS;
-	p->engine.ps.pscol = p->engine.ps.left;
-	p->engine.ps.psrow = p->engine.ps.header;
+	/*p->ps->pdfbytes = 0;*/
+	p->ps->psmargcur = 0;
+	p->ps->flags = PS_MARGINS;
+	p->ps->pscol = p->ps->left;
+	p->ps->psrow = p->ps->header;
 
 	ps_setfont(p, TERMFONT_NONE);
 
 	(*p->headf)(p, p->argf);
 	(*p->endline)(p);
 
-	p->engine.ps.pscol = p->engine.ps.left;
-	p->engine.ps.psrow = p->engine.ps.footer;
+	p->ps->pscol = p->ps->left;
+	p->ps->psrow = p->ps->footer;
 
 	(*p->footf)(p, p->argf);
 	(*p->endline)(p);
 
-	p->engine.ps.flags &= ~PS_MARGINS;
+	p->ps->flags &= ~PS_MARGINS;
 
-	assert(0 == p->engine.ps.flags);
-	assert(p->engine.ps.psmarg);
-	assert('\0' != p->engine.ps.psmarg[0]);
+	assert(0 == p->ps->flags);
+	assert(p->ps->psmarg);
+	assert('\0' != p->ps->psmarg[0]);
 
 	/* 
 	 * Print header and initialise page state.  Following this,
@@ -790,8 +804,8 @@ ps_begin(struct termp *p)
 		ps_printf(p, "%%%%PageOrder: Ascend\n");
 		ps_printf(p, "%%%%DocumentMedia: "
 				"Default %zu %zu 0 () ()\n",
-				(size_t)AFM2PNT(p, p->engine.ps.width),
-				(size_t)AFM2PNT(p, p->engine.ps.height));
+				(size_t)AFM2PNT(p, p->ps->width),
+				(size_t)AFM2PNT(p, p->ps->height));
 		ps_printf(p, "%%%%DocumentNeededResources: font");
 
 		for (i = 0; i < (int)TERMFONT__MAX; i++)
@@ -816,10 +830,10 @@ ps_begin(struct termp *p)
 		}
 	}
 
-	p->engine.ps.pdfbody = (size_t)TERMFONT__MAX + 3;
-	p->engine.ps.pscol = p->engine.ps.left;
-	p->engine.ps.psrow = p->engine.ps.top;
-	p->engine.ps.flags |= PS_NEWPAGE;
+	p->ps->pdfbody = (size_t)TERMFONT__MAX + 3;
+	p->ps->pscol = p->ps->left;
+	p->ps->psrow = p->ps->top;
+	p->ps->flags |= PS_NEWPAGE;
 	ps_setfont(p, TERMFONT_NONE);
 }
 
@@ -834,25 +848,25 @@ ps_pletter(struct termp *p, int c)
 	 * in a new page and make sure the font is correctly set.
 	 */
 
-	if (PS_NEWPAGE & p->engine.ps.flags) {
+	if (PS_NEWPAGE & p->ps->flags) {
 		if (TERMTYPE_PS == p->type) {
 			ps_printf(p, "%%%%Page: %zu %zu\n", 
-					p->engine.ps.pages + 1, 
-					p->engine.ps.pages + 1);
+					p->ps->pages + 1, 
+					p->ps->pages + 1);
 			ps_printf(p, "/%s %zu selectfont\n", 
-					fonts[(int)p->engine.ps.lastf].name, 
-					p->engine.ps.scale);
+					fonts[(int)p->ps->lastf].name, 
+					p->ps->scale);
 		} else {
-			pdf_obj(p, p->engine.ps.pdfbody + 
-					p->engine.ps.pages * 4);
+			pdf_obj(p, p->ps->pdfbody + 
+					p->ps->pages * 4);
 			ps_printf(p, "<<\n");
 			ps_printf(p, "/Length %zu 0 R\n", 
-					p->engine.ps.pdfbody + 1 +
-					p->engine.ps.pages * 4);
+					p->ps->pdfbody + 1 +
+					p->ps->pages * 4);
 			ps_printf(p, ">>\nstream\n");
 		}
-		p->engine.ps.pdflastpg = p->engine.ps.pdfbytes;
-		p->engine.ps.flags &= ~PS_NEWPAGE;
+		p->ps->pdflastpg = p->ps->pdfbytes;
+		p->ps->flags &= ~PS_NEWPAGE;
 	}
 	
 	/*
@@ -860,22 +874,22 @@ ps_pletter(struct termp *p, int c)
 	 * now at the current cursor.
 	 */
 
-	if ( ! (PS_INLINE & p->engine.ps.flags)) {
+	if ( ! (PS_INLINE & p->ps->flags)) {
 		if (TERMTYPE_PS != p->type) {
 			ps_printf(p, "BT\n/F%d %zu Tf\n", 
-					(int)p->engine.ps.lastf,
-					p->engine.ps.scale);
+					(int)p->ps->lastf,
+					p->ps->scale);
 			ps_printf(p, "%.3f %.3f Td\n(",
-					AFM2PNT(p, p->engine.ps.pscol),
-					AFM2PNT(p, p->engine.ps.psrow));
+					AFM2PNT(p, p->ps->pscol),
+					AFM2PNT(p, p->ps->psrow));
 		} else
 			ps_printf(p, "%.3f %.3f moveto\n(", 
-					AFM2PNT(p, p->engine.ps.pscol),
-					AFM2PNT(p, p->engine.ps.psrow));
-		p->engine.ps.flags |= PS_INLINE;
+					AFM2PNT(p, p->ps->pscol),
+					AFM2PNT(p, p->ps->psrow));
+		p->ps->flags |= PS_INLINE;
 	}
 
-	assert( ! (PS_NEWPAGE & p->engine.ps.flags));
+	assert( ! (PS_NEWPAGE & p->ps->flags));
 
 	/*
 	 * We need to escape these characters as per the PostScript
@@ -898,17 +912,17 @@ ps_pletter(struct termp *p, int c)
 
 	/* Write the character and adjust where we are on the page. */
 
-	f = (int)p->engine.ps.lastf;
+	f = (int)p->ps->lastf;
 
 	if (c <= 32 || (c - 32 >= MAXCHAR)) {
 		ps_putchar(p, ' ');
-		p->engine.ps.pscol += (size_t)fonts[f].gly[0].wx;
+		p->ps->pscol += (size_t)fonts[f].gly[0].wx;
 		return;
 	} 
 
 	ps_putchar(p, (char)c);
 	c -= 32;
-	p->engine.ps.pscol += (size_t)fonts[f].gly[c].wx;
+	p->ps->pscol += (size_t)fonts[f].gly[c].wx;
 }
 
 
@@ -922,7 +936,7 @@ ps_pclose(struct termp *p)
 	 * or anything).
 	 */
 
-	if ( ! (PS_INLINE & p->engine.ps.flags))
+	if ( ! (PS_INLINE & p->ps->flags))
 		return;
 	
 	if (TERMTYPE_PS != p->type) {
@@ -930,7 +944,7 @@ ps_pclose(struct termp *p)
 	} else
 		ps_printf(p, ") show\n");
 
-	p->engine.ps.flags &= ~PS_INLINE;
+	p->ps->flags &= ~PS_INLINE;
 }
 
 
@@ -946,16 +960,16 @@ ps_fclose(struct termp *p)
 	 * Following this, close out any scope that's open.
 	 */
 
-	if ('\0' != p->engine.ps.last) {
-		if (p->engine.ps.lastf != TERMFONT_NONE) {
+	if ('\0' != p->ps->last) {
+		if (p->ps->lastf != TERMFONT_NONE) {
 			ps_pclose(p);
 			ps_setfont(p, TERMFONT_NONE);
 		}
-		ps_pletter(p, p->engine.ps.last);
-		p->engine.ps.last = '\0';
+		ps_pletter(p, p->ps->last);
+		p->ps->last = '\0';
 	}
 
-	if ( ! (PS_INLINE & p->engine.ps.flags))
+	if ( ! (PS_INLINE & p->ps->flags))
 		return;
 
 	ps_pclose(p);
@@ -963,9 +977,12 @@ ps_fclose(struct termp *p)
 
 
 static void
-ps_letter(struct termp *p, char c)
+ps_letter(struct termp *p, int arg)
 {
-	char		cc;
+	char		cc, c;
+
+	/* LINTED */
+	c = arg >= 128 || arg <= 0 ? '?' : arg;
 
 	/*
 	 * State machine dictates whether to buffer the last character
@@ -976,33 +993,33 @@ ps_letter(struct termp *p, char c)
 	 * regular character and a regular buffer character.
 	 */
 
-	if ('\0' == p->engine.ps.last) {
+	if ('\0' == p->ps->last) {
 		assert(8 != c);
-		p->engine.ps.last = c;
+		p->ps->last = c;
 		return;
-	} else if (8 == p->engine.ps.last) {
+	} else if (8 == p->ps->last) {
 		assert(8 != c);
-		p->engine.ps.last = '\0';
+		p->ps->last = '\0';
 	} else if (8 == c) {
-		assert(8 != p->engine.ps.last);
-		if ('_' == p->engine.ps.last) {
-			if (p->engine.ps.lastf != TERMFONT_UNDER) {
+		assert(8 != p->ps->last);
+		if ('_' == p->ps->last) {
+			if (p->ps->lastf != TERMFONT_UNDER) {
 				ps_pclose(p);
 				ps_setfont(p, TERMFONT_UNDER);
 			}
-		} else if (p->engine.ps.lastf != TERMFONT_BOLD) {
+		} else if (p->ps->lastf != TERMFONT_BOLD) {
 			ps_pclose(p);
 			ps_setfont(p, TERMFONT_BOLD);
 		}
-		p->engine.ps.last = c;
+		p->ps->last = c;
 		return;
 	} else {
-		if (p->engine.ps.lastf != TERMFONT_NONE) {
+		if (p->ps->lastf != TERMFONT_NONE) {
 			ps_pclose(p);
 			ps_setfont(p, TERMFONT_NONE);
 		}
-		cc = p->engine.ps.last;
-		p->engine.ps.last = c;
+		cc = p->ps->last;
+		p->ps->last = c;
 		c = cc;
 	}
 
@@ -1022,7 +1039,7 @@ ps_advance(struct termp *p, size_t len)
 	 */
 
 	ps_fclose(p);
-	p->engine.ps.pscol += len;
+	p->ps->pscol += len;
 }
 
 
@@ -1040,16 +1057,16 @@ ps_endline(struct termp *p)
 	 * lines, we'll do nasty stuff. 
 	 */
 
-	if (PS_MARGINS & p->engine.ps.flags)
+	if (PS_MARGINS & p->ps->flags)
 		return;
 
 	/* Left-justify. */
 
-	p->engine.ps.pscol = p->engine.ps.left;
+	p->ps->pscol = p->ps->left;
 
 	/* If we haven't printed anything, return. */
 
-	if (PS_NEWPAGE & p->engine.ps.flags)
+	if (PS_NEWPAGE & p->ps->flags)
 		return;
 
 	/*
@@ -1057,9 +1074,9 @@ ps_endline(struct termp *p)
 	 * showpage and restart our row.
 	 */
 
-	if (p->engine.ps.psrow >= p->engine.ps.lineheight + 
-			p->engine.ps.bottom) {
-		p->engine.ps.psrow -= p->engine.ps.lineheight;
+	if (p->ps->psrow >= p->ps->lineheight + 
+			p->ps->bottom) {
+		p->ps->psrow -= p->ps->lineheight;
 		return;
 	}
 
@@ -1072,37 +1089,37 @@ ps_setfont(struct termp *p, enum termfont f)
 {
 
 	assert(f < TERMFONT__MAX);
-	p->engine.ps.lastf = f;
+	p->ps->lastf = f;
 	
 	/*
 	 * If we're still at the top of the page, let the font-setting
 	 * be delayed until we actually have stuff to print.
 	 */
 
-	if (PS_NEWPAGE & p->engine.ps.flags)
+	if (PS_NEWPAGE & p->ps->flags)
 		return;
 
 	if (TERMTYPE_PS == p->type)
 		ps_printf(p, "/%s %zu selectfont\n", 
 				fonts[(int)f].name, 
-				p->engine.ps.scale);
+				p->ps->scale);
 	else
 		ps_printf(p, "/F%d %zu Tf\n", 
 				(int)f, 
-				p->engine.ps.scale);
+				p->ps->scale);
 }
 
 
 /* ARGSUSED */
 static size_t
-ps_width(const struct termp *p, char c)
+ps_width(const struct termp *p, int c)
 {
 
 	if (c <= 32 || c - 32 >= MAXCHAR)
 		return((size_t)fonts[(int)TERMFONT_NONE].gly[0].wx);
 
 	c -= 32;
-	return((size_t)fonts[(int)TERMFONT_NONE].gly[(int)c].wx);
+	return((size_t)fonts[(int)TERMFONT_NONE].gly[c].wx);
 }
 
 
@@ -1141,7 +1158,7 @@ ps_hspan(const struct termp *p, const struct roffsu *su)
 			fonts[(int)TERMFONT_NONE].gly[110 - 32].wx;
 		break;
 	case (SCALE_VS):
-		r = su->scale * p->engine.ps.lineheight;
+		r = su->scale * p->ps->lineheight;
 		break;
 	default:
 		r = su->scale;
@@ -1151,3 +1168,18 @@ ps_hspan(const struct termp *p, const struct roffsu *su)
 	return(r);
 }
 
+static void
+ps_growbuf(struct termp *p, size_t sz)
+{
+	if (p->ps->psmargcur + sz <= p->ps->psmargsz)
+		return;
+
+	if (sz < PS_BUFSLOP)
+		sz = PS_BUFSLOP;
+
+	p->ps->psmargsz += sz;
+
+	p->ps->psmarg = mandoc_realloc
+		(p->ps->psmarg, p->ps->psmargsz);
+}
+
diff --git a/contrib/mdocml/test-mmap.c b/contrib/mdocml/test-mmap.c
new file mode 100644
index 0000000000..db8fd8732e
--- /dev/null
+++ b/contrib/mdocml/test-mmap.c
@@ -0,0 +1,10 @@
+#include <sys/types.h>
+#include <sys/mman.h>
+
+int
+main(int argc, char **argv)
+{
+
+	mmap(0, 0, PROT_READ, MAP_FILE|MAP_SHARED, -1, 0);
+	return 0;
+}
-- 
2.41.0