1 /* $Id: html.c,v 1.131 2011/03/22 14:05:45 kristaps Exp $ */
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22 #include <sys/types.h>
41 #define HTML_CLRLINE (1 << 0) /* Closing the tag sets HTML_NOSPACE; presumably the tag ends its output line -- confirm. */
42 #define HTML_NOSTACK (1 << 1) /* Tag is not pushed onto the stack of open scopes. */
43 #define HTML_AUTOCLOSE (1 << 2) /* Tag has auto-closure. */
46 static const struct htmldata htmltags[TAG_MAX] = {
47 {"html", HTML_CLRLINE}, /* TAG_HTML */
48 {"head", HTML_CLRLINE}, /* TAG_HEAD */
49 {"body", HTML_CLRLINE}, /* TAG_BODY */
50 {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */
51 {"title", HTML_CLRLINE}, /* TAG_TITLE */
52 {"div", HTML_CLRLINE}, /* TAG_DIV */
53 {"h1", 0}, /* TAG_H1 */
54 {"h2", 0}, /* TAG_H2 */
55 {"span", 0}, /* TAG_SPAN */
56 {"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */
57 {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */
59 {"table", HTML_CLRLINE}, /* TAG_TABLE */
60 {"tbody", HTML_CLRLINE}, /* TAG_TBODY */
61 {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */
62 {"tr", HTML_CLRLINE}, /* TAG_TR */
63 {"td", HTML_CLRLINE}, /* TAG_TD */
64 {"li", HTML_CLRLINE}, /* TAG_LI */
65 {"ul", HTML_CLRLINE}, /* TAG_UL */
66 {"ol", HTML_CLRLINE}, /* TAG_OL */
67 {"dl", HTML_CLRLINE}, /* TAG_DL */
68 {"dt", HTML_CLRLINE}, /* TAG_DT */
69 {"dd", HTML_CLRLINE}, /* TAG_DD */
70 {"blockquote", HTML_CLRLINE}, /* TAG_BLOCKQUOTE */
71 {"p", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_P */
72 {"pre", HTML_CLRLINE }, /* TAG_PRE */
73 {"b", 0 }, /* TAG_B */
74 {"i", 0 }, /* TAG_I */
75 {"code", 0 }, /* TAG_CODE */
76 {"small", 0 }, /* TAG_SMALL */
79 static const char *const htmlattrs[ATTR_MAX] = {
80 "http-equiv", /* ATTR_HTTPEQUIV */
81 "content", /* ATTR_CONTENT */
82 "name", /* ATTR_NAME */
84 "href", /* ATTR_HREF */
85 "type", /* ATTR_TYPE */
86 "media", /* ATTR_MEDIA */
87 "class", /* ATTR_CLASS */
88 "style", /* ATTR_STYLE */
89 "width", /* ATTR_WIDTH */
91 "summary", /* ATTR_SUMMARY */
92 "align", /* ATTR_ALIGN */
93 "colspan", /* ATTR_COLSPAN */
96 static void print_num(struct html *, const char *, size_t);
97 static void print_spec(struct html *, enum roffdeco,
98 const char *, size_t);
99 static void print_res(struct html *, const char *, size_t);
100 static void print_ctag(struct html *, enum htmltag);
101 static void print_doctype(struct html *);
102 static void print_xmltype(struct html *);
103 static int print_encode(struct html *, const char *, int);
104 static void print_metaf(struct html *, enum roffdeco);
105 static void print_attr(struct html *,
106 const char *, const char *);
107 static void *ml_alloc(char *, enum htmltype);
111 ml_alloc(char *outopts, enum htmltype type)
119 toks[2] = "includes";
122 h = mandoc_calloc(1, sizeof(struct html));
126 h->symtab = chars_init(CHARS_HTML);
128 while (outopts && *outopts)
129 switch (getsubopt(&outopts, UNCONST(toks), &v)) {
137 h->base_includes = v;
147 html_alloc(char *outopts)
150 return(ml_alloc(outopts, HTML_HTML_4_01_STRICT));
155 xhtml_alloc(char *outopts)
158 return(ml_alloc(outopts, HTML_XHTML_1_0_STRICT));
168 h = (struct html *)p;
170 while ((tag = h->tags.head) != NULL) {
171 h->tags.head = tag->next;
176 chars_free(h->symtab);
183 print_gen_head(struct html *h)
185 struct htmlpair tag[4];
187 tag[0].key = ATTR_HTTPEQUIV;
188 tag[0].val = "Content-Type";
189 tag[1].key = ATTR_CONTENT;
190 tag[1].val = "text/html; charset=utf-8";
191 print_otag(h, TAG_META, 2, tag);
193 tag[0].key = ATTR_NAME;
194 tag[0].val = "resource-type";
195 tag[1].key = ATTR_CONTENT;
196 tag[1].val = "document";
197 print_otag(h, TAG_META, 2, tag);
200 tag[0].key = ATTR_REL;
201 tag[0].val = "stylesheet";
202 tag[1].key = ATTR_HREF;
203 tag[1].val = h->style;
204 tag[2].key = ATTR_TYPE;
205 tag[2].val = "text/css";
206 tag[3].key = ATTR_MEDIA;
208 print_otag(h, TAG_LINK, 4, tag);
214 print_num(struct html *h, const char *p, size_t len)
218 rhs = chars_num2char(p, len);
224 print_spec(struct html *h, enum roffdeco d, const char *p, size_t len)
230 if ((cp = chars_spec2cp(h->symtab, p, len)) > 0) {
233 } else if (-1 == cp && DECO_SSPECIAL == d) {
234 fwrite(p, 1, len, stdout);
239 if (NULL != (rhs = chars_spec2str(h->symtab, p, len, &sz)))
240 fwrite(rhs, 1, sz, stdout);
245 print_res(struct html *h, const char *p, size_t len)
251 if ((cp = chars_res2cp(h->symtab, p, len)) > 0) {
257 if (NULL != (rhs = chars_res2str(h->symtab, p, len, &sz)))
258 fwrite(rhs, 1, sz, stdout);
263 print_metaf(struct html *h, enum roffdeco deco)
268 case (DECO_PREVIOUS):
272 font = HTMLFONT_ITALIC;
275 font = HTMLFONT_BOLD;
278 font = HTMLFONT_NONE;
286 print_tagq(h, h->metaf);
293 if (HTMLFONT_NONE != font)
294 h->metaf = HTMLFONT_BOLD == font ?
295 print_otag(h, TAG_B, 0, NULL) :
296 print_otag(h, TAG_I, 0, NULL);
301 print_encode(struct html *h, const char *p, int norecurse)
307 static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' };
312 sz = strcspn(p, rejs);
314 fwrite(p, 1, sz, stdout);
321 } else if ('>' == *p) {
324 } else if ('&' == *p) {
327 } else if (ASCII_HYPH == *p) {
329 * Note: "soft hyphens" aren't graphically
330 * displayed when not breaking the text; we want
331 * them to be displayed.
333 /*printf("&#173;");*/
336 } else if ('\0' == *p)
340 len = a2roffdeco(&deco, &seq, &sz);
343 case (DECO_NUMBERED):
344 print_num(h, seq, sz);
346 case (DECO_RESERVED):
347 print_res(h, seq, sz);
349 case (DECO_SSPECIAL):
352 print_spec(h, deco, seq, sz);
354 case (DECO_PREVIOUS):
363 print_metaf(h, deco);
371 if (DECO_NOSPACE == deco && '\0' == *(p + 1))
380 print_attr(struct html *h, const char *key, const char *val)
382 printf(" %s=\"", key);
383 (void)print_encode(h, val, 1);
389 print_otag(struct html *h, enum htmltag tag,
390 int sz, const struct htmlpair *p)
395 /* Push this tag onto the stack of open scopes. */
397 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
398 t = mandoc_malloc(sizeof(struct tag));
400 t->next = h->tags.head;
405 if ( ! (HTML_NOSPACE & h->flags))
406 if ( ! (HTML_CLRLINE & htmltags[tag].flags)) {
408 if ( ! (HTML_KEEP & h->flags)) {
409 if (HTML_PREKEEP & h->flags)
410 h->flags |= HTML_KEEP;
416 if ( ! (h->flags & HTML_NONOSPACE))
417 h->flags &= ~HTML_NOSPACE;
419 h->flags |= HTML_NOSPACE;
421 /* Print out the tag name and attributes. */
423 printf("<%s", htmltags[tag].name);
424 for (i = 0; i < sz; i++)
425 print_attr(h, htmlattrs[p[i].key], p[i].val);
427 /* Add non-overridable attributes. */
429 if (TAG_HTML == tag && HTML_XHTML_1_0_STRICT == h->type) {
430 print_attr(h, "xmlns", "http://www.w3.org/1999/xhtml");
431 print_attr(h, "xml:lang", "en");
432 print_attr(h, "lang", "en");
435 /* Accommodate XML "well-formed" singleton escaping. */
437 if (HTML_AUTOCLOSE & htmltags[tag].flags)
439 case (HTML_XHTML_1_0_STRICT):
448 h->flags |= HTML_NOSPACE;
450 if ((HTML_AUTOCLOSE | HTML_CLRLINE) & htmltags[tag].flags)
458 print_ctag(struct html *h, enum htmltag tag)
461 printf("</%s>", htmltags[tag].name);
462 if (HTML_CLRLINE & htmltags[tag].flags) {
463 h->flags |= HTML_NOSPACE;
470 print_gen_decls(struct html *h)
479 print_xmltype(struct html *h)
482 if (HTML_XHTML_1_0_STRICT == h->type)
483 puts("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
488 print_doctype(struct html *h)
495 case (HTML_HTML_4_01_STRICT):
497 doctype = "-//W3C//DTD HTML 4.01//EN";
498 dtd = "http://www.w3.org/TR/html4/strict.dtd";
502 doctype = "-//W3C//DTD XHTML 1.0 Strict//EN";
503 dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
507 printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n",
512 print_text(struct html *h, const char *word)
515 if ( ! (HTML_NOSPACE & h->flags)) {
517 if ( ! (HTML_KEEP & h->flags)) {
518 if (HTML_PREKEEP & h->flags)
519 h->flags |= HTML_KEEP;
525 assert(NULL == h->metaf);
526 if (HTMLFONT_NONE != h->metac)
527 h->metaf = HTMLFONT_BOLD == h->metac ?
528 print_otag(h, TAG_B, 0, NULL) :
529 print_otag(h, TAG_I, 0, NULL);
532 if ( ! print_encode(h, word, 0))
533 if ( ! (h->flags & HTML_NONOSPACE))
534 h->flags &= ~HTML_NOSPACE;
537 print_tagq(h, h->metaf);
541 h->flags &= ~HTML_IGNDELIM;
546 print_tagq(struct html *h, const struct tag *until)
550 while ((tag = h->tags.head) != NULL) {
552 * Remember to close out and nullify the current
553 * meta-font and table, if applicable.
559 print_ctag(h, tag->tag);
560 h->tags.head = tag->next;
562 if (until && tag == until)
569 print_stagq(struct html *h, const struct tag *suntil)
573 while ((tag = h->tags.head) != NULL) {
574 if (suntil && tag == suntil)
577 * Remember to close out and nullify the current
578 * meta-font and table, if applicable.
584 print_ctag(h, tag->tag);
585 h->tags.head = tag->next;
592 bufinit(struct html *h)
601 bufcat_style(struct html *h, const char *key, const char *val)
612 bufcat(struct html *h, const char *p)
615 bufncat(h, p, strlen(p));
620 buffmt(struct html *h, const char *fmt, ...)
625 (void)vsnprintf(h->buf + (int)h->buflen,
626 BUFSIZ - h->buflen - 1, fmt, ap);
628 h->buflen = strlen(h->buf);
633 bufncat(struct html *h, const char *p, size_t sz)
636 if (h->buflen + sz > BUFSIZ - 1)
637 sz = BUFSIZ - 1 - h->buflen;
639 (void)strncat(h->buf, p, sz);
645 buffmt_includes(struct html *h, const char *name)
649 pp = h->base_includes;
651 while (NULL != (p = strchr(pp, '%'))) {
652 bufncat(h, pp, (size_t)(p - pp));
669 buffmt_man(struct html *h,
670 const char *name, const char *sec)
677 while (NULL != (p = strchr(pp, '%'))) {
678 bufncat(h, pp, (size_t)(p - pp));
681 bufcat(h, sec ? sec : "1");
698 bufcat_su(struct html *h, const char *p, const struct roffsu *su)
741 * XXX: the CSS spec isn't clear as to which types accept
742 * integer or real numbers, so we just make them all decimals.
744 buffmt(h, "%s: %.2f%s;", p, v, u);
749 html_idcat(char *dst, const char *src, int sz)
755 /* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */
757 /* We can't start with a number (bah). */
769 for ( ; *dst != '\0' && sz; dst++, sz--)
772 for ( ; *src != '\0' && sz > 1; src++) {
773 ssz = snprintf(dst, (size_t)sz, "%.2x", *src);