Import mdocml-1.11.1
[dragonfly.git] / contrib / mdocml / html.c
CommitLineData
/*	$Id: html.c,v 1.131 2011/03/22 14:05:45 kristaps Exp $ */
/*
 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
18#ifdef HAVE_CONFIG_H
19#include "config.h"
20#endif
21
22#include <sys/types.h>
23
24#include <assert.h>
25#include <ctype.h>
26#include <stdarg.h>
27#include <stdio.h>
28#include <stdint.h>
29#include <stdlib.h>
30#include <string.h>
31#include <unistd.h>
32
33#include "mandoc.h"
34#include "out.h"
80387638
SW
35#include "html.h"
36#include "main.h"
37
/*
 * Static description of one HTML element: the name written into the
 * markup plus a set of flag bits that print_otag() and print_ctag()
 * consult when opening and closing the element.
 */
struct	htmldata {
	const char	 *name;		/* element name as printed */
	int		  flags;	/* bitwise OR of the values below */
#define	HTML_CLRLINE	 (1 << 0)	/* newline after the tag, no space before */
#define	HTML_NOSTACK	 (1 << 1)	/* not pushed onto the open-tag stack */
#define	HTML_AUTOCLOSE	 (1 << 2)	/* Tag has auto-closure. */
};
45
46static const struct htmldata htmltags[TAG_MAX] = {
47 {"html", HTML_CLRLINE}, /* TAG_HTML */
48 {"head", HTML_CLRLINE}, /* TAG_HEAD */
49 {"body", HTML_CLRLINE}, /* TAG_BODY */
50 {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */
51 {"title", HTML_CLRLINE}, /* TAG_TITLE */
52 {"div", HTML_CLRLINE}, /* TAG_DIV */
53 {"h1", 0}, /* TAG_H1 */
54 {"h2", 0}, /* TAG_H2 */
55 {"span", 0}, /* TAG_SPAN */
56 {"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */
57 {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */
58 {"a", 0}, /* TAG_A */
59 {"table", HTML_CLRLINE}, /* TAG_TABLE */
60 {"tbody", HTML_CLRLINE}, /* TAG_TBODY */
61 {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */
62 {"tr", HTML_CLRLINE}, /* TAG_TR */
63 {"td", HTML_CLRLINE}, /* TAG_TD */
64 {"li", HTML_CLRLINE}, /* TAG_LI */
65 {"ul", HTML_CLRLINE}, /* TAG_UL */
66 {"ol", HTML_CLRLINE}, /* TAG_OL */
67 {"dl", HTML_CLRLINE}, /* TAG_DL */
68 {"dt", HTML_CLRLINE}, /* TAG_DT */
69 {"dd", HTML_CLRLINE}, /* TAG_DD */
70 {"blockquote", HTML_CLRLINE}, /* TAG_BLOCKQUOTE */
71 {"p", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_P */
72 {"pre", HTML_CLRLINE }, /* TAG_PRE */
73 {"b", 0 }, /* TAG_B */
74 {"i", 0 }, /* TAG_I */
75 {"code", 0 }, /* TAG_CODE */
76 {"small", 0 }, /* TAG_SMALL */
77};
78
79static const char *const htmlattrs[ATTR_MAX] = {
80 "http-equiv", /* ATTR_HTTPEQUIV */
81 "content", /* ATTR_CONTENT */
82 "name", /* ATTR_NAME */
83 "rel", /* ATTR_REL */
84 "href", /* ATTR_HREF */
85 "type", /* ATTR_TYPE */
86 "media", /* ATTR_MEDIA */
87 "class", /* ATTR_CLASS */
88 "style", /* ATTR_STYLE */
89 "width", /* ATTR_WIDTH */
90 "id", /* ATTR_ID */
91 "summary", /* ATTR_SUMMARY */
92 "align", /* ATTR_ALIGN */
60e1e752 93 "colspan", /* ATTR_COLSPAN */
80387638
SW
94};
95
/* Forward declarations of the file-local helpers defined below. */
static	void	 print_num(struct html *h, const char *p, size_t len);
static	void	 print_spec(struct html *h, enum roffdeco d,
			const char *p, size_t len);
static	void	 print_res(struct html *h, const char *p, size_t len);
static	void	 print_ctag(struct html *h, enum htmltag tag);
static	void	 print_doctype(struct html *h);
static	void	 print_xmltype(struct html *h);
static	int	 print_encode(struct html *h, const char *p, int norecurse);
static	void	 print_metaf(struct html *h, enum roffdeco deco);
static	void	 print_attr(struct html *h,
			const char *key, const char *val);
static	void	*ml_alloc(char *outopts, enum htmltype type);
108
109
110static void *
111ml_alloc(char *outopts, enum htmltype type)
112{
113 struct html *h;
114 const char *toks[4];
115 char *v;
116
117 toks[0] = "style";
118 toks[1] = "man";
119 toks[2] = "includes";
120 toks[3] = NULL;
121
60e1e752 122 h = mandoc_calloc(1, sizeof(struct html));
80387638
SW
123
124 h->type = type;
125 h->tags.head = NULL;
126 h->symtab = chars_init(CHARS_HTML);
127
128 while (outopts && *outopts)
129 switch (getsubopt(&outopts, UNCONST(toks), &v)) {
130 case (0):
131 h->style = v;
132 break;
133 case (1):
134 h->base_man = v;
135 break;
136 case (2):
137 h->base_includes = v;
138 break;
139 default:
140 break;
141 }
142
143 return(h);
144}
145
146void *
147html_alloc(char *outopts)
148{
149
150 return(ml_alloc(outopts, HTML_HTML_4_01_STRICT));
151}
152
153
154void *
155xhtml_alloc(char *outopts)
156{
157
158 return(ml_alloc(outopts, HTML_XHTML_1_0_STRICT));
159}
160
161
162void
163html_free(void *p)
164{
165 struct tag *tag;
166 struct html *h;
167
168 h = (struct html *)p;
169
170 while ((tag = h->tags.head) != NULL) {
171 h->tags.head = tag->next;
172 free(tag);
173 }
174
175 if (h->symtab)
176 chars_free(h->symtab);
177
178 free(h);
179}
180
181
182void
183print_gen_head(struct html *h)
184{
185 struct htmlpair tag[4];
186
187 tag[0].key = ATTR_HTTPEQUIV;
188 tag[0].val = "Content-Type";
189 tag[1].key = ATTR_CONTENT;
190 tag[1].val = "text/html; charset=utf-8";
191 print_otag(h, TAG_META, 2, tag);
192
193 tag[0].key = ATTR_NAME;
194 tag[0].val = "resource-type";
195 tag[1].key = ATTR_CONTENT;
196 tag[1].val = "document";
197 print_otag(h, TAG_META, 2, tag);
198
199 if (h->style) {
200 tag[0].key = ATTR_REL;
201 tag[0].val = "stylesheet";
202 tag[1].key = ATTR_HREF;
203 tag[1].val = h->style;
204 tag[2].key = ATTR_TYPE;
205 tag[2].val = "text/css";
206 tag[3].key = ATTR_MEDIA;
207 tag[3].val = "all";
208 print_otag(h, TAG_LINK, 4, tag);
209 }
210}
211
60e1e752
SW
/*
 * Write the single character that chars_num2char() returns for the
 * escape argument p[0..len); nothing is written when the lookup
 * yields NULL.  The output state "h" is not consulted.
 */
/* ARGSUSED */
static void
print_num(struct html *h, const char *p, size_t len)
{
	const char	*ch;

	ch = chars_num2char(p, len);
	if (NULL == ch)
		return;

	putchar((int)*ch);
}
80387638
SW
222
223static void
224print_spec(struct html *h, enum roffdeco d, const char *p, size_t len)
225{
226 int cp;
227 const char *rhs;
228 size_t sz;
229
230 if ((cp = chars_spec2cp(h->symtab, p, len)) > 0) {
231 printf("&#%d;", cp);
232 return;
233 } else if (-1 == cp && DECO_SSPECIAL == d) {
234 fwrite(p, 1, len, stdout);
235 return;
236 } else if (-1 == cp)
237 return;
238
239 if (NULL != (rhs = chars_spec2str(h->symtab, p, len, &sz)))
240 fwrite(rhs, 1, sz, stdout);
241}
242
243
244static void
245print_res(struct html *h, const char *p, size_t len)
246{
247 int cp;
248 const char *rhs;
249 size_t sz;
250
251 if ((cp = chars_res2cp(h->symtab, p, len)) > 0) {
252 printf("&#%d;", cp);
253 return;
254 } else if (-1 == cp)
255 return;
256
257 if (NULL != (rhs = chars_res2str(h->symtab, p, len, &sz)))
258 fwrite(rhs, 1, sz, stdout);
259}
260
261
262static void
263print_metaf(struct html *h, enum roffdeco deco)
264{
265 enum htmlfont font;
266
267 switch (deco) {
268 case (DECO_PREVIOUS):
269 font = h->metal;
270 break;
271 case (DECO_ITALIC):
272 font = HTMLFONT_ITALIC;
273 break;
274 case (DECO_BOLD):
275 font = HTMLFONT_BOLD;
276 break;
277 case (DECO_ROMAN):
278 font = HTMLFONT_NONE;
279 break;
280 default:
281 abort();
282 /* NOTREACHED */
283 }
284
285 if (h->metaf) {
286 print_tagq(h, h->metaf);
287 h->metaf = NULL;
288 }
289
290 h->metal = h->metac;
291 h->metac = font;
292
293 if (HTMLFONT_NONE != font)
294 h->metaf = HTMLFONT_BOLD == font ?
295 print_otag(h, TAG_B, 0, NULL) :
296 print_otag(h, TAG_I, 0, NULL);
297}
298
299
300static int
301print_encode(struct html *h, const char *p, int norecurse)
302{
303 size_t sz;
304 int len, nospace;
305 const char *seq;
306 enum roffdeco deco;
307 static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' };
308
309 nospace = 0;
310
311 for (; *p; p++) {
312 sz = strcspn(p, rejs);
313
314 fwrite(p, 1, sz, stdout);
315 p += /* LINTED */
316 sz;
317
318 if ('<' == *p) {
319 printf("&lt;");
320 continue;
321 } else if ('>' == *p) {
322 printf("&gt;");
323 continue;
324 } else if ('&' == *p) {
325 printf("&amp;");
326 continue;
327 } else if (ASCII_HYPH == *p) {
328 /*
329 * Note: "soft hyphens" aren't graphically
330 * displayed when not breaking the text; we want
331 * them to be displayed.
332 */
333 /*printf("&#173;");*/
334 putchar('-');
335 continue;
336 } else if ('\0' == *p)
337 break;
338
339 seq = ++p;
340 len = a2roffdeco(&deco, &seq, &sz);
341
342 switch (deco) {
60e1e752
SW
343 case (DECO_NUMBERED):
344 print_num(h, seq, sz);
345 break;
80387638
SW
346 case (DECO_RESERVED):
347 print_res(h, seq, sz);
348 break;
349 case (DECO_SSPECIAL):
350 /* FALLTHROUGH */
351 case (DECO_SPECIAL):
352 print_spec(h, deco, seq, sz);
353 break;
354 case (DECO_PREVIOUS):
355 /* FALLTHROUGH */
356 case (DECO_BOLD):
357 /* FALLTHROUGH */
358 case (DECO_ITALIC):
359 /* FALLTHROUGH */
360 case (DECO_ROMAN):
361 if (norecurse)
362 break;
363 print_metaf(h, deco);
364 break;
365 default:
366 break;
367 }
368
369 p += len - 1;
370
371 if (DECO_NOSPACE == deco && '\0' == *(p + 1))
372 nospace = 1;
373 }
374
375 return(nospace);
376}
377
378
/*
 * Write one attribute as ` key="val"`.  The value goes through
 * print_encode() with font escapes disabled; the key is written
 * verbatim.
 */
static void
print_attr(struct html *h, const char *key, const char *val)
{

	printf(" %s=\"", key);
	(void)print_encode(h, val, 1);
	putchar('"');
}
386
387
388struct tag *
389print_otag(struct html *h, enum htmltag tag,
390 int sz, const struct htmlpair *p)
391{
392 int i;
393 struct tag *t;
394
395 /* Push this tags onto the stack of open scopes. */
396
397 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
60e1e752 398 t = mandoc_malloc(sizeof(struct tag));
80387638
SW
399 t->tag = tag;
400 t->next = h->tags.head;
401 h->tags.head = t;
402 } else
403 t = NULL;
404
405 if ( ! (HTML_NOSPACE & h->flags))
406 if ( ! (HTML_CLRLINE & htmltags[tag].flags)) {
407 /* Manage keeps! */
408 if ( ! (HTML_KEEP & h->flags)) {
409 if (HTML_PREKEEP & h->flags)
410 h->flags |= HTML_KEEP;
411 putchar(' ');
412 } else
413 printf("&#160;");
414 }
415
416 if ( ! (h->flags & HTML_NONOSPACE))
417 h->flags &= ~HTML_NOSPACE;
418 else
419 h->flags |= HTML_NOSPACE;
420
421 /* Print out the tag name and attributes. */
422
423 printf("<%s", htmltags[tag].name);
424 for (i = 0; i < sz; i++)
425 print_attr(h, htmlattrs[p[i].key], p[i].val);
426
427 /* Add non-overridable attributes. */
428
429 if (TAG_HTML == tag && HTML_XHTML_1_0_STRICT == h->type) {
430 print_attr(h, "xmlns", "http://www.w3.org/1999/xhtml");
431 print_attr(h, "xml:lang", "en");
432 print_attr(h, "lang", "en");
433 }
434
435 /* Accomodate for XML "well-formed" singleton escaping. */
436
437 if (HTML_AUTOCLOSE & htmltags[tag].flags)
438 switch (h->type) {
439 case (HTML_XHTML_1_0_STRICT):
440 putchar('/');
441 break;
442 default:
443 break;
444 }
445
446 putchar('>');
447
448 h->flags |= HTML_NOSPACE;
449
450 if ((HTML_AUTOCLOSE | HTML_CLRLINE) & htmltags[tag].flags)
451 putchar('\n');
452
453 return(t);
454}
455
456
457static void
458print_ctag(struct html *h, enum htmltag tag)
459{
460
461 printf("</%s>", htmltags[tag].name);
462 if (HTML_CLRLINE & htmltags[tag].flags) {
463 h->flags |= HTML_NOSPACE;
464 putchar('\n');
465 }
466}
467
468
/*
 * Print the document prologue: the XML declaration (which
 * print_xmltype() emits in XHTML mode only) followed by the DOCTYPE.
 */
void
print_gen_decls(struct html *h)
{

	print_xmltype(h);	/* must precede the DOCTYPE */
	print_doctype(h);
}
476
477
478static void
479print_xmltype(struct html *h)
480{
481
482 if (HTML_XHTML_1_0_STRICT == h->type)
483 puts("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
484}
485
486
487static void
488print_doctype(struct html *h)
489{
490 const char *doctype;
491 const char *dtd;
492 const char *name;
493
494 switch (h->type) {
495 case (HTML_HTML_4_01_STRICT):
496 name = "HTML";
497 doctype = "-//W3C//DTD HTML 4.01//EN";
498 dtd = "http://www.w3.org/TR/html4/strict.dtd";
499 break;
500 default:
501 name = "html";
502 doctype = "-//W3C//DTD XHTML 1.0 Strict//EN";
503 dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
504 break;
505 }
506
507 printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n",
508 name, doctype, dtd);
509}
510
80387638
SW
511void
512print_text(struct html *h, const char *word)
513{
514
80387638
SW
515 if ( ! (HTML_NOSPACE & h->flags)) {
516 /* Manage keeps! */
517 if ( ! (HTML_KEEP & h->flags)) {
518 if (HTML_PREKEEP & h->flags)
519 h->flags |= HTML_KEEP;
520 putchar(' ');
521 } else
522 printf("&#160;");
523 }
524
525 assert(NULL == h->metaf);
526 if (HTMLFONT_NONE != h->metac)
527 h->metaf = HTMLFONT_BOLD == h->metac ?
528 print_otag(h, TAG_B, 0, NULL) :
529 print_otag(h, TAG_I, 0, NULL);
530
531 assert(word);
532 if ( ! print_encode(h, word, 0))
533 if ( ! (h->flags & HTML_NONOSPACE))
534 h->flags &= ~HTML_NOSPACE;
535
536 if (h->metaf) {
537 print_tagq(h, h->metaf);
538 h->metaf = NULL;
539 }
540
541 h->flags &= ~HTML_IGNDELIM;
80387638
SW
542}
543
544
545void
546print_tagq(struct html *h, const struct tag *until)
547{
548 struct tag *tag;
549
550 while ((tag = h->tags.head) != NULL) {
60e1e752
SW
551 /*
552 * Remember to close out and nullify the current
553 * meta-font and table, if applicable.
554 */
80387638
SW
555 if (tag == h->metaf)
556 h->metaf = NULL;
60e1e752
SW
557 if (tag == h->tblt)
558 h->tblt = NULL;
80387638
SW
559 print_ctag(h, tag->tag);
560 h->tags.head = tag->next;
561 free(tag);
562 if (until && tag == until)
563 return;
564 }
565}
566
567
568void
569print_stagq(struct html *h, const struct tag *suntil)
570{
571 struct tag *tag;
572
573 while ((tag = h->tags.head) != NULL) {
574 if (suntil && tag == suntil)
575 return;
60e1e752
SW
576 /*
577 * Remember to close out and nullify the current
578 * meta-font and table, if applicable.
579 */
80387638
SW
580 if (tag == h->metaf)
581 h->metaf = NULL;
60e1e752
SW
582 if (tag == h->tblt)
583 h->tblt = NULL;
80387638
SW
584 print_ctag(h, tag->tag);
585 h->tags.head = tag->next;
586 free(tag);
587 }
588}
589
590
591void
592bufinit(struct html *h)
593{
594
595 h->buf[0] = '\0';
596 h->buflen = 0;
597}
598
599
/* Append "key:val;" to the scratch buffer of "h". */
void
bufcat_style(struct html *h, const char *key, const char *val)
{

	bufcat(h, key);
	bufcat(h, ":");
	bufcat(h, val);
	bufcat(h, ";");
}
609
610
/* Append the whole NUL-terminated string "p" to the scratch buffer. */
void
bufcat(struct html *h, const char *p)
{
	size_t	 len;

	len = strlen(p);
	bufncat(h, p, len);
}
617
618
619void
620buffmt(struct html *h, const char *fmt, ...)
621{
622 va_list ap;
623
624 va_start(ap, fmt);
625 (void)vsnprintf(h->buf + (int)h->buflen,
626 BUFSIZ - h->buflen - 1, fmt, ap);
627 va_end(ap);
628 h->buflen = strlen(h->buf);
629}
630
631
632void
633bufncat(struct html *h, const char *p, size_t sz)
634{
635
636 if (h->buflen + sz > BUFSIZ - 1)
637 sz = BUFSIZ - 1 - h->buflen;
638
639 (void)strncat(h->buf, p, sz);
640 h->buflen += sz;
641}
642
643
644void
645buffmt_includes(struct html *h, const char *name)
646{
647 const char *p, *pp;
648
649 pp = h->base_includes;
650
651 while (NULL != (p = strchr(pp, '%'))) {
652 bufncat(h, pp, (size_t)(p - pp));
653 switch (*(p + 1)) {
654 case('I'):
655 bufcat(h, name);
656 break;
657 default:
658 bufncat(h, p, 2);
659 break;
660 }
661 pp = p + 2;
662 }
663 if (pp)
664 bufcat(h, pp);
665}
666
667
668void
669buffmt_man(struct html *h,
670 const char *name, const char *sec)
671{
672 const char *p, *pp;
673
674 pp = h->base_man;
675
676 /* LINTED */
677 while (NULL != (p = strchr(pp, '%'))) {
678 bufncat(h, pp, (size_t)(p - pp));
679 switch (*(p + 1)) {
680 case('S'):
681 bufcat(h, sec ? sec : "1");
682 break;
683 case('N'):
684 buffmt(h, name);
685 break;
686 default:
687 bufncat(h, p, 2);
688 break;
689 }
690 pp = p + 2;
691 }
692 if (pp)
693 bufcat(h, pp);
694}
695
696
697void
698bufcat_su(struct html *h, const char *p, const struct roffsu *su)
699{
700 double v;
701 const char *u;
702
703 v = su->scale;
704
705 switch (su->unit) {
706 case (SCALE_CM):
707 u = "cm";
708 break;
709 case (SCALE_IN):
710 u = "in";
711 break;
712 case (SCALE_PC):
713 u = "pc";
714 break;
715 case (SCALE_PT):
716 u = "pt";
717 break;
718 case (SCALE_EM):
719 u = "em";
720 break;
721 case (SCALE_MM):
722 if (0 == (v /= 100))
723 v = 1;
724 u = "em";
725 break;
726 case (SCALE_EN):
727 u = "ex";
728 break;
729 case (SCALE_BU):
730 u = "ex";
731 break;
732 case (SCALE_VS):
733 u = "em";
734 break;
735 default:
736 u = "ex";
737 break;
738 }
739
740 /*
741 * XXX: the CSS spec isn't clear as to which types accept
742 * integer or real numbers, so we just make them all decimals.
743 */
744 buffmt(h, "%s: %.2f%s;", p, v, u);
745}
746
747
/*
 * Append "src" to the identifier held in "dst", writing each source
 * byte as two lower-case hexadecimal digits.
 *
 * "dst" must hold a NUL-terminated string (possibly empty, possibly
 * a lone or leading '#') and "sz" is the total size of the "dst"
 * buffer in bytes; it must exceed 2.  An empty identifier is started
 * with the letter 'x'.  Source bytes that no longer fit as a
 * complete digit pair are dropped; "dst" always stays terminated.
 */
void
html_idcat(char *dst, const char *src, int sz)
{

	assert(sz > 2);

	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

	/* Step over the '#' of a fragment reference. */

	if ('#' == *dst) {
		dst++;
		sz--;
	}

	/* We can't start with a number (bah). */

	if ('\0' == *dst) {
		*dst++ = 'x';
		*dst = '\0';
		sz--;
	}

	for ( ; *dst != '\0' && sz; dst++, sz--)
		/* Jump to end. */ ;

	/*
	 * Every byte needs room for two digits plus the terminator.
	 * Going through unsigned char keeps bytes above 0x7f from
	 * being sign-extended into eight digits where char is signed.
	 */

	for ( ; *src != '\0' && sz > 2; src++) {
		(void)snprintf(dst, (size_t)sz, "%.2x",
		    (unsigned int)(unsigned char)*src);
		dst += 2;
		sz -= 2;
	}
}
777}