Bring in OpenBSD's mandoc(1) tool for formatting manual pages.
[dragonfly.git] / usr.bin / mandoc / html.c
1 /*      $Id: html.c,v 1.1 2009/10/21 19:13:50 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #include <sys/types.h>
18 #include <sys/queue.h>
19
20 #include <assert.h>
21 #include <err.h>
22 #include <stdio.h>
23 #include <stdarg.h>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <unistd.h>
28
29 #include "out.h"
30 #include "chars.h"
31 #include "html.h"
32 #include "main.h"
33
/*
 * Strip the const qualifier from a pointer by round-tripping it through
 * uintptr_t.  Used where an interface takes a non-const pointer, such
 * as the getsubopt() call in html_alloc().
 */
#define UNCONST(a)      ((void *)(uintptr_t)(const void *)(a))

/* The two quoted strings printed inside <!DOCTYPE ...> by print_gen_doctype(). */
#define DOCTYPE         "-//W3C//DTD HTML 4.01//EN"
#define DTD             "http://www.w3.org/TR/html4/strict.dtd"
38
/*
 * Static description of one HTML element: the name printed inside the
 * angle brackets and a bitmask of the HTML_* flags defined below, which
 * print_otag() and print_ctag() consult.
 */
struct  htmldata {
        const char       *name; /* element name as printed */
        int               flags; /* bitwise OR of the flags below */
/* Opening/closing the element sets HTML_NEWLINE; no space precedes its open tag. */
#define HTML_CLRLINE     (1 << 0)
/* print_otag() does not push the element onto h->tags, so it is never closed. */
#define HTML_NOSTACK     (1 << 1)
};
45
/*
 * Element table, indexed by enum htmltag (see print_otag() and
 * print_ctag()).  The trailing comment on each row names the enum
 * constant the row is meant to correspond to; the order here must
 * match the order of that enumeration.
 */
static  const struct htmldata htmltags[TAG_MAX] = {
        {"html",        HTML_CLRLINE}, /* TAG_HTML */
        {"head",        HTML_CLRLINE}, /* TAG_HEAD */
        {"body",        HTML_CLRLINE}, /* TAG_BODY */
        {"meta",        HTML_CLRLINE | HTML_NOSTACK}, /* TAG_META */
        {"title",       HTML_CLRLINE}, /* TAG_TITLE */
        {"div",         HTML_CLRLINE}, /* TAG_DIV */
        {"h1",          0}, /* TAG_H1 */
        {"h2",          0}, /* TAG_H2 */
        {"p",           HTML_CLRLINE}, /* TAG_P */
        {"span",        0}, /* TAG_SPAN */
        {"link",        HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
        {"br",          HTML_CLRLINE | HTML_NOSTACK}, /* TAG_BR */
        {"a",           0}, /* TAG_A */
        {"table",       HTML_CLRLINE}, /* TAG_TABLE */
        {"col",         HTML_CLRLINE | HTML_NOSTACK}, /* TAG_COL */
        {"tr",          HTML_CLRLINE}, /* TAG_TR */
        {"td",          HTML_CLRLINE}, /* TAG_TD */
        {"li",          HTML_CLRLINE}, /* TAG_LI */
        {"ul",          HTML_CLRLINE}, /* TAG_UL */
        {"ol",          HTML_CLRLINE}, /* TAG_OL */
        {"base",        HTML_CLRLINE | HTML_NOSTACK}, /* TAG_BASE */
};
69
/*
 * Attribute names, indexed by the key member of struct htmlpair (see
 * print_otag()).  The order must match the attribute-key enumeration,
 * which is declared outside this file (presumably in html.h -- verify
 * when adding entries).
 */
static  const char       *const htmlattrs[ATTR_MAX] = {
        "http-equiv",
        "content",
        "name",
        "rel",
        "href",
        "type",
        "media",
        "class",
        "style",
        "width",
        "valign",
        "target",
        "id",
};
85
86 void *
87 html_alloc(char *outopts)
88 {
89         struct html     *h;
90         const char      *toks[4];
91         char            *v;
92
93         toks[0] = "style";
94         toks[1] = "man";
95         toks[2] = "includes";
96         toks[3] = NULL;
97
98         if (NULL == (h = calloc(1, sizeof(struct html))))
99                 return(NULL);
100
101         SLIST_INIT(&h->tags);
102         SLIST_INIT(&h->ords);
103
104         if (NULL == (h->symtab = chars_init(CHARS_HTML))) {
105                 free(h);
106                 return(NULL);
107         }
108
109         while (outopts && *outopts)
110                 switch (getsubopt(&outopts, UNCONST(toks), &v)) {
111                 case (0):
112                         h->style = v;
113                         break;
114                 case (1):
115                         h->base_man = v;
116                         break;
117                 case (2):
118                         h->base_includes = v;
119                         break;
120                 default:
121                         break;
122                 }
123
124         return(h);
125 }
126
127
128 void
129 html_free(void *p)
130 {
131         struct tag      *tag;
132         struct ord      *ord;
133         struct html     *h;
134
135         h = (struct html *)p;
136
137         while ( ! SLIST_EMPTY(&h->ords)) {
138                 ord = SLIST_FIRST(&h->ords);
139                 SLIST_REMOVE_HEAD(&h->ords, entry);
140                 free(ord);
141         }
142
143         while ( ! SLIST_EMPTY(&h->tags)) {
144                 tag = SLIST_FIRST(&h->tags);
145                 SLIST_REMOVE_HEAD(&h->tags, entry);
146                 free(tag);
147         }
148
149         if (h->symtab)
150                 chars_free(h->symtab);
151
152         free(h);
153 }
154
155
156 void
157 print_gen_head(struct html *h)
158 {
159         struct htmlpair  tag[4];
160
161         tag[0].key = ATTR_HTTPEQUIV;
162         tag[0].val = "Content-Type";
163         tag[1].key = ATTR_CONTENT;
164         tag[1].val = "text/html; charset=utf-8";
165         print_otag(h, TAG_META, 2, tag);
166
167         tag[0].key = ATTR_NAME;
168         tag[0].val = "resource-type";
169         tag[1].key = ATTR_CONTENT;
170         tag[1].val = "document";
171         print_otag(h, TAG_META, 2, tag);
172
173         if (h->style) {
174                 tag[0].key = ATTR_REL;
175                 tag[0].val = "stylesheet";
176                 tag[1].key = ATTR_HREF;
177                 tag[1].val = h->style;
178                 tag[2].key = ATTR_TYPE;
179                 tag[2].val = "text/css";
180                 tag[3].key = ATTR_MEDIA;
181                 tag[3].val = "all";
182                 print_otag(h, TAG_LINK, 4, tag);
183         }
184 }
185
186
187 static void
188 print_spec(struct html *h, const char *p, int len)
189 {
190         const char      *rhs;
191         int              i;
192         size_t           sz;
193
194         rhs = chars_a2ascii(h->symtab, p, (size_t)len, &sz);
195
196         if (NULL == rhs)
197                 return;
198         for (i = 0; i < (int)sz; i++)
199                 putchar(rhs[i]);
200 }
201
202
203 static void
204 print_res(struct html *h, const char *p, int len)
205 {
206         const char      *rhs;
207         int              i;
208         size_t           sz;
209
210         rhs = chars_a2res(h->symtab, p, (size_t)len, &sz);
211
212         if (NULL == rhs)
213                 return;
214         for (i = 0; i < (int)sz; i++)
215                 putchar(rhs[i]);
216 }
217
218
/*
 * Process one backslash escape sequence.
 *
 * On entry *p points at the backslash.  On return *p points at the
 * last character that was consumed, so that a caller which then
 * advances by one continues with the text following the escape.  If
 * the string ends in the middle of the sequence, *p is left pointing
 * at the terminating NUL instead; callers must check for that before
 * advancing again.
 *
 * Forms handled:
 *   \c   \(cc   \[name]        looked up and printed by print_spec()
 *   \*c  \*(cc  \*[name]       looked up and printed by print_res()
 *   \fc                        consumed; nothing is printed (TODO)
 */
static void
print_escape(struct html *h, const char **p)
{
        int              j, type;
        const char      *wp;

        wp = *p;
        /* type selects the lookup for the bracketed form: 1 = spec, 0 = res. */
        type = 1;

        /* Step over the backslash; a lone trailing backslash ends here. */
        if (0 == *(++wp)) {
                *p = wp;
                return;
        }

        if ('(' == *wp) {
                /* Two-character name: both characters must be present. */
                wp++;
                if (0 == *wp || 0 == *(wp + 1)) {
                        *p = 0 == *wp ? wp : wp + 1;
                        return;
                }

                print_spec(h, wp, 2);
                *p = ++wp;
                return;

        } else if ('*' == *wp) {
                if (0 == *(++wp)) {
                        *p = wp;
                        return;
                }

                switch (*wp) {
                case ('('):
                        /* Two-character name, as above but via print_res(). */
                        wp++;
                        if (0 == *wp || 0 == *(wp + 1)) {
                                *p = 0 == *wp ? wp : wp + 1;
                                return;
                        }

                        print_res(h, wp, 2);
                        *p = ++wp;
                        return;
                case ('['):
                        /* Bracketed name: handled by the shared code below. */
                        type = 0;
                        break;
                default:
                        print_res(h, wp, 1);
                        *p = wp;
                        return;
                }

        } else if ('f' == *wp) {
                if (0 == *(++wp)) {
                        *p = wp;
                        return;
                }

                /* The font letter is consumed, but no markup is emitted yet. */
                switch (*wp) {
                case ('B'):
                        /* TODO */
                        break;
                case ('I'):
                        /* TODO */
                        break;
                case ('P'):
                        /* FALLTHROUGH */
                case ('R'):
                        /* TODO */
                        break;
                default:
                        break;
                }

                *p = wp;
                return;

        } else if ('[' != *wp) {
                /* Single-character name. */
                print_spec(h, wp, 1);
                *p = wp;
                return;
        }

        /*
         * Bracketed name: wp is on the '['.  Skip it, then count the
         * characters (j) up to the closing ']'.
         */
        wp++;
        for (j = 0; *wp && ']' != *wp; wp++, j++)
                /* Loop... */ ;

        /* No closing bracket: print nothing and stop at the NUL. */
        if (0 == *wp) {
                *p = wp;
                return;
        }

        if (type)
                print_spec(h, wp - j, j);
        else
                print_res(h, wp - j, j);

        /* Leave *p on the ']'. */
        *p = wp;
}
317
318
/*
 * Write the NUL-terminated string p to standard output as HTML.
 *
 * Backslash escape sequences are handed to print_escape().  The
 * characters '<', '>', '&' and '"' are replaced by the corresponding
 * entities; the double quote is included because this function is
 * also used for attribute values, which print_otag() wraps in double
 * quotes.  Every other character is copied unchanged.
 */
static void
print_encode(struct html *h, const char *p)
{

        for (; *p; p++) {
                switch (*p) {
                case ('\\'):
                        print_escape(h, &p);
                        /*
                         * A truncated escape leaves p on the
                         * terminating NUL.  Stop here: letting the
                         * loop increment run would step past the end
                         * of the string.
                         */
                        if ('\0' == *p)
                                return;
                        break;
                case ('<'):
                        printf("&lt;");
                        break;
                case ('>'):
                        printf("&gt;");
                        break;
                case ('&'):
                        printf("&amp;");
                        break;
                case ('"'):
                        printf("&quot;");
                        break;
                default:
                        putchar(*p);
                        break;
                }
        }
}
344
345
346 struct tag *
347 print_otag(struct html *h, enum htmltag tag,
348                 int sz, const struct htmlpair *p)
349 {
350         int              i;
351         struct tag      *t;
352
353         if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
354                 if (NULL == (t = malloc(sizeof(struct tag))))
355                         err(EXIT_FAILURE, "malloc");
356                 t->tag = tag;
357                 SLIST_INSERT_HEAD(&h->tags, t, entry);
358         } else
359                 t = NULL;
360
361         if ( ! (HTML_NOSPACE & h->flags))
362                 if ( ! (HTML_CLRLINE & htmltags[tag].flags))
363                         printf(" ");
364
365         printf("<%s", htmltags[tag].name);
366         for (i = 0; i < sz; i++) {
367                 printf(" %s=\"", htmlattrs[p[i].key]);
368                 assert(p->val);
369                 print_encode(h, p[i].val);
370                 printf("\"");
371         }
372         printf(">");
373
374         h->flags |= HTML_NOSPACE;
375         if (HTML_CLRLINE & htmltags[tag].flags)
376                 h->flags |= HTML_NEWLINE;
377         else
378                 h->flags &= ~HTML_NEWLINE;
379
380         return(t);
381 }
382
383
384 /* ARGSUSED */
385 static void
386 print_ctag(struct html *h, enum htmltag tag)
387 {
388
389         printf("</%s>", htmltags[tag].name);
390         if (HTML_CLRLINE & htmltags[tag].flags)
391                 h->flags |= HTML_NOSPACE;
392         if (HTML_CLRLINE & htmltags[tag].flags)
393                 h->flags |= HTML_NEWLINE;
394         else
395                 h->flags &= ~HTML_NEWLINE;
396 }
397
398
399 /* ARGSUSED */
400 void
401 print_gen_doctype(struct html *h)
402 {
403
404         printf("<!DOCTYPE HTML PUBLIC \"%s\" \"%s\">", DOCTYPE, DTD);
405 }
406
407
408 void
409 print_text(struct html *h, const char *p)
410 {
411
412         if (*p && 0 == *(p + 1))
413                 switch (*p) {
414                 case('.'):
415                         /* FALLTHROUGH */
416                 case(','):
417                         /* FALLTHROUGH */
418                 case(';'):
419                         /* FALLTHROUGH */
420                 case(':'):
421                         /* FALLTHROUGH */
422                 case('?'):
423                         /* FALLTHROUGH */
424                 case('!'):
425                         /* FALLTHROUGH */
426                 case(')'):
427                         /* FALLTHROUGH */
428                 case(']'):
429                         /* FALLTHROUGH */
430                 case('}'):
431                         if ( ! (HTML_IGNDELIM & h->flags))
432                                 h->flags |= HTML_NOSPACE;
433                         break;
434                 default:
435                         break;
436                 }
437
438         if ( ! (h->flags & HTML_NOSPACE))
439                 printf(" ");
440
441         h->flags &= ~HTML_NOSPACE;
442         h->flags &= ~HTML_NEWLINE;
443
444         if (p)
445                 print_encode(h, p);
446
447         if (*p && 0 == *(p + 1))
448                 switch (*p) {
449                 case('('):
450                         /* FALLTHROUGH */
451                 case('['):
452                         /* FALLTHROUGH */
453                 case('{'):
454                         h->flags |= HTML_NOSPACE;
455                         break;
456                 default:
457                         break;
458                 }
459 }
460
461
462 void
463 print_tagq(struct html *h, const struct tag *until)
464 {
465         struct tag      *tag;
466
467         while ( ! SLIST_EMPTY(&h->tags)) {
468                 tag = SLIST_FIRST(&h->tags);
469                 print_ctag(h, tag->tag);
470                 SLIST_REMOVE_HEAD(&h->tags, entry);
471                 free(tag);
472                 if (until && tag == until)
473                         return;
474         }
475 }
476
477
478 void
479 print_stagq(struct html *h, const struct tag *suntil)
480 {
481         struct tag      *tag;
482
483         while ( ! SLIST_EMPTY(&h->tags)) {
484                 tag = SLIST_FIRST(&h->tags);
485                 if (suntil && tag == suntil)
486                         return;
487                 print_ctag(h, tag->tag);
488                 SLIST_REMOVE_HEAD(&h->tags, entry);
489                 free(tag);
490         }
491 }
492
493
494 void
495 bufinit(struct html *h)
496 {
497
498         h->buf[0] = '\0';
499         h->buflen = 0;
500 }
501
502
/*
 * Append "key:val;" to the scratch buffer.
 */
void
bufcat_style(struct html *h, const char *key, const char *val)
{

        /* Property name and separator. */
        bufcat(h, key);
        bufcat(h, ":");
        /* Value and terminator. */
        bufcat(h, val);
        bufcat(h, ";");
}
512
513
/*
 * Append the whole NUL-terminated string p to the scratch buffer;
 * the work is done by bufncat().
 */
void
bufcat(struct html *h, const char *p)
{
        size_t           len;

        len = strlen(p);
        bufncat(h, p, len);
}
520
521
522 void
523 buffmt(struct html *h, const char *fmt, ...)
524 {
525         va_list          ap;
526
527         va_start(ap, fmt);
528         (void)vsnprintf(h->buf + (int)h->buflen,
529                         BUFSIZ - h->buflen - 1, fmt, ap);
530         va_end(ap);
531         h->buflen = strlen(h->buf);
532 }
533
534
535 void
536 bufncat(struct html *h, const char *p, size_t sz)
537 {
538
539         if (h->buflen + sz > BUFSIZ - 1)
540                 sz = BUFSIZ - 1 - h->buflen;
541
542         (void)strncat(h->buf, p, sz);
543         h->buflen += sz;
544 }
545
546
547 void
548 buffmt_includes(struct html *h, const char *name)
549 {
550         const char      *p, *pp;
551
552         pp = h->base_includes;
553
554         while (NULL != (p = strchr(pp, '%'))) {
555                 bufncat(h, pp, (size_t)(p - pp));
556                 switch (*(p + 1)) {
557                 case('I'):
558                         bufcat(h, name);
559                         break;
560                 default:
561                         bufncat(h, p, 2);
562                         break;
563                 }
564                 pp = p + 2;
565         }
566         if (pp)
567                 bufcat(h, pp);
568 }
569
570
571 void
572 buffmt_man(struct html *h,
573                 const char *name, const char *sec)
574 {
575         const char      *p, *pp;
576
577         pp = h->base_man;
578
579         /* LINTED */
580         while (NULL != (p = strchr(pp, '%'))) {
581                 bufncat(h, pp, (size_t)(p - pp));
582                 switch (*(p + 1)) {
583                 case('S'):
584                         bufcat(h, sec ? sec : "1");
585                         break;
586                 case('N'):
587                         buffmt(h, name);
588                         break;
589                 default:
590                         bufncat(h, p, 2);
591                         break;
592                 }
593                 pp = p + 2;
594         }
595         if (pp)
596                 bufcat(h, pp);
597 }
598
599
600 void
601 bufcat_su(struct html *h, const char *p, const struct roffsu *su)
602 {
603         double           v;
604         const char      *u;
605
606         v = su->scale;
607
608         switch (su->unit) {
609         case (SCALE_CM):
610                 u = "cm";
611                 break;
612         case (SCALE_IN):
613                 u = "in";
614                 break;
615         case (SCALE_PC):
616                 u = "pc";
617                 break;
618         case (SCALE_PT):
619                 u = "pt";
620                 break;
621         case (SCALE_EM):
622                 u = "em";
623                 break;
624         case (SCALE_MM):
625                 if (0 == (v /= 100))
626                         v = 1;
627                 u = "em";
628                 break;
629         case (SCALE_EN):
630                 u = "ex";
631                 break;
632         case (SCALE_BU):
633                 u = "ex";
634                 break;
635         case (SCALE_VS):
636                 u = "em";
637                 break;
638         default:
639                 u = "ex";
640                 break;
641         }
642
643         if (su->pt)
644                 buffmt(h, "%s: %f%s;", p, v, u);
645         else
646                 /* LINTED */
647                 buffmt(h, "%s: %d%s;", p, (int)v, u);
648 }