4d8332f81b6f458d13266d7bf06b19a7b4d8d213
[dragonfly.git] / usr.bin / mandoc / html.c
1 /*      $Id: html.c,v 1.80 2009/11/02 06:22:44 kristaps Exp $ */
2 /*
3  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #include <sys/types.h>
18
19 #include <assert.h>
20 #include <ctype.h>
21 #include <stdarg.h>
22 #include <stdio.h>
23 #include <stdint.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
27
28 #include "out.h"
29 #include "chars.h"
30 #include "html.h"
31 #include "main.h"
32
/*
 * Turn a pointer-to-const into a plain "void *" by casting it through
 * "const void *" and uintptr_t.  Used in html_alloc() to hand the
 * const token table to getsubopt().
 */
#define UNCONST(a)      ((void *)(uintptr_t)(const void *)(a))

/* The two quoted identifiers printed by print_gen_doctype(). */
#define DOCTYPE         "-//W3C//DTD HTML 4.01//EN"
#define DTD             "http://www.w3.org/TR/html4/strict.dtd"
37
/*
 * Bits for htmldata.flags.
 *
 * HTML_CLRLINE: print_otag() does not put a separating space in front of
 *               the tag and print_ctag() emits a newline after closing it.
 * HTML_NOSTACK: print_otag() does not push the tag onto the open-tag
 *               list, so it is never closed by print_tagq()/print_stagq().
 */
#define HTML_CLRLINE     (1 << 0)
#define HTML_NOSTACK     (1 << 1)

/* Static description of one HTML element: its name and behaviour bits. */
struct  htmldata {
        const char       *name;         /* element name as written in markup */
        int               flags;        /* HTML_CLRLINE | HTML_NOSTACK bits */
};
44
/*
 * Element table, indexed by enum htmltag (see print_otag() and
 * print_ctag()).  The order of the rows must therefore match the order
 * of the enumeration.
 */
static  const struct htmldata htmltags[TAG_MAX] = {
        {"html",        HTML_CLRLINE}, /* TAG_HTML */
        {"head",        HTML_CLRLINE}, /* TAG_HEAD */
        {"body",        HTML_CLRLINE}, /* TAG_BODY */
        {"meta",        HTML_CLRLINE | HTML_NOSTACK}, /* TAG_META */
        {"title",       HTML_CLRLINE}, /* TAG_TITLE */
        {"div",         HTML_CLRLINE}, /* TAG_DIV */
        {"h1",          0}, /* TAG_H1 */
        {"h2",          0}, /* TAG_H2 */
        {"p",           HTML_CLRLINE}, /* TAG_P */
        {"span",        0}, /* TAG_SPAN */
        {"link",        HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
        {"br",          HTML_CLRLINE | HTML_NOSTACK}, /* TAG_BR */
        {"a",           0}, /* TAG_A */
        {"table",       HTML_CLRLINE}, /* TAG_TABLE */
        {"col",         HTML_CLRLINE | HTML_NOSTACK}, /* TAG_COL */
        {"tr",          HTML_CLRLINE}, /* TAG_TR */
        {"td",          HTML_CLRLINE}, /* TAG_TD */
        {"li",          HTML_CLRLINE}, /* TAG_LI */
        {"ul",          HTML_CLRLINE}, /* TAG_UL */
        {"ol",          HTML_CLRLINE}, /* TAG_OL */
        {"base",        HTML_CLRLINE | HTML_NOSTACK}, /* TAG_BASE */
};
68
/*
 * Attribute names, indexed by the "key" member of struct htmlpair (see
 * print_otag()).  The order must match the attribute enumeration that
 * defines ATTR_MAX -- NOTE(review): that enumeration is declared outside
 * this file; confirm the order when adding entries.
 */
static  const char       *const htmlattrs[ATTR_MAX] = {
        "http-equiv",
        "content",
        "name",
        "rel",
        "href",
        "type",
        "media",
        "class",
        "style",
        "width",
        "valign",
        "target",
        "id",
        "summary",
};
85
/*
 * Explicit prototype for getsubopt(), used by html_alloc().
 * NOTE(review): presumably needed because the Linux headers do not
 * expose it under the feature macros this file is built with -- confirm.
 */
#ifdef __linux__
extern  int               getsubopt(char **, char * const *, char **);
#endif
89
90 void *
91 html_alloc(char *outopts)
92 {
93         struct html     *h;
94         const char      *toks[4];
95         char            *v;
96
97         toks[0] = "style";
98         toks[1] = "man";
99         toks[2] = "includes";
100         toks[3] = NULL;
101
102         h = calloc(1, sizeof(struct html));
103         if (NULL == h) {
104                 perror(NULL);
105                 exit(EXIT_FAILURE);
106         }
107
108         h->tags.head = NULL;
109         h->ords.head = NULL;
110         h->symtab = chars_init(CHARS_HTML);
111
112         while (outopts && *outopts)
113                 switch (getsubopt(&outopts, UNCONST(toks), &v)) {
114                 case (0):
115                         h->style = v;
116                         break;
117                 case (1):
118                         h->base_man = v;
119                         break;
120                 case (2):
121                         h->base_includes = v;
122                         break;
123                 default:
124                         break;
125                 }
126
127         return(h);
128 }
129
130
131 void
132 html_free(void *p)
133 {
134         struct tag      *tag;
135         struct ord      *ord;
136         struct html     *h;
137
138         h = (struct html *)p;
139
140         while ((ord = h->ords.head) != NULL) {
141                 h->ords.head = ord->next;
142                 free(ord);
143         }
144
145         while ((tag = h->tags.head) != NULL) {
146                 h->tags.head = tag->next;
147                 free(tag);
148         }
149
150         if (h->symtab)
151                 chars_free(h->symtab);
152
153         free(h);
154 }
155
156
157 void
158 print_gen_head(struct html *h)
159 {
160         struct htmlpair  tag[4];
161
162         tag[0].key = ATTR_HTTPEQUIV;
163         tag[0].val = "Content-Type";
164         tag[1].key = ATTR_CONTENT;
165         tag[1].val = "text/html; charset=utf-8";
166         print_otag(h, TAG_META, 2, tag);
167
168         tag[0].key = ATTR_NAME;
169         tag[0].val = "resource-type";
170         tag[1].key = ATTR_CONTENT;
171         tag[1].val = "document";
172         print_otag(h, TAG_META, 2, tag);
173
174         if (h->style) {
175                 tag[0].key = ATTR_REL;
176                 tag[0].val = "stylesheet";
177                 tag[1].key = ATTR_HREF;
178                 tag[1].val = h->style;
179                 tag[2].key = ATTR_TYPE;
180                 tag[2].val = "text/css";
181                 tag[3].key = ATTR_MEDIA;
182                 tag[3].val = "all";
183                 print_otag(h, TAG_LINK, 4, tag);
184         }
185 }
186
187
188 static void
189 print_spec(struct html *h, const char *p, int len)
190 {
191         const char      *rhs;
192         size_t           sz;
193
194         rhs = chars_a2ascii(h->symtab, p, (size_t)len, &sz);
195
196         if (NULL == rhs)
197                 return;
198         fwrite(rhs, 1, sz, stdout);
199 }
200
201
202 static void
203 print_res(struct html *h, const char *p, int len)
204 {
205         const char      *rhs;
206         size_t           sz;
207
208         rhs = chars_a2res(h->symtab, p, (size_t)len, &sz);
209
210         if (NULL == rhs)
211                 return;
212         fwrite(rhs, 1, sz, stdout);
213 }
214
215
/*
 * Handle one backslash escape.
 *
 * On entry *p points at the backslash.  On return *p points at the LAST
 * byte consumed by the escape (the caller is expected to step past it),
 * or at the terminating NUL if the escape was cut short by the end of
 * the string.
 *
 * Forms handled:
 *   \c      \(cc      \[name]     -> print_spec()
 *   \*c     \*(cc     \*[name]    -> print_res()
 *   \fX                           -> consumed, nothing is printed
 */
static void
print_escape(struct html *h, const char **p)
{
        const char      *cp, *name;
        int              len, special;

        cp = *p + 1;            /* first byte after the backslash */
        special = 1;            /* bracketed name goes to print_spec() */

        switch (*cp) {
        case ('\0'):
                *p = cp;
                return;

        case ('('):
                /* Two-character name; both bytes must be present. */
                cp++;
                if ('\0' == cp[0]) {
                        *p = cp;
                        return;
                }
                if ('\0' == cp[1]) {
                        *p = cp + 1;
                        return;
                }
                print_spec(h, cp, 2);
                *p = cp + 1;
                return;

        case ('*'):
                cp++;
                if ('\0' == *cp) {
                        *p = cp;
                        return;
                }
                if ('(' == *cp) {
                        cp++;
                        if ('\0' == cp[0]) {
                                *p = cp;
                                return;
                        }
                        if ('\0' == cp[1]) {
                                *p = cp + 1;
                                return;
                        }
                        print_res(h, cp, 2);
                        *p = cp + 1;
                        return;
                }
                if ('[' != *cp) {
                        print_res(h, cp, 1);
                        *p = cp;
                        return;
                }
                /* "\*[": bracketed name goes to print_res() instead. */
                special = 0;
                break;

        case ('f'):
                /*
                 * Font escape: the selector byte (if any) is consumed
                 * but no markup is produced for it yet (TODO).
                 */
                cp++;
                *p = cp;
                return;

        case ('['):
                break;

        default:
                print_spec(h, cp, 1);
                *p = cp;
                return;
        }

        /* Here cp points at '['; collect the name up to the ']'. */

        name = ++cp;
        while ('\0' != *cp && ']' != *cp)
                cp++;

        if ('\0' == *cp) {
                /* Unterminated: drop the escape. */
                *p = cp;
                return;
        }

        len = (int)(cp - name);
        if (special)
                print_spec(h, name, len);
        else
                print_res(h, name, len);

        *p = cp;
}
314
315
/*
 * Write the string "p" to stdout as HTML text: '<', '>' and '&' become
 * character entities and backslash escapes are expanded through
 * print_escape().  All other bytes are copied verbatim.
 */
static void
print_encode(struct html *h, const char *p)
{
        size_t           sz;

        for (; *p; p++) {
                /* Copy the run of ordinary bytes in one go. */
                sz = strcspn(p, "\\<>&");
                fwrite(p, 1, sz, stdout);
                p += sz;

                if ('\0' == *p)
                        break;

                switch (*p) {
                case ('\\'):
                        print_escape(h, &p);
                        break;
                case ('<'):
                        printf("&lt;");
                        break;
                case ('>'):
                        printf("&gt;");
                        break;
                case ('&'):
                        printf("&amp;");
                        break;
                default:
                        break;
                }

                /*
                 * print_escape() leaves p on the terminating NUL when
                 * an escape is truncated (e.g. a trailing backslash).
                 * Stop here: the loop increment would otherwise step
                 * past the end of the string.
                 */
                if ('\0' == *p)
                        break;
        }
}
342
343
344 struct tag *
345 print_otag(struct html *h, enum htmltag tag,
346                 int sz, const struct htmlpair *p)
347 {
348         int              i;
349         struct tag      *t;
350
351         if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
352                 t = malloc(sizeof(struct tag));
353                 if (NULL == t) {
354                         perror(NULL);
355                         exit(EXIT_FAILURE);
356                 }
357                 t->tag = tag;
358                 t->next = h->tags.head;
359                 h->tags.head = t;
360         } else
361                 t = NULL;
362
363         if ( ! (HTML_NOSPACE & h->flags))
364                 if ( ! (HTML_CLRLINE & htmltags[tag].flags))
365                         putchar(' ');
366
367         printf("<%s", htmltags[tag].name);
368         for (i = 0; i < sz; i++) {
369                 printf(" %s=\"", htmlattrs[p[i].key]);
370                 assert(p->val);
371                 print_encode(h, p[i].val);
372                 putchar('\"');
373         }
374         putchar('>');
375
376         h->flags |= HTML_NOSPACE;
377         if (HTML_CLRLINE & htmltags[tag].flags)
378                 h->flags |= HTML_NEWLINE;
379         else
380                 h->flags &= ~HTML_NEWLINE;
381
382         return(t);
383 }
384
385
386 /* ARGSUSED */
387 static void
388 print_ctag(struct html *h, enum htmltag tag)
389 {
390
391         printf("</%s>", htmltags[tag].name);
392         if (HTML_CLRLINE & htmltags[tag].flags) {
393                 h->flags |= HTML_NOSPACE;
394                 h->flags |= HTML_NEWLINE;
395                 putchar('\n');
396         } else
397                 h->flags &= ~HTML_NEWLINE;
398 }
399
400
401 /* ARGSUSED */
402 void
403 print_gen_doctype(struct html *h)
404 {
405
406         printf("<!DOCTYPE HTML PUBLIC \"%s\" \"%s\">", DOCTYPE, DTD);
407 }
408
409
410 void
411 print_text(struct html *h, const char *p)
412 {
413
414         if (*p && 0 == *(p + 1))
415                 switch (*p) {
416                 case('.'):
417                         /* FALLTHROUGH */
418                 case(','):
419                         /* FALLTHROUGH */
420                 case(';'):
421                         /* FALLTHROUGH */
422                 case(':'):
423                         /* FALLTHROUGH */
424                 case('?'):
425                         /* FALLTHROUGH */
426                 case('!'):
427                         /* FALLTHROUGH */
428                 case(')'):
429                         /* FALLTHROUGH */
430                 case(']'):
431                         /* FALLTHROUGH */
432                 case('}'):
433                         if ( ! (HTML_IGNDELIM & h->flags))
434                                 h->flags |= HTML_NOSPACE;
435                         break;
436                 default:
437                         break;
438                 }
439
440         if ( ! (h->flags & HTML_NOSPACE))
441                 putchar(' ');
442
443         h->flags &= ~HTML_NOSPACE;
444         h->flags &= ~HTML_NEWLINE;
445
446         if (p)
447                 print_encode(h, p);
448
449         if (*p && 0 == *(p + 1))
450                 switch (*p) {
451                 case('('):
452                         /* FALLTHROUGH */
453                 case('['):
454                         /* FALLTHROUGH */
455                 case('{'):
456                         h->flags |= HTML_NOSPACE;
457                         break;
458                 default:
459                         break;
460                 }
461 }
462
463
464 void
465 print_tagq(struct html *h, const struct tag *until)
466 {
467         struct tag      *tag;
468
469         while ((tag = h->tags.head) != NULL) {
470                 print_ctag(h, tag->tag);
471                 h->tags.head = tag->next;
472                 free(tag);
473                 if (until && tag == until)
474                         return;
475         }
476 }
477
478
479 void
480 print_stagq(struct html *h, const struct tag *suntil)
481 {
482         struct tag      *tag;
483
484         while ((tag = h->tags.head) != NULL) {
485                 if (suntil && tag == suntil)
486                         return;
487                 print_ctag(h, tag->tag);
488                 h->tags.head = tag->next;
489                 free(tag);
490         }
491 }
492
493
494 void
495 bufinit(struct html *h)
496 {
497
498         h->buf[0] = '\0';
499         h->buflen = 0;
500 }
501
502
/* Append "key:val;" to the scratch buffer in "h". */
void
bufcat_style(struct html *h, const char *key, const char *val)
{

        bufncat(h, key, strlen(key));
        bufncat(h, ":", 1);
        bufncat(h, val, strlen(val));
        bufncat(h, ";", 1);
}
512
513
/* Append the NUL-terminated string "p" to the scratch buffer in "h". */
void
bufcat(struct html *h, const char *p)
{
        size_t           len;

        len = strlen(p);
        bufncat(h, p, len);
}
520
521
522 void
523 buffmt(struct html *h, const char *fmt, ...)
524 {
525         va_list          ap;
526
527         va_start(ap, fmt);
528         (void)vsnprintf(h->buf + (int)h->buflen,
529                         BUFSIZ - h->buflen - 1, fmt, ap);
530         va_end(ap);
531         h->buflen = strlen(h->buf);
532 }
533
534
535 void
536 bufncat(struct html *h, const char *p, size_t sz)
537 {
538
539         if (h->buflen + sz > BUFSIZ - 1)
540                 sz = BUFSIZ - 1 - h->buflen;
541
542         (void)strncat(h->buf, p, sz);
543         h->buflen += sz;
544 }
545
546
547 void
548 buffmt_includes(struct html *h, const char *name)
549 {
550         const char      *p, *pp;
551
552         pp = h->base_includes;
553
554         while (NULL != (p = strchr(pp, '%'))) {
555                 bufncat(h, pp, (size_t)(p - pp));
556                 switch (*(p + 1)) {
557                 case('I'):
558                         bufcat(h, name);
559                         break;
560                 default:
561                         bufncat(h, p, 2);
562                         break;
563                 }
564                 pp = p + 2;
565         }
566         if (pp)
567                 bufcat(h, pp);
568 }
569
570
571 void
572 buffmt_man(struct html *h,
573                 const char *name, const char *sec)
574 {
575         const char      *p, *pp;
576
577         pp = h->base_man;
578
579         /* LINTED */
580         while (NULL != (p = strchr(pp, '%'))) {
581                 bufncat(h, pp, (size_t)(p - pp));
582                 switch (*(p + 1)) {
583                 case('S'):
584                         bufcat(h, sec ? sec : "1");
585                         break;
586                 case('N'):
587                         buffmt(h, name);
588                         break;
589                 default:
590                         bufncat(h, p, 2);
591                         break;
592                 }
593                 pp = p + 2;
594         }
595         if (pp)
596                 bufcat(h, pp);
597 }
598
599
600 void
601 bufcat_su(struct html *h, const char *p, const struct roffsu *su)
602 {
603         double           v;
604         const char      *u;
605
606         v = su->scale;
607
608         switch (su->unit) {
609         case (SCALE_CM):
610                 u = "cm";
611                 break;
612         case (SCALE_IN):
613                 u = "in";
614                 break;
615         case (SCALE_PC):
616                 u = "pc";
617                 break;
618         case (SCALE_PT):
619                 u = "pt";
620                 break;
621         case (SCALE_EM):
622                 u = "em";
623                 break;
624         case (SCALE_MM):
625                 if (0 == (v /= 100))
626                         v = 1;
627                 u = "em";
628                 break;
629         case (SCALE_EN):
630                 u = "ex";
631                 break;
632         case (SCALE_BU):
633                 u = "ex";
634                 break;
635         case (SCALE_VS):
636                 u = "em";
637                 break;
638         default:
639                 u = "ex";
640                 break;
641         }
642
643         if (su->pt)
644                 buffmt(h, "%s: %f%s;", p, v, u);
645         else
646                 /* LINTED */
647                 buffmt(h, "%s: %d%s;", p, (int)v, u);
648 }
649
650
/*
 * Append an identifier-safe encoding of "src" to the string in "dst".
 *
 * "dst" must hold a NUL-terminated string and "sz" is the total size of
 * the buffer in bytes.  The encoding is the letter 'x' followed by two
 * lower-case hexadecimal digits per byte of "src".  If the buffer is
 * too small the result is cut at a whole-byte boundary: a byte of "src"
 * is either encoded with both digits or not at all.  The result is
 * always NUL-terminated.
 *
 * There must be room for at least "x" plus one more byte and the
 * terminator after the existing contents (checked with assert()).
 */
void
html_idcat(char *dst, const char *src, int sz)
{
        int              ssz;

        assert(sz);

        /* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

        for ( ; *dst != '\0' && sz; dst++, sz--)
                /* Jump to end. */ ;

        assert(sz > 2);

        /* We can't start with a number (bah). */

        *dst++ = 'x';
        *dst = '\0';
        sz--;

        /*
         * Each byte needs two digits plus the terminator, hence
         * "sz > 2".  The byte is converted through unsigned char so
         * that values above 0x7f are not sign-extended into eight
         * hexadecimal digits on platforms where char is signed.
         */
        for ( ; *src != '\0' && sz > 2; src++) {
                ssz = snprintf(dst, (size_t)sz, "%.2x",
                    (unsigned int)(unsigned char)*src);
                if (2 != ssz)
                        break;
                sz -= ssz;
                dst += ssz;
        }
}