2b13e374b8979cb562d3b28d2060cd1ad434912c
[dragonfly.git] / usr.bin / mandoc / html.c
1 /*      $Id: html.c,v 1.2 2009/10/27 21:40:07 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #include <sys/types.h>
18
19 #include <assert.h>
20 #include <err.h>
21 #include <stdio.h>
22 #include <stdarg.h>
23 #include <stdint.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
27
28 #include "out.h"
29 #include "chars.h"
30 #include "html.h"
31 #include "main.h"
32
/*
 * Cast away the const qualifier of a pointer by way of uintptr_t.
 * Used to hand the const suboption table to getsubopt() in html_alloc().
 */
#define UNCONST(a)      ((void *)(uintptr_t)(const void *)(a))

/* System and public identifiers emitted by print_gen_doctype(). */
#define DTD             "http://www.w3.org/TR/html4/strict.dtd"
#define DOCTYPE         "-//W3C//DTD HTML 4.01//EN"
37
/*
 * Flag bits for htmldata.flags:
 *   HTML_CLRLINE  the element sets HTML_NEWLINE when opened or closed
 *                 and is not preceded by a separating space;
 *   HTML_NOSTACK  print_otag() does not push the element onto the
 *                 open-tag stack (it is never closed).
 */
#define HTML_CLRLINE     (1 << 0)
#define HTML_NOSTACK     (1 << 1)

/* Static description of one HTML element. */
struct  htmldata {
        const char       *name;         /* element name as written in markup */
        int               flags;        /* bitwise OR of the HTML_* bits above */
};
44
45 static  const struct htmldata htmltags[TAG_MAX] = {
46         {"html",        HTML_CLRLINE}, /* TAG_HTML */
47         {"head",        HTML_CLRLINE}, /* TAG_HEAD */
48         {"body",        HTML_CLRLINE}, /* TAG_BODY */
49         {"meta",        HTML_CLRLINE | HTML_NOSTACK}, /* TAG_META */
50         {"title",       HTML_CLRLINE}, /* TAG_TITLE */
51         {"div",         HTML_CLRLINE}, /* TAG_DIV */
52         {"h1",          0}, /* TAG_H1 */
53         {"h2",          0}, /* TAG_H2 */
54         {"p",           HTML_CLRLINE}, /* TAG_P */
55         {"span",        0}, /* TAG_SPAN */
56         {"link",        HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
57         {"br",          HTML_CLRLINE | HTML_NOSTACK}, /* TAG_LINK */
58         {"a",           0}, /* TAG_A */
59         {"table",       HTML_CLRLINE}, /* TAG_TABLE */
60         {"col",         HTML_CLRLINE | HTML_NOSTACK}, /* TAG_COL */
61         {"tr",          HTML_CLRLINE}, /* TAG_TR */
62         {"td",          HTML_CLRLINE}, /* TAG_TD */
63         {"li",          HTML_CLRLINE}, /* TAG_LI */
64         {"ul",          HTML_CLRLINE}, /* TAG_UL */
65         {"ol",          HTML_CLRLINE}, /* TAG_OL */
66         {"base",        HTML_CLRLINE | HTML_NOSTACK}, /* TAG_BASE */
67 };
68
69 static  const char       *const htmlattrs[ATTR_MAX] = {
70         "http-equiv",
71         "content",
72         "name",
73         "rel",
74         "href",
75         "type",
76         "media",
77         "class",
78         "style",
79         "width",
80         "valign",
81         "target",
82         "id",
83 };
84
85 void *
86 html_alloc(char *outopts)
87 {
88         struct html     *h;
89         const char      *toks[4];
90         char            *v;
91
92         toks[0] = "style";
93         toks[1] = "man";
94         toks[2] = "includes";
95         toks[3] = NULL;
96
97         if (NULL == (h = calloc(1, sizeof(struct html))))
98                 return(NULL);
99
100         h->tags.head = NULL;
101         h->ords.head = NULL;
102
103         if (NULL == (h->symtab = chars_init(CHARS_HTML))) {
104                 free(h);
105                 return(NULL);
106         }
107
108         while (outopts && *outopts)
109                 switch (getsubopt(&outopts, UNCONST(toks), &v)) {
110                 case (0):
111                         h->style = v;
112                         break;
113                 case (1):
114                         h->base_man = v;
115                         break;
116                 case (2):
117                         h->base_includes = v;
118                         break;
119                 default:
120                         break;
121                 }
122
123         return(h);
124 }
125
126
127 void
128 html_free(void *p)
129 {
130         struct tag      *tag;
131         struct ord      *ord;
132         struct html     *h;
133
134         h = (struct html *)p;
135
136         while ((ord = h->ords.head) != NULL) {
137                 h->ords.head = ord->next;
138                 free(ord);
139         }
140
141         while ((tag = h->tags.head) != NULL) {
142                 h->tags.head = tag->next;
143                 free(tag);
144         }
145
146         if (h->symtab)
147                 chars_free(h->symtab);
148
149         free(h);
150 }
151
152
153 void
154 print_gen_head(struct html *h)
155 {
156         struct htmlpair  tag[4];
157
158         tag[0].key = ATTR_HTTPEQUIV;
159         tag[0].val = "Content-Type";
160         tag[1].key = ATTR_CONTENT;
161         tag[1].val = "text/html; charset=utf-8";
162         print_otag(h, TAG_META, 2, tag);
163
164         tag[0].key = ATTR_NAME;
165         tag[0].val = "resource-type";
166         tag[1].key = ATTR_CONTENT;
167         tag[1].val = "document";
168         print_otag(h, TAG_META, 2, tag);
169
170         if (h->style) {
171                 tag[0].key = ATTR_REL;
172                 tag[0].val = "stylesheet";
173                 tag[1].key = ATTR_HREF;
174                 tag[1].val = h->style;
175                 tag[2].key = ATTR_TYPE;
176                 tag[2].val = "text/css";
177                 tag[3].key = ATTR_MEDIA;
178                 tag[3].val = "all";
179                 print_otag(h, TAG_LINK, 4, tag);
180         }
181 }
182
183
184 static void
185 print_spec(struct html *h, const char *p, int len)
186 {
187         const char      *rhs;
188         int              i;
189         size_t           sz;
190
191         rhs = chars_a2ascii(h->symtab, p, (size_t)len, &sz);
192
193         if (NULL == rhs)
194                 return;
195         for (i = 0; i < (int)sz; i++)
196                 putchar(rhs[i]);
197 }
198
199
200 static void
201 print_res(struct html *h, const char *p, int len)
202 {
203         const char      *rhs;
204         int              i;
205         size_t           sz;
206
207         rhs = chars_a2res(h->symtab, p, (size_t)len, &sz);
208
209         if (NULL == rhs)
210                 return;
211         for (i = 0; i < (int)sz; i++)
212                 putchar(rhs[i]);
213 }
214
215
/*
 * Handle one backslash escape.
 *
 * On entry *p points at the backslash.  On return *p points at the
 * last character consumed, so a caller that then advances by one
 * lands on the first character after the escape.  If the string ends
 * in the middle of an escape, *p is left on the terminating NUL and
 * nothing is printed for that escape.
 *
 * Recognised forms:
 *   \(xx  \[name]  \x      special character, via print_spec()
 *   \*(xx \*[name] \*x     reserved word, via print_res()
 *   \fX                    font change; consumed, not yet rendered
 */
static void
print_escape(struct html *h, const char **p)
{
        const char      *cur, *name;
        int              reserved;

        reserved = 0;
        cur = *p + 1;           /* first character after the backslash */

        switch (*cur) {
        case ('\0'):
                *p = cur;
                return;
        case ('('):
                /* Two-character special name. */
                cur++;
                if ('\0' == cur[0]) {
                        *p = cur;
                        return;
                }
                if ('\0' == cur[1]) {
                        *p = cur + 1;
                        return;
                }
                print_spec(h, cur, 2);
                *p = cur + 1;
                return;
        case ('*'):
                cur++;
                switch (*cur) {
                case ('\0'):
                        *p = cur;
                        return;
                case ('('):
                        /* Two-character reserved name. */
                        cur++;
                        if ('\0' == cur[0]) {
                                *p = cur;
                                return;
                        }
                        if ('\0' == cur[1]) {
                                *p = cur + 1;
                                return;
                        }
                        print_res(h, cur, 2);
                        *p = cur + 1;
                        return;
                case ('['):
                        /* Bracketed reserved name: parsed below. */
                        reserved = 1;
                        break;
                default:
                        print_res(h, cur, 1);
                        *p = cur;
                        return;
                }
                break;
        case ('f'):
                /*
                 * TODO: font escapes (B, I, P, R) are not rendered.
                 * The selector, or the terminating NUL if the string
                 * ends here, is consumed either way.
                 */
                *p = cur + 1;
                return;
        case ('['):
                /* Bracketed special name: parsed below. */
                break;
        default:
                print_spec(h, cur, 1);
                *p = cur;
                return;
        }

        /* cur is on '['; scan the name up to the closing bracket. */
        name = ++cur;
        while ('\0' != *cur && ']' != *cur)
                cur++;

        *p = cur;
        if ('\0' == *cur)
                return;         /* unterminated: print nothing */

        if (reserved)
                print_res(h, name, (int)(cur - name));
        else
                print_spec(h, name, (int)(cur - name));
}
314
315
/*
 * Write the string p to standard output as HTML character data.
 *
 * Backslash escapes are expanded by print_escape(); the markup
 * characters '<', '>', '&' and '"' are written as character
 * references.  The double quote is encoded because print_otag() uses
 * this function for attribute values, which it wraps in double quotes.
 */
static void
print_encode(struct html *h, const char *p)
{

        for (; *p; p++) {
                if ('\\' == *p) {
                        print_escape(h, &p);
                        /*
                         * A truncated escape leaves p on the
                         * terminating NUL; stop here so the loop
                         * increment cannot step past the string.
                         */
                        if ('\0' == *p)
                                break;
                        continue;
                }
                switch (*p) {
                case ('<'):
                        printf("&lt;");
                        break;
                case ('>'):
                        printf("&gt;");
                        break;
                case ('&'):
                        printf("&amp;");
                        break;
                case ('"'):
                        printf("&quot;");
                        break;
                default:
                        putchar(*p);
                        break;
                }
        }
}
341
342
343 struct tag *
344 print_otag(struct html *h, enum htmltag tag,
345                 int sz, const struct htmlpair *p)
346 {
347         int              i;
348         struct tag      *t;
349
350         if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
351                 if (NULL == (t = malloc(sizeof(struct tag))))
352                         err(EXIT_FAILURE, "malloc");
353                 t->tag = tag;
354                 t->next = h->tags.head;
355                 h->tags.head = t;
356         } else
357                 t = NULL;
358
359         if ( ! (HTML_NOSPACE & h->flags))
360                 if ( ! (HTML_CLRLINE & htmltags[tag].flags))
361                         printf(" ");
362
363         printf("<%s", htmltags[tag].name);
364         for (i = 0; i < sz; i++) {
365                 printf(" %s=\"", htmlattrs[p[i].key]);
366                 assert(p->val);
367                 print_encode(h, p[i].val);
368                 printf("\"");
369         }
370         printf(">");
371
372         h->flags |= HTML_NOSPACE;
373         if (HTML_CLRLINE & htmltags[tag].flags)
374                 h->flags |= HTML_NEWLINE;
375         else
376                 h->flags &= ~HTML_NEWLINE;
377
378         return(t);
379 }
380
381
382 /* ARGSUSED */
383 static void
384 print_ctag(struct html *h, enum htmltag tag)
385 {
386
387         printf("</%s>", htmltags[tag].name);
388         if (HTML_CLRLINE & htmltags[tag].flags)
389                 h->flags |= HTML_NOSPACE;
390         if (HTML_CLRLINE & htmltags[tag].flags)
391                 h->flags |= HTML_NEWLINE;
392         else
393                 h->flags &= ~HTML_NEWLINE;
394 }
395
396
397 /* ARGSUSED */
398 void
399 print_gen_doctype(struct html *h)
400 {
401
402         printf("<!DOCTYPE HTML PUBLIC \"%s\" \"%s\">", DOCTYPE, DTD);
403 }
404
405
406 void
407 print_text(struct html *h, const char *p)
408 {
409
410         if (*p && 0 == *(p + 1))
411                 switch (*p) {
412                 case('.'):
413                         /* FALLTHROUGH */
414                 case(','):
415                         /* FALLTHROUGH */
416                 case(';'):
417                         /* FALLTHROUGH */
418                 case(':'):
419                         /* FALLTHROUGH */
420                 case('?'):
421                         /* FALLTHROUGH */
422                 case('!'):
423                         /* FALLTHROUGH */
424                 case(')'):
425                         /* FALLTHROUGH */
426                 case(']'):
427                         /* FALLTHROUGH */
428                 case('}'):
429                         if ( ! (HTML_IGNDELIM & h->flags))
430                                 h->flags |= HTML_NOSPACE;
431                         break;
432                 default:
433                         break;
434                 }
435
436         if ( ! (h->flags & HTML_NOSPACE))
437                 printf(" ");
438
439         h->flags &= ~HTML_NOSPACE;
440         h->flags &= ~HTML_NEWLINE;
441
442         if (p)
443                 print_encode(h, p);
444
445         if (*p && 0 == *(p + 1))
446                 switch (*p) {
447                 case('('):
448                         /* FALLTHROUGH */
449                 case('['):
450                         /* FALLTHROUGH */
451                 case('{'):
452                         h->flags |= HTML_NOSPACE;
453                         break;
454                 default:
455                         break;
456                 }
457 }
458
459
460 void
461 print_tagq(struct html *h, const struct tag *until)
462 {
463         struct tag      *tag;
464
465         while ((tag = h->tags.head) != NULL) {
466                 print_ctag(h, tag->tag);
467                 h->tags.head = tag->next;
468                 free(tag);
469                 if (until && tag == until)
470                         return;
471         }
472 }
473
474
475 void
476 print_stagq(struct html *h, const struct tag *suntil)
477 {
478         struct tag      *tag;
479
480         while ((tag = h->tags.head) != NULL) {
481                 if (suntil && tag == suntil)
482                         return;
483                 print_ctag(h, tag->tag);
484                 h->tags.head = tag->next;
485                 free(tag);
486         }
487 }
488
489
490 void
491 bufinit(struct html *h)
492 {
493
494         h->buf[0] = '\0';
495         h->buflen = 0;
496 }
497
498
/*
 * Append the declaration "key:val;" to the scratch buffer.
 */
void
bufcat_style(struct html *h, const char *key, const char *val)
{

        bufncat(h, key, strlen(key));
        bufncat(h, ":", 1);
        bufncat(h, val, strlen(val));
        bufncat(h, ";", 1);
}
508
509
/*
 * Append the NUL-terminated string p to the scratch buffer by passing
 * its full length to bufncat().
 */
void
bufcat(struct html *h, const char *p)
{
        size_t           len;

        len = strlen(p);
        bufncat(h, p, len);
}
516
517
518 void
519 buffmt(struct html *h, const char *fmt, ...)
520 {
521         va_list          ap;
522
523         va_start(ap, fmt);
524         (void)vsnprintf(h->buf + (int)h->buflen,
525                         BUFSIZ - h->buflen - 1, fmt, ap);
526         va_end(ap);
527         h->buflen = strlen(h->buf);
528 }
529
530
531 void
532 bufncat(struct html *h, const char *p, size_t sz)
533 {
534
535         if (h->buflen + sz > BUFSIZ - 1)
536                 sz = BUFSIZ - 1 - h->buflen;
537
538         (void)strncat(h->buf, p, sz);
539         h->buflen += sz;
540 }
541
542
543 void
544 buffmt_includes(struct html *h, const char *name)
545 {
546         const char      *p, *pp;
547
548         pp = h->base_includes;
549
550         while (NULL != (p = strchr(pp, '%'))) {
551                 bufncat(h, pp, (size_t)(p - pp));
552                 switch (*(p + 1)) {
553                 case('I'):
554                         bufcat(h, name);
555                         break;
556                 default:
557                         bufncat(h, p, 2);
558                         break;
559                 }
560                 pp = p + 2;
561         }
562         if (pp)
563                 bufcat(h, pp);
564 }
565
566
567 void
568 buffmt_man(struct html *h,
569                 const char *name, const char *sec)
570 {
571         const char      *p, *pp;
572
573         pp = h->base_man;
574
575         /* LINTED */
576         while (NULL != (p = strchr(pp, '%'))) {
577                 bufncat(h, pp, (size_t)(p - pp));
578                 switch (*(p + 1)) {
579                 case('S'):
580                         bufcat(h, sec ? sec : "1");
581                         break;
582                 case('N'):
583                         buffmt(h, name);
584                         break;
585                 default:
586                         bufncat(h, p, 2);
587                         break;
588                 }
589                 pp = p + 2;
590         }
591         if (pp)
592                 bufcat(h, pp);
593 }
594
595
596 void
597 bufcat_su(struct html *h, const char *p, const struct roffsu *su)
598 {
599         double           v;
600         const char      *u;
601
602         v = su->scale;
603
604         switch (su->unit) {
605         case (SCALE_CM):
606                 u = "cm";
607                 break;
608         case (SCALE_IN):
609                 u = "in";
610                 break;
611         case (SCALE_PC):
612                 u = "pc";
613                 break;
614         case (SCALE_PT):
615                 u = "pt";
616                 break;
617         case (SCALE_EM):
618                 u = "em";
619                 break;
620         case (SCALE_MM):
621                 if (0 == (v /= 100))
622                         v = 1;
623                 u = "em";
624                 break;
625         case (SCALE_EN):
626                 u = "ex";
627                 break;
628         case (SCALE_BU):
629                 u = "ex";
630                 break;
631         case (SCALE_VS):
632                 u = "em";
633                 break;
634         default:
635                 u = "ex";
636                 break;
637         }
638
639         if (su->pt)
640                 buffmt(h, "%s: %f%s;", p, v, u);
641         else
642                 /* LINTED */
643                 buffmt(h, "%s: %d%s;", p, (int)v, u);
644 }