Import mdocml-1.13.1
[dragonfly.git] / contrib / mdocml / cgi.c
1 /*      $Id: cgi.c,v 1.92 2014/08/05 15:29:30 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <ctype.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <limits.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31
32 #include "mandoc.h"
33 #include "mandoc_aux.h"
34 #include "main.h"
35 #include "manpath.h"
36 #include "mansearch.h"
37 #include "cgi.h"
38
/*
 * Search parameters of one request.
 * The string members are set to NULL by http_parse() until the
 * query string supplies a non-empty value for them.
 */
struct query {
        /* Manual directory (tree) to be searched. */
        char    *manpath;
        /* Architecture restriction. */
        char    *arch;
        /* Manual section restriction. */
        char    *sec;
        /* Query expression, still unparsed. */
        char    *query;
        /* Non-zero: match whole names; zero: match substrings. */
        int      equal;
};
49
50 struct  req {
51         struct query      q;
52         char            **p; /* array of available manpaths */
53         size_t            psz; /* number of available manpaths */
54 };
55
/* Output escaping helpers. */
static void     html_print(const char *p);
static void     html_putchar(char c);
static void     http_print(const char *p);
static void     http_putchar(char c);
static void     http_printquery(const struct req *req, const char *sep);

/* Request parsing and validation. */
static int      http_decode(char *p);
static void     http_parse(struct req *req, const char *qs);
static void     pathgen(struct req *req);
static void     set_query_attr(char **attr, char **val);
static int      validate_filename(const char *file);
static int      validate_manpath(const struct req *req, const char *manpath);
static int      validate_urifrag(const char *frag);

/* Building blocks of a response. */
static void     resp_begin_html(int code, const char *msg);
static void     resp_begin_http(int code, const char *msg);
static void     resp_end_html(void);
static void     resp_searchform(const struct req *req);
static void     resp_show(const struct req *req, const char *file);
static void     catman(const struct req *req, const char *file);
static void     format(const struct req *req, const char *file);

/* Complete pages. */
static void     pg_error_badrequest(const char *msg);
static void     pg_error_internal(void);
static void     pg_index(const struct req *req);
static void     pg_noresult(const struct req *req, const char *msg);
static void     pg_search(const struct req *req);
static void     pg_searchres(const struct req *req,
                        struct manpage *r, size_t sz);
static void     pg_show(struct req *req, const char *fullpath);
83
/* Name of the CGI script; printed into generated links and forms. */
static const char *scriptname;

/*
 * Preference rank of the manual sections 1 to 9, indexed by the
 * section digit minus one.  A smaller rank is preferred.
 */
static const int sec_prios[] = {
        1, 4, 5, 8, 6, 3, 7, 2, 9
};

/* Values submitted by the section selector; "0" means all sections. */
static const char *const sec_numbers[] = {
        "0",
        "1",
        "2",
        "3",
        "3p",
        "4",
        "5",
        "6",
        "7",
        "8",
        "9"
};

/* Labels shown by the section selector, parallel to sec_numbers. */
static const char *const sec_names[] = {
        "All Sections",
        "1 - General Commands",
        "2 - System Calls",
        "3 - Subroutines",
        "3p - Perl Subroutines",
        "4 - Special Files",
        "5 - File Formats",
        "6 - Games",
        "7 - Macros and Conventions",
        "8 - Maintenance Commands",
        "9 - Kernel Interface"
};

/* Number of entries in sec_names (and in sec_numbers). */
static const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
104
/* Architectures offered by the architecture selector of the form. */
static const char *const arch_names[] = {
        "amd64", "alpha", "armish", "armv7", "aviion", "hppa",
        "hppa64", "i386", "ia64", "landisk", "loongson", "luna88k",
        "macppc", "mips64", "octeon", "sgi", "socppc", "solbourne",
        "sparc", "sparc64", "vax", "zaurus",
        "amiga", "arc", "arm32", "atari", "beagle", "cats",
        "hp300", "mac68k", "mvme68k", "mvme88k", "mvmeppc", "palm",
        "pc532", "pegasos", "pmax", "powerpc", "sun3", "wgrisc",
        "x68k"
};

/* Number of entries in arch_names. */
static const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
119
/*
 * Print one character to standard output, escaping HTML along the way:
 * the double quote, ampersand, greater-than and less-than characters
 * are replaced by character entity references.
 * Every other byte is written unchanged, so this will pass
 * non-ASCII straight to output: be warned!
 */
static void
html_putchar(char c)
{

        switch (c) {
        case ('"'):
                /* The entity is named "quot"; "&quote;" is not defined. */
                printf("&quot;");
                break;
        case ('&'):
                printf("&amp;");
                break;
        case ('>'):
                printf("&gt;");
                break;
        case ('<'):
                printf("&lt;");
                break;
        default:
                putchar((unsigned char)c);
                break;
        }
}
146
147 static void
148 http_printquery(const struct req *req, const char *sep)
149 {
150
151         if (NULL != req->q.query) {
152                 printf("query=");
153                 http_print(req->q.query);
154         }
155         if (0 == req->q.equal)
156                 printf("%sapropos=1", sep);
157         if (NULL != req->q.sec) {
158                 printf("%ssec=", sep);
159                 http_print(req->q.sec);
160         }
161         if (NULL != req->q.arch) {
162                 printf("%sarch=", sep);
163                 http_print(req->q.arch);
164         }
165         if (NULL != req->q.manpath &&
166             strcmp(req->q.manpath, req->p[0])) {
167                 printf("%smanpath=", sep);
168                 http_print(req->q.manpath);
169         }
170 }
171
/*
 * Print a string through http_putchar(), one character at a time.
 * Accepts NULL strings, for which nothing is printed.
 */
static void
http_print(const char *p)
{
        const char      *cp;

        if (p != NULL)
                for (cp = p; *cp != '\0'; cp++)
                        http_putchar(*cp);
}
181
/*
 * Print a string through html_putchar(), one character at a time.
 * Accepts NULL strings, for which nothing is printed.
 */
static void
html_print(const char *p)
{
        const char      *cp;

        if (p != NULL)
                for (cp = p; *cp != '\0'; cp++)
                        html_putchar(*cp);
}
195
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous value of the attribute is freed.  An empty string
 * is not stored: it is freed and the attribute becomes NULL.
 * In either case, *val is NULL afterwards because the caller
 * no longer owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
        char    *incoming;

        free(*attr);

        incoming = *val;
        *val = NULL;

        if (incoming[0] != '\0') {
                *attr = incoming;
                return;
        }

        free(incoming);
        *attr = NULL;
}
212
213 /*
214  * Parse the QUERY_STRING for key-value pairs
215  * and store the values into the query structure.
216  */
217 static void
218 http_parse(struct req *req, const char *qs)
219 {
220         char            *key, *val;
221         size_t           keysz, valsz;
222
223         req->q.manpath  = NULL;
224         req->q.arch     = NULL;
225         req->q.sec      = NULL;
226         req->q.query    = NULL;
227         req->q.equal    = 1;
228
229         key = val = NULL;
230         while (*qs != '\0') {
231
232                 /* Parse one key. */
233
234                 keysz = strcspn(qs, "=;&");
235                 key = mandoc_strndup(qs, keysz);
236                 qs += keysz;
237                 if (*qs != '=')
238                         goto next;
239
240                 /* Parse one value. */
241
242                 valsz = strcspn(++qs, ";&");
243                 val = mandoc_strndup(qs, valsz);
244                 qs += valsz;
245
246                 /* Decode and catch encoding errors. */
247
248                 if ( ! (http_decode(key) && http_decode(val)))
249                         goto next;
250
251                 /* Handle key-value pairs. */
252
253                 if ( ! strcmp(key, "query"))
254                         set_query_attr(&req->q.query, &val);
255
256                 else if ( ! strcmp(key, "apropos"))
257                         req->q.equal = !strcmp(val, "0");
258
259                 else if ( ! strcmp(key, "manpath")) {
260 #ifdef COMPAT_OLDURI
261                         if ( ! strncmp(val, "OpenBSD ", 8)) {
262                                 val[7] = '-';
263                                 if ('C' == val[8])
264                                         val[8] = 'c';
265                         }
266 #endif
267                         set_query_attr(&req->q.manpath, &val);
268                 }
269
270                 else if ( ! (strcmp(key, "sec")
271 #ifdef COMPAT_OLDURI
272                     && strcmp(key, "sektion")
273 #endif
274                     )) {
275                         if ( ! strcmp(val, "0"))
276                                 *val = '\0';
277                         set_query_attr(&req->q.sec, &val);
278                 }
279
280                 else if ( ! strcmp(key, "arch")) {
281                         if ( ! strcmp(val, "default"))
282                                 *val = '\0';
283                         set_query_attr(&req->q.arch, &val);
284                 }
285
286                 /*
287                  * The key must be freed in any case.
288                  * The val may have been handed over to the query
289                  * structure, in which case it is now NULL.
290                  */
291 next:
292                 free(key);
293                 key = NULL;
294                 free(val);
295                 val = NULL;
296
297                 if (*qs != '\0')
298                         qs++;
299         }
300
301         /* Fall back to the default manpath. */
302
303         if (req->q.manpath == NULL)
304                 req->q.manpath = mandoc_strdup(req->p[0]);
305 }
306
/*
 * Print one character to standard output in URL-encoded form:
 * alphanumeric characters are printed as they are, a blank is
 * printed as '+', and any other byte is printed as a percent sign
 * followed by two lowercase hexadecimal digits.
 */
static void
http_putchar(char c)
{

        if (isalnum((unsigned char)c)) {
                putchar((unsigned char)c);
                return;
        } else if (' ' == c) {
                putchar('+');
                return;
        }

        /*
         * Convert to unsigned char first: where char is signed,
         * a byte above 0x7f would otherwise be sign-extended
         * and printed as eight hexadecimal digits.
         */
        printf("%%%.2x", (unsigned int)(unsigned char)c);
}
320
/*
 * HTTP-decode a string.  The standard explanation is that this turns
 * "%4e+foo" into "N foo" in the regular way.  This is done in-place
 * over the allocated string.
 *
 * Returns 1 on success.  Returns 0 if a percent sign is not followed
 * by exactly two hexadecimal digits or if an escape decodes to a NUL
 * byte; the content of the string is unspecified in that case.
 */
static int
http_decode(char *p)
{
        char             hex[3];
        char            *q;
        long             c;

        hex[2] = '\0';

        q = p;
        for ( ; '\0' != *p; p++, q++) {
                if ('+' == *p) {
                        *q = ' ';
                        continue;
                }
                if ('%' != *p) {
                        *q = *p;
                        continue;
                }

                /*
                 * Insist on two hexadecimal digits.  The second
                 * test is skipped when the first one fails, so
                 * this never reads beyond the terminating NUL.
                 */

                if ( ! isxdigit((unsigned char)p[1]) ||
                    ! isxdigit((unsigned char)p[2]))
                        return(0);

                hex[0] = p[1];
                hex[1] = p[2];
                c = strtol(hex, NULL, 16);

                /* An embedded NUL would truncate the string. */

                if (0 == c)
                        return(0);

                *q = (char)c;
                p += 2;
        }

        *q = '\0';
        return(1);
}
356
/*
 * Print the HTTP header block of a response and flush it.
 * A Status line built from code and msg is printed first,
 * except for code 200; msg is not used in that case.
 */
static void
resp_begin_http(int code, const char *msg)
{
        static const char        headers[] =
            "Content-Type: text/html; charset=utf-8\r\n"
            "Cache-Control: no-cache\r\n"
            "Pragma: no-cache\r\n"
            "\r\n";

        if (code != 200)
                printf("Status: %d %s\r\n", code, msg);

        fputs(headers, stdout);
        fflush(stdout);
}
371
372 static void
373 resp_begin_html(int code, const char *msg)
374 {
375
376         resp_begin_http(code, msg);
377
378         printf("<!DOCTYPE HTML PUBLIC "
379                " \"-//W3C//DTD HTML 4.01//EN\""
380                " \"http://www.w3.org/TR/html4/strict.dtd\">\n"
381                "<HTML>\n"
382                "<HEAD>\n"
383                "<META HTTP-EQUIV=\"Content-Type\""
384                " CONTENT=\"text/html; charset=utf-8\">\n"
385                "<LINK REL=\"stylesheet\" HREF=\"%s/man-cgi.css\""
386                " TYPE=\"text/css\" media=\"all\">\n"
387                "<LINK REL=\"stylesheet\" HREF=\"%s/man.css\""
388                " TYPE=\"text/css\" media=\"all\">\n"
389                "<TITLE>%s</TITLE>\n"
390                "</HEAD>\n"
391                "<BODY>\n"
392                "<!-- Begin page content. //-->\n",
393                CSS_DIR, CSS_DIR, CUSTOMIZE_TITLE);
394 }
395
/*
 * Finish an HTML response by closing the document body
 * and the document itself.
 */
static void
resp_end_html(void)
{

        fputs("</BODY>\n"
              "</HTML>\n", stdout);
}
403
404 static void
405 resp_searchform(const struct req *req)
406 {
407         int              i;
408
409         puts(CUSTOMIZE_BEGIN);
410         puts("<!-- Begin search form. //-->");
411         printf("<DIV ID=\"mancgi\">\n"
412                "<FORM ACTION=\"%s\" METHOD=\"get\">\n"
413                "<FIELDSET>\n"
414                "<LEGEND>Manual Page Search Parameters</LEGEND>\n",
415                scriptname);
416
417         /* Write query input box. */
418
419         printf( "<TABLE><TR><TD>\n"
420                 "<INPUT TYPE=\"text\" NAME=\"query\" VALUE=\"");
421         if (NULL != req->q.query)
422                 html_print(req->q.query);
423         puts("\" SIZE=\"40\">");
424
425         /* Write submission and reset buttons. */
426
427         printf( "<INPUT TYPE=\"submit\" VALUE=\"Submit\">\n"
428                 "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n");
429
430         /* Write show radio button */
431
432         printf( "</TD><TD>\n"
433                 "<INPUT TYPE=\"radio\" ");
434         if (req->q.equal)
435                 printf("CHECKED=\"checked\" ");
436         printf( "NAME=\"apropos\" ID=\"show\" VALUE=\"0\">\n"
437                 "<LABEL FOR=\"show\">Show named manual page</LABEL>\n");
438
439         /* Write section selector. */
440
441         puts(   "</TD></TR><TR><TD>\n"
442                 "<SELECT NAME=\"sec\">");
443         for (i = 0; i < sec_MAX; i++) {
444                 printf("<OPTION VALUE=\"%s\"", sec_numbers[i]);
445                 if (NULL != req->q.sec &&
446                     0 == strcmp(sec_numbers[i], req->q.sec))
447                         printf(" SELECTED=\"selected\"");
448                 printf(">%s</OPTION>\n", sec_names[i]);
449         }
450         puts("</SELECT>");
451
452         /* Write architecture selector. */
453
454         printf( "<SELECT NAME=\"arch\">\n"
455                 "<OPTION VALUE=\"default\"");
456         if (NULL == req->q.arch)
457                 printf(" SELECTED=\"selected\"");
458         puts(">All Architectures</OPTION>");
459         for (i = 0; i < arch_MAX; i++) {
460                 printf("<OPTION VALUE=\"%s\"", arch_names[i]);
461                 if (NULL != req->q.arch &&
462                     0 == strcmp(arch_names[i], req->q.arch))
463                         printf(" SELECTED=\"selected\"");
464                 printf(">%s</OPTION>\n", arch_names[i]);
465         }
466         puts("</SELECT>");
467
468         /* Write manpath selector. */
469
470         if (req->psz > 1) {
471                 puts("<SELECT NAME=\"manpath\">");
472                 for (i = 0; i < (int)req->psz; i++) {
473                         printf("<OPTION ");
474                         if (NULL == req->q.manpath ? 0 == i :
475                             0 == strcmp(req->q.manpath, req->p[i]))
476                                 printf("SELECTED=\"selected\" ");
477                         printf("VALUE=\"");
478                         html_print(req->p[i]);
479                         printf("\">");
480                         html_print(req->p[i]);
481                         puts("</OPTION>");
482                 }
483                 puts("</SELECT>");
484         }
485
486         /* Write search radio button */
487
488         printf( "</TD><TD>\n"
489                 "<INPUT TYPE=\"radio\" ");
490         if (0 == req->q.equal)
491                 printf("CHECKED=\"checked\" ");
492         printf( "NAME=\"apropos\" ID=\"search\" VALUE=\"1\">\n"
493                 "<LABEL FOR=\"search\">Search with apropos query</LABEL>\n");
494
495         puts("</TD></TR></TABLE>\n"
496              "</FIELDSET>\n"
497              "</FORM>\n"
498              "</DIV>");
499         puts("<!-- End search form. //-->");
500 }
501
/*
 * Check that a URI fragment consists of nothing but alphanumeric
 * characters, hyphens, dots, slashes and underscores.
 * Returns 1 if so (also for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
        const char      *cp;

        for (cp = frag; *cp != '\0'; cp++) {
                if (isalnum((unsigned char)*cp))
                        continue;
                /* *cp is not NUL here, so only real members match. */
                if (strchr("-./_", *cp) == NULL)
                        return 0;
        }
        return 1;
}
515
516 static int
517 validate_manpath(const struct req *req, const char* manpath)
518 {
519         size_t   i;
520
521         if ( ! strcmp(manpath, "mandoc"))
522                 return(1);
523
524         for (i = 0; i < req->psz; i++)
525                 if ( ! strcmp(manpath, req->p[i]))
526                         return(1);
527
528         return(0);
529 }
530
/*
 * Check a file name relative to the root of a manpath.
 * After an optional leading "./", the name has to begin with
 * "man" or "cat" and must contain neither "../" nor "/..".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
        const char      *rel;

        rel = file;
        if (strncmp(rel, "./", 2) == 0)
                rel += 2;

        /* Do not allow climbing out of the manpath. */
        if (strstr(rel, "../") != NULL || strstr(rel, "/..") != NULL)
                return 0;

        return strncmp(rel, "man", 3) == 0 || strncmp(rel, "cat", 3) == 0;
}
541
542 static void
543 pg_index(const struct req *req)
544 {
545
546         resp_begin_html(200, NULL);
547         resp_searchform(req);
548         printf("<P>\n"
549                "This web interface is documented in the\n"
550                "<A HREF=\"%s/mandoc/man8/man.cgi.8\">man.cgi</A>\n"
551                "manual, and the\n"
552                "<A HREF=\"%s/mandoc/man1/apropos.1\">apropos</A>\n"
553                "manual explains the query syntax.\n"
554                "</P>\n",
555                scriptname, scriptname);
556         resp_end_html();
557 }
558
/*
 * Print a page consisting of the search form and a paragraph
 * containing msg.  The message is printed without HTML escaping.
 */
static void
pg_noresult(const struct req *req, const char *msg)
{

        resp_begin_html(200, NULL);
        resp_searchform(req);
        printf("<P>\n%s\n</P>\n", msg);
        resp_end_html();
}
569
570 static void
571 pg_error_badrequest(const char *msg)
572 {
573
574         resp_begin_html(400, "Bad Request");
575         puts("<H1>Bad Request</H1>\n"
576              "<P>\n");
577         puts(msg);
578         printf("Try again from the\n"
579                "<A HREF=\"%s\">main page</A>.\n"
580                "</P>", scriptname);
581         resp_end_html();
582 }
583
/*
 * Print a complete "500 Internal Server Error" page.
 * No details are disclosed to the client.
 */
static void
pg_error_internal(void)
{

        resp_begin_html(500, "Internal Server Error");
        fputs("<P>Internal Server Error</P>\n", stdout);
        resp_end_html();
}
591
592 static void
593 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
594 {
595         char            *arch, *archend;
596         size_t           i, iuse, isec;
597         int              archprio, archpriouse;
598         int              prio, priouse;
599         char             sec;
600
601         for (i = 0; i < sz; i++) {
602                 if (validate_filename(r[i].file))
603                         continue;
604                 fprintf(stderr, "invalid filename %s in %s database\n",
605                     r[i].file, req->q.manpath);
606                 pg_error_internal();
607                 return;
608         }
609
610         if (1 == sz) {
611                 /*
612                  * If we have just one result, then jump there now
613                  * without any delay.
614                  */
615                 printf("Status: 303 See Other\r\n");
616                 printf("Location: http://%s%s/%s/%s?",
617                     HTTP_HOST, scriptname, req->q.manpath, r[0].file);
618                 http_printquery(req, "&");
619                 printf("\r\n"
620                      "Content-Type: text/html; charset=utf-8\r\n"
621                      "\r\n");
622                 return;
623         }
624
625         resp_begin_html(200, NULL);
626         resp_searchform(req);
627         puts("<DIV CLASS=\"results\">");
628         puts("<TABLE>");
629
630         for (i = 0; i < sz; i++) {
631                 printf("<TR>\n"
632                        "<TD CLASS=\"title\">\n"
633                        "<A HREF=\"%s/%s/%s?", 
634                     scriptname, req->q.manpath, r[i].file);
635                 http_printquery(req, "&amp;");
636                 printf("\">");
637                 html_print(r[i].names);
638                 printf("</A>\n"
639                        "</TD>\n"
640                        "<TD CLASS=\"desc\">");
641                 html_print(r[i].output);
642                 puts("</TD>\n"
643                      "</TR>");
644         }
645
646         puts("</TABLE>\n"
647              "</DIV>");
648
649         /*
650          * In man(1) mode, show one of the pages
651          * even if more than one is found.
652          */
653
654         if (req->q.equal) {
655                 puts("<HR>");
656                 iuse = 0;
657                 priouse = 10;
658                 archpriouse = 3;
659                 for (i = 0; i < sz; i++) {
660                         isec = strcspn(r[i].file, "123456789");
661                         sec = r[i].file[isec];
662                         if ('\0' == sec)
663                                 continue;
664                         prio = sec_prios[sec - '1'];
665                         if (NULL == req->q.arch) {
666                                 archprio =
667                                     (NULL == (arch = strchr(
668                                         r[i].file + isec, '/'))) ? 3 :
669                                     (NULL == (archend = strchr(
670                                         arch + 1, '/'))) ? 0 :
671                                     strncmp(arch, "amd64/",
672                                         archend - arch) ? 2 : 1;
673                                 if (archprio < archpriouse) {
674                                         archpriouse = archprio;
675                                         priouse = prio;
676                                         iuse = i;
677                                         continue;
678                                 }
679                                 if (archprio > archpriouse)
680                                         continue;
681                         }
682                         if (prio >= priouse)
683                                 continue;
684                         priouse = prio;
685                         iuse = i;
686                 }
687                 resp_show(req, r[iuse].file);
688         }
689
690         resp_end_html();
691 }
692
/*
 * Print a preformatted manual page (cat page) as HTML inside
 * a PRE element.
 *
 * Overstriking with backspaces is translated to markup:
 * an underscore, a backspace and a character give that character
 * in italics; certain pairs of overstruck symbols are replaced
 * by a single '*' or '+'; any other overstriking gives the second
 * character in bold.  Text is escaped with html_putchar().
 *
 * If the file cannot be opened, a short message is printed instead.
 * The req argument is not used.
 */
static void
catman(const struct req *req, const char *file)
{
        FILE            *f;
        size_t           len;
        int              i;
        char            *p;
        int              italic, bold;

        if (NULL == (f = fopen(file, "r"))) {
                puts("<P>You specified an invalid manual file.</P>");
                return;
        }

        puts("<DIV CLASS=\"catman\">\n"
             "<PRE>");

        while (NULL != (p = fgetln(f, &len))) {
                bold = italic = 0;

                /*
                 * The last character of the line is not handled by
                 * this loop, such that p[i + 1] is always valid.
                 */

                for (i = 0; i < (int)len - 1; i++) {
                        /*
                         * This means that the catpage is out of state.
                         * Ignore it and keep going (although the
                         * catpage is bogus).
                         */

                        if ('\b' == p[i] || '\n' == p[i])
                                continue;

                        /*
                         * Print a regular character.
                         * Close out any bold/italic scopes.
                         * If we're in back-space mode, make sure we'll
                         * have something to enter when we backspace.
                         */

                        if ('\b' != p[i + 1]) {
                                if (italic)
                                        printf("</I>");
                                if (bold)
                                        printf("</B>");
                                italic = bold = 0;
                                html_putchar(p[i]);
                                continue;
                        } else if (i + 2 >= (int)len)
                                continue;

                        /*
                         * From here on, p[i + 1] is a backspace
                         * and p[i + 2] is the overstriking character.
                         */

                        /* Italic mode. */

                        if ('_' == p[i]) {
                                if (bold)
                                        printf("</B>");
                                if ( ! italic)
                                        printf("<I>");
                                bold = 0;
                                italic = 1;
                                i += 2;
                                html_putchar(p[i]);
                                continue;
                        }

                        /*
                         * Handle funny behaviour troff-isms.
                         * These grok'd from the original man2html.c.
                         * Both characters of each pair are separated
                         * by the backspace, so the second one has
                         * to be looked up at p[i + 2].
                         */

                        if (('+' == p[i] && 'o' == p[i + 2]) ||
                                        ('o' == p[i] && '+' == p[i + 2]) ||
                                        ('|' == p[i] && '=' == p[i + 2]) ||
                                        ('=' == p[i] && '|' == p[i + 2]) ||
                                        ('*' == p[i] && '=' == p[i + 2]) ||
                                        ('=' == p[i] && '*' == p[i + 2]) ||
                                        ('*' == p[i] && '|' == p[i + 2]) ||
                                        ('|' == p[i] && '*' == p[i + 2]))  {
                                if (italic)
                                        printf("</I>");
                                if (bold)
                                        printf("</B>");
                                italic = bold = 0;
                                putchar('*');
                                i += 2;
                                continue;
                        } else if (('|' == p[i] && '-' == p[i + 2]) ||
                                        ('-' == p[i] && '|' == p[i + 2]) ||
                                        ('+' == p[i] && '-' == p[i + 2]) ||
                                        ('-' == p[i] && '+' == p[i + 2]) ||
                                        ('+' == p[i] && '|' == p[i + 2]) ||
                                        ('|' == p[i] && '+' == p[i + 2]))  {
                                if (italic)
                                        printf("</I>");
                                if (bold)
                                        printf("</B>");
                                italic = bold = 0;
                                putchar('+');
                                i += 2;
                                continue;
                        }

                        /* Bold mode. */

                        if (italic)
                                printf("</I>");
                        if ( ! bold)
                                printf("<B>");
                        bold = 1;
                        italic = 0;
                        i += 2;
                        html_putchar(p[i]);
                }

                /*
                 * Clean up the last character.
                 * We can get to a newline; don't print that.
                 */

                if (italic)
                        printf("</I>");
                if (bold)
                        printf("</B>");

                if (i == (int)len - 1 && '\n' != p[i])
                        html_putchar(p[i]);

                putchar('\n');
        }

        puts("</PRE>\n"
             "</DIV>");

        fclose(f);
}
824
825 static void
826 format(const struct req *req, const char *file)
827 {
828         struct mparse   *mp;
829         struct mdoc     *mdoc;
830         struct man      *man;
831         void            *vp;
832         char            *opts;
833         enum mandoclevel rc;
834         int              fd;
835         int              usepath;
836
837         if (-1 == (fd = open(file, O_RDONLY, 0))) {
838                 puts("<P>You specified an invalid manual file.</P>");
839                 return;
840         }
841
842         mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_FATAL, NULL,
843             req->q.manpath);
844         rc = mparse_readfd(mp, fd, file);
845         close(fd);
846
847         if (rc >= MANDOCLEVEL_FATAL) {
848                 fprintf(stderr, "fatal mandoc error: %s/%s\n",
849                     req->q.manpath, file);
850                 pg_error_internal();
851                 return;
852         }
853
854         usepath = strcmp(req->q.manpath, req->p[0]);
855         mandoc_asprintf(&opts,
856             "fragment,man=%s?query=%%N&sec=%%S%s%s%s%s",
857             scriptname,
858             req->q.arch ? "&arch="       : "",
859             req->q.arch ? req->q.arch    : "",
860             usepath     ? "&manpath="    : "",
861             usepath     ? req->q.manpath : "");
862
863         mparse_result(mp, &mdoc, &man, NULL);
864         if (NULL == man && NULL == mdoc) {
865                 fprintf(stderr, "fatal mandoc error: %s/%s\n",
866                     req->q.manpath, file);
867                 pg_error_internal();
868                 mparse_free(mp);
869                 return;
870         }
871
872         vp = html_alloc(opts);
873
874         if (NULL != mdoc)
875                 html_mdoc(vp, mdoc);
876         else
877                 html_man(vp, man);
878
879         html_free(vp);
880         mparse_free(mp);
881         free(opts);
882 }
883
/*
 * Print the content of one manual page file.
 * After skipping an optional leading "./", a file name starting
 * with the letter 'c' is handed to catman() as a preformatted
 * page; anything else is handed to format() as manual source.
 */
static void
resp_show(const struct req *req, const char *file)
{
        const char      *rel;

        rel = file;
        if (strncmp(rel, "./", 2) == 0)
                rel += 2;

        if (rel[0] != 'c') {
                format(req, rel);
                return;
        }
        catman(req, rel);
}
896
897 static void
898 pg_show(struct req *req, const char *fullpath)
899 {
900         char            *manpath;
901         const char      *file;
902
903         if ((file = strchr(fullpath, '/')) == NULL) {
904                 pg_error_badrequest(
905                     "You did not specify a page to show.");
906                 return;
907         } 
908         manpath = mandoc_strndup(fullpath, file - fullpath);
909         file++;
910
911         if ( ! validate_manpath(req, manpath)) {
912                 pg_error_badrequest(
913                     "You specified an invalid manpath.");
914                 free(manpath);
915                 return;
916         }
917
918         /*
919          * Begin by chdir()ing into the manpath.
920          * This way we can pick up the database files, which are
921          * relative to the manpath root.
922          */
923
924         if (chdir(manpath) == -1) {
925                 fprintf(stderr, "chdir %s: %s\n",
926                     manpath, strerror(errno));
927                 pg_error_internal();
928                 free(manpath);
929                 return;
930         }
931
932         if (strcmp(manpath, "mandoc")) {
933                 free(req->q.manpath);
934                 req->q.manpath = manpath;
935         } else
936                 free(manpath);
937
938         if ( ! validate_filename(file)) {
939                 pg_error_badrequest(
940                     "You specified an invalid manual file.");
941                 return;
942         }
943
944         resp_begin_html(200, NULL);
945         resp_searchform(req);
946         resp_show(req, file);
947         resp_end_html();
948 }
949
950 static void
951 pg_search(const struct req *req)
952 {
953         struct mansearch          search;
954         struct manpaths           paths;
955         struct manpage           *res;
956         char                    **cp;
957         const char               *ep, *start;
958         size_t                    ressz;
959         int                       i, sz;
960
961         /*
962          * Begin by chdir()ing into the root of the manpath.
963          * This way we can pick up the database files, which are
964          * relative to the manpath root.
965          */
966
967         if (-1 == (chdir(req->q.manpath))) {
968                 fprintf(stderr, "chdir %s: %s\n",
969                     req->q.manpath, strerror(errno));
970                 pg_error_internal();
971                 return;
972         }
973
974         search.arch = req->q.arch;
975         search.sec = req->q.sec;
976         search.deftype = req->q.equal ? TYPE_Nm : (TYPE_Nm | TYPE_Nd);
977         search.flags = req->q.equal ? MANSEARCH_MAN : 0;
978
979         paths.sz = 1;
980         paths.paths = mandoc_malloc(sizeof(char *));
981         paths.paths[0] = mandoc_strdup(".");
982
983         /*
984          * Poor man's tokenisation: just break apart by spaces.
985          * Yes, this is half-ass.  But it works for now.
986          */
987
988         ep = req->q.query;
989         while (ep && isspace((unsigned char)*ep))
990                 ep++;
991
992         sz = 0;
993         cp = NULL;
994         while (ep && '\0' != *ep) {
995                 cp = mandoc_reallocarray(cp, sz + 1, sizeof(char *));
996                 start = ep;
997                 while ('\0' != *ep && ! isspace((unsigned char)*ep))
998                         ep++;
999                 cp[sz] = mandoc_malloc((ep - start) + 1);
1000                 memcpy(cp[sz], start, ep - start);
1001                 cp[sz++][ep - start] = '\0';
1002                 while (isspace((unsigned char)*ep))
1003                         ep++;
1004         }
1005
1006         if (0 == mansearch(&search, &paths, sz, cp, "Nd", &res, &ressz))
1007                 pg_noresult(req, "You entered an invalid query.");
1008         else if (0 == ressz)
1009                 pg_noresult(req, "No results found.");
1010         else
1011                 pg_searchres(req, res, ressz);
1012
1013         for (i = 0; i < sz; i++)
1014                 free(cp[i]);
1015         free(cp);
1016
1017         for (i = 0; i < (int)ressz; i++) {
1018                 free(res[i].file);
1019                 free(res[i].names);
1020                 free(res[i].output);
1021         }
1022         free(res);
1023
1024         free(paths.paths[0]);
1025         free(paths.paths);
1026 }
1027
1028 int
1029 main(void)
1030 {
1031         struct req       req;
1032         const char      *path;
1033         const char      *querystring;
1034         int              i;
1035
1036         /* Scan our run-time environment. */
1037
1038         if (NULL == (scriptname = getenv("SCRIPT_NAME")))
1039                 scriptname = "";
1040
1041         if ( ! validate_urifrag(scriptname)) {
1042                 fprintf(stderr, "unsafe SCRIPT_NAME \"%s\"\n",
1043                     scriptname);
1044                 pg_error_internal();
1045                 return(EXIT_FAILURE);
1046         }
1047
1048         /*
1049          * First we change directory into the MAN_DIR so that
1050          * subsequent scanning for manpath directories is rooted
1051          * relative to the same position.
1052          */
1053
1054         if (-1 == chdir(MAN_DIR)) {
1055                 fprintf(stderr, "MAN_DIR: %s: %s\n",
1056                     MAN_DIR, strerror(errno));
1057                 pg_error_internal();
1058                 return(EXIT_FAILURE);
1059         } 
1060
1061         memset(&req, 0, sizeof(struct req));
1062         pathgen(&req);
1063
1064         /* Next parse out the query string. */
1065
1066         if (NULL != (querystring = getenv("QUERY_STRING")))
1067                 http_parse(&req, querystring);
1068
1069         if ( ! (NULL == req.q.manpath ||
1070             validate_manpath(&req, req.q.manpath))) {
1071                 pg_error_badrequest(
1072                     "You specified an invalid manpath.");
1073                 return(EXIT_FAILURE);
1074         }
1075
1076         if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1077                 pg_error_badrequest(
1078                     "You specified an invalid architecture.");
1079                 return(EXIT_FAILURE);
1080         }
1081
1082         /* Dispatch to the three different pages. */
1083
1084         path = getenv("PATH_INFO");
1085         if (NULL == path)
1086                 path = "";
1087         else if ('/' == *path)
1088                 path++;
1089
1090         if ('\0' != *path)
1091                 pg_show(&req, path);
1092         else if (NULL != req.q.query)
1093                 pg_search(&req);
1094         else
1095                 pg_index(&req);
1096
1097         free(req.q.manpath);
1098         free(req.q.arch);
1099         free(req.q.sec);
1100         free(req.q.query);
1101         for (i = 0; i < (int)req.psz; i++)
1102                 free(req.p[i]);
1103         free(req.p);
1104         return(EXIT_SUCCESS);
1105 }
1106
1107 /*
1108  * Scan for indexable paths.
1109  */
1110 static void
1111 pathgen(struct req *req)
1112 {
1113         FILE    *fp;
1114         char    *dp;
1115         size_t   dpsz;
1116
1117         if (NULL == (fp = fopen("manpath.conf", "r"))) {
1118                 fprintf(stderr, "%s/manpath.conf: %s\n",
1119                         MAN_DIR, strerror(errno));
1120                 pg_error_internal();
1121                 exit(EXIT_FAILURE);
1122         }
1123
1124         while (NULL != (dp = fgetln(fp, &dpsz))) {
1125                 if ('\n' == dp[dpsz - 1])
1126                         dpsz--;
1127                 req->p = mandoc_realloc(req->p,
1128                     (req->psz + 1) * sizeof(char *));
1129                 dp = mandoc_strndup(dp, dpsz);
1130                 if ( ! validate_urifrag(dp)) {
1131                         fprintf(stderr, "%s/manpath.conf contains "
1132                             "unsafe path \"%s\"\n", MAN_DIR, dp);
1133                         pg_error_internal();
1134                         exit(EXIT_FAILURE);
1135                 }
1136                 if (NULL != strchr(dp, '/')) {
1137                         fprintf(stderr, "%s/manpath.conf contains "
1138                             "path with slash \"%s\"\n", MAN_DIR, dp);
1139                         pg_error_internal();
1140                         exit(EXIT_FAILURE);
1141                 }
1142                 req->p[req->psz++] = dp;
1143         }
1144
1145         if ( req->p == NULL ) {
1146                 fprintf(stderr, "%s/manpath.conf is empty\n", MAN_DIR);
1147                 pg_error_internal();
1148                 exit(EXIT_FAILURE);
1149         }
1150 }