Merge branch 'vendor/MDOCML'
[dragonfly.git] / contrib / mdocml / cgi.c
1 /*      $Id: cgi.c,v 1.45 2013/06/05 02:00:26 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/wait.h>
22
23 #include <assert.h>
24 #include <ctype.h>
25 #include <errno.h>
26 #include <dirent.h>
27 #include <fcntl.h>
28 #include <limits.h>
29 #include <regex.h>
30 #include <stdio.h>
31 #include <stdarg.h>
32 #include <stdint.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36
37 #include "apropos_db.h"
38 #include "mandoc.h"
39 #include "mdoc.h"
40 #include "man.h"
41 #include "main.h"
42 #include "manpath.h"
43 #include "mandocdb.h"
44
45 #ifdef __linux__
46 # include <db_185.h>
47 #else
48 # include <db.h>
49 #endif
50
/*
 * The pages this program knows about.
 * PAGE__MAX is a count of the real entries, not a page of its own.
 */
enum page {
        PAGE_INDEX = 0,
        PAGE_SEARCH,
        PAGE_SHOW,
        PAGE__MAX
};
57
/*
 * One manual root known to the request.
 */
struct paths {
        /* Name compared against the "manpath" query value and shown in the form. */
        char            *name;
        /* Directory that the page handlers chdir(2) into. */
        char            *path;
};
62
/*
 * The search parameters extracted from the HTTP query string and
 * handed to the search function.
 */
struct query {
        /* Architecture to restrict to, or NULL. */
        const char      *arch;
        /* Manual section to restrict to, or NULL. */
        const char      *sec;
        /* Expression string, still unparsed. */
        const char      *expr;
        /* Index of the selected manroot, or -1. */
        int              manroot;
        /* Non-zero when running in legacy mode. */
        int              legacy;
};
73
74 struct  req {
75         struct query     q;
76         struct paths    *p;
77         size_t           psz;
78         enum page        page;
79 };
80
/* Numeric parsing and result ordering. */
static  int              atou(const char *buf, unsigned *v);
static  int              cmp(const void *, const void *);

/* Escaped output for HTML bodies and for URLs. */
static  void             html_print(const char *p);
static  void             html_printquery(const struct req *req);
static  void             html_putchar(char c);
static  void             http_print(const char *p);
static  void             http_printquery(const struct req *req);
static  void             http_putchar(char c);

/* Query-string handling. */
static  int              http_decode(char *p);
static  void             http_parse(struct req *req, char *p);

/* Manual root discovery. */
static  int              pathstop(DIR *);
static  void             pathgen(DIR *, char *, struct req *);

/* Manual page rendering. */
static  void             catman(const struct req *req, const char *file);
static  void             format(const struct req *req, const char *file);

/* Page handlers. */
static  void             pg_index(const struct req *req, char *path);
static  void             pg_search(const struct req *req, char *path);
static  void             pg_show(const struct req *req, char *path);

/* Response writers. */
static  void             resp_bad(void);
static  void             resp_baddb(void);
static  void             resp_begin_html(int code, const char *msg);
static  void             resp_begin_http(int code, const char *msg);
static  void             resp_end_html(void);
static  void             resp_error400(void);
static  void             resp_error404(const char *page);
static  void             resp_index(const struct req *req);
static  void             resp_search(struct res *r, size_t sz, void *arg);
static  void             resp_searchform(const struct req *req);
108
/* Name of the CGI script; prefixed to the links we generate. */
static  const char       *progname;
/* Cache directory. */
static  const char       *cache;
/* Directory prefix for the style sheets. */
static  const char       *css;
/* Host name used in absolute redirect URLs. */
static  const char       *host;
113
114 static  const char * const pages[PAGE__MAX] = {
115         "index", /* PAGE_INDEX */ 
116         "search", /* PAGE_SEARCH */
117         "show", /* PAGE_SHOW */
118 };
119
/*
 * Convert a decimal string to an unsigned integer, following the
 * checking recipe from OpenBSD's strtol(3) manual; strtonum(3) is
 * avoided for portability's sake.
 * Returns 1 and stores the value in *v on success.
 * Returns 0, leaving *v alone, if the string is empty, has trailing
 * characters, is negative or does not fit into an int.
 */
static int
atou(const char *buf, unsigned *v)
{
        char            *end;
        long             num;

        errno = 0;
        num = strtol(buf, &end, 10);

        /* Empty input, or something left over after the number. */
        if (*buf == '\0' || *end != '\0')
                return(0);

        /* Out of range for a long. */
        if (errno == ERANGE && (num == LONG_MAX || num == LONG_MIN))
                return(0);

        /* Out of range for a non-negative int. */
        if (num < 0 || num > INT_MAX)
                return(0);

        *v = (unsigned int)num;
        return(1);
}
142
/*
 * Print a character to standard output, escaping the four characters
 * that are special in HTML text and attribute values.
 * This will pass non-ASCII straight to output: be warned!
 */
static void
html_putchar(char c)
{

        switch (c) {
        case ('"'):
                /* The entity name is "quot"; "&quote;" is not defined. */
                fputs("&quot;", stdout);
                break;
        case ('&'):
                fputs("&amp;", stdout);
                break;
        case ('>'):
                fputs("&gt;", stdout);
                break;
        case ('<'):
                fputs("&lt;", stdout);
                break;
        default:
                putchar((unsigned char)c);
                break;
        }
}
169 static void
170 http_printquery(const struct req *req)
171 {
172
173         printf("&expr=");
174         http_print(req->q.expr ? req->q.expr : "");
175         printf("&sec=");
176         http_print(req->q.sec ? req->q.sec : "");
177         printf("&arch=");
178         http_print(req->q.arch ? req->q.arch : "");
179 }
180
181
182 static void
183 html_printquery(const struct req *req)
184 {
185
186         printf("&amp;expr=");
187         html_print(req->q.expr ? req->q.expr : "");
188         printf("&amp;sec=");
189         html_print(req->q.sec ? req->q.sec : "");
190         printf("&amp;arch=");
191         html_print(req->q.arch ? req->q.arch : "");
192 }
193
/*
 * Print a string through http_putchar(), one character at a time.
 * Accepts NULL strings, for which nothing is printed.
 */
static void
http_print(const char *p)
{
        const char      *cp;

        if (p == NULL)
                return;
        for (cp = p; *cp != '\0'; cp++)
                http_putchar(*cp);
}
203
/*
 * Print a string through html_putchar(), one character at a time.
 * Accepts NULL strings, for which nothing is printed.
 */
static void
html_print(const char *p)
{
        const char      *cp;

        if (p == NULL)
                return;
        for (cp = p; *cp != '\0'; cp++)
                html_putchar(*cp);
}
217
218 /*
219  * Parse out key-value pairs from an HTTP request variable.
220  * This can be either a cookie or a POST/GET string, although man.cgi
221  * uses only GET for simplicity.
222  */
223 static void
224 http_parse(struct req *req, char *p)
225 {
226         char            *key, *val, *manroot;
227         int              i, legacy;
228
229         memset(&req->q, 0, sizeof(struct query));
230
231         legacy = -1;
232         manroot = NULL;
233
234         while ('\0' != *p) {
235                 key = p;
236                 val = NULL;
237
238                 p += (int)strcspn(p, ";&");
239                 if ('\0' != *p)
240                         *p++ = '\0';
241                 if (NULL != (val = strchr(key, '=')))
242                         *val++ = '\0';
243
244                 if ('\0' == *key || NULL == val || '\0' == *val)
245                         continue;
246
247                 /* Just abort handling. */
248
249                 if ( ! http_decode(key))
250                         break;
251                 if (NULL != val && ! http_decode(val))
252                         break;
253
254                 if (0 == strcmp(key, "expr"))
255                         req->q.expr = val;
256                 else if (0 == strcmp(key, "query"))
257                         req->q.expr = val;
258                 else if (0 == strcmp(key, "sec"))
259                         req->q.sec = val;
260                 else if (0 == strcmp(key, "sektion"))
261                         req->q.sec = val;
262                 else if (0 == strcmp(key, "arch"))
263                         req->q.arch = val;
264                 else if (0 == strcmp(key, "manpath"))
265                         manroot = val;
266                 else if (0 == strcmp(key, "apropos"))
267                         legacy = 0 == strcmp(val, "0");
268         }
269
270         /* Test for old man.cgi compatibility mode. */
271
272         req->q.legacy = legacy > 0;
273
274         /* 
275          * Section "0" means no section when in legacy mode.
276          * For some man.cgi scripts, "default" arch is none.
277          */
278
279         if (req->q.legacy && NULL != req->q.sec)
280                 if (0 == strcmp(req->q.sec, "0"))
281                         req->q.sec = NULL;
282         if (req->q.legacy && NULL != req->q.arch)
283                 if (0 == strcmp(req->q.arch, "default"))
284                         req->q.arch = NULL;
285
286         /* Default to first manroot. */
287
288         if (NULL != manroot) {
289                 for (i = 0; i < (int)req->psz; i++)
290                         if (0 == strcmp(req->p[i].name, manroot))
291                                 break;
292                 req->q.manroot = i < (int)req->psz ? i : -1;
293         }
294 }
295
/*
 * Print a character to standard output in URL-encoded form.
 * Alphanumerics are passed through, a space becomes '+', and every
 * other byte is written as '%' followed by two lower-case hex digits.
 */
static void
http_putchar(char c)
{
        unsigned char    uc;

        /*
         * Work on the unsigned byte value: where char is signed, a
         * high byte would otherwise be promoted to a negative int
         * and come out as "%ffffffxx".
         */
        uc = (unsigned char)c;

        if (isalnum(uc))
                putchar(uc);
        else if (uc == ' ')
                putchar('+');
        else
                printf("%%%.2x", (unsigned int)uc);
}
309
/*
 * HTTP-decode a string.  The standard explanation is that this turns
 * "%4e+foo" into "N foo" in the regular way.  This is done in-place
 * over the allocated string, which can only get shorter.
 *
 * Returns 1 on success.
 * Returns 0 if a '%' is not followed by exactly two hexadecimal
 * digits or if an escape decodes to a NUL byte; the buffer is then
 * still NUL-terminated, but its content is partly decoded and should
 * be discarded.
 */
static int
http_decode(char *p)
{
        char             hex[3];
        char            *dst;
        long             c;

        hex[2] = '\0';

        for (dst = p; *p != '\0'; p++, dst++) {
                if (*p == '+') {
                        *dst = ' ';
                        continue;
                } else if (*p != '%') {
                        *dst = *p;
                        continue;
                }

                /*
                 * Insist on two hex digits.  The terminating NUL is
                 * not a hex digit, so this cannot read beyond the
                 * end of the string, and signs, blanks and "0x"
                 * prefixes are rejected rather than parsed.
                 */

                hex[0] = p[1];
                if ( ! isxdigit((unsigned char)hex[0]))
                        return(0);
                hex[1] = p[2];
                if ( ! isxdigit((unsigned char)hex[1]))
                        return(0);

                c = strtol(hex, NULL, 16);
                if (c == 0)
                        return(0);

                *dst = (char)c;
                p += 2;
        }

        *dst = '\0';
        return(1);
}
343
/*
 * Write the HTTP response headers and the blank line ending them,
 * then flush standard output.
 * A "Status:" line is only written for codes other than 200, so msg
 * is not looked at for code 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
        const int        ok = 200;

        if (code != ok)
                printf("Status: %d %s\n", code, msg);

        /* The newline appended by puts(3) terminates the header block. */
        puts("Content-Type: text/html; charset=utf-8\n"
             "Cache-Control: no-cache\n"
             "Pragma: no-cache\n"
             "");

        fflush(stdout);
}
358
359 static void
360 resp_begin_html(int code, const char *msg)
361 {
362
363         resp_begin_http(code, msg);
364
365         printf("<!DOCTYPE HTML PUBLIC "
366                " \"-//W3C//DTD HTML 4.01//EN\""
367                " \"http://www.w3.org/TR/html4/strict.dtd\">\n"
368                "<HTML>\n"
369                "<HEAD>\n"
370                "<META HTTP-EQUIV=\"Content-Type\""
371                " CONTENT=\"text/html; charset=utf-8\">\n"
372                "<LINK REL=\"stylesheet\" HREF=\"%s/man-cgi.css\""
373                " TYPE=\"text/css\" media=\"all\">\n"
374                "<LINK REL=\"stylesheet\" HREF=\"%s/man.css\""
375                " TYPE=\"text/css\" media=\"all\">\n"
376                "<TITLE>System Manpage Reference</TITLE>\n"
377                "</HEAD>\n"
378                "<BODY>\n"
379                "<!-- Begin page content. //-->\n", css, css);
380 }
381
/*
 * Finish an HTML response by closing the BODY and HTML elements
 * opened by resp_begin_html().
 */
static void
resp_end_html(void)
{

        puts("</BODY>\n"
             "</HTML>");
}
389
390 static void
391 resp_searchform(const struct req *req)
392 {
393         int              i;
394
395         puts("<!-- Begin search form. //-->");
396         printf("<DIV ID=\"mancgi\">\n"
397                "<FORM ACTION=\"%s/search.html\" METHOD=\"get\">\n"
398                "<FIELDSET>\n"
399                "<LEGEND>Search Parameters</LEGEND>\n"
400                "<INPUT TYPE=\"submit\" "
401                " VALUE=\"Search\"> for manuals satisfying \n"
402                "<INPUT TYPE=\"text\" NAME=\"expr\" VALUE=\"",
403                progname);
404         html_print(req->q.expr ? req->q.expr : "");
405         printf("\">, section "
406                "<INPUT TYPE=\"text\""
407                " SIZE=\"4\" NAME=\"sec\" VALUE=\"");
408         html_print(req->q.sec ? req->q.sec : "");
409         printf("\">, arch "
410                "<INPUT TYPE=\"text\""
411                " SIZE=\"8\" NAME=\"arch\" VALUE=\"");
412         html_print(req->q.arch ? req->q.arch : "");
413         printf("\">");
414         if (req->psz > 1) {
415                 puts(", <SELECT NAME=\"manpath\">");
416                 for (i = 0; i < (int)req->psz; i++) {
417                         printf("<OPTION %s VALUE=\"",
418                                 (i == req->q.manroot) ||
419                                 (0 == i && -1 == req->q.manroot) ?
420                                 "SELECTED=\"selected\"" : "");
421                         html_print(req->p[i].name);
422                         printf("\">");
423                         html_print(req->p[i].name);
424                         puts("</OPTION>");
425                 }
426                 puts("</SELECT>");
427         }
428         puts(".\n"
429              "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n"
430              "</FIELDSET>\n"
431              "</FORM>\n"
432              "</DIV>");
433         puts("<!-- End search form. //-->");
434 }
435
/*
 * Respond with the index page: a complete HTML document containing
 * nothing but the search form.
 */
static void
resp_index(const struct req *req)
{

        resp_begin_html(200, NULL);     /* msg is unused for 200 */
        resp_searchform(req);
        resp_end_html();
}
444
445 static void
446 resp_error400(void)
447 {
448
449         resp_begin_html(400, "Query Malformed");
450         printf("<H1>Malformed Query</H1>\n"
451                "<P>\n"
452                "The query your entered was malformed.\n"
453                "Try again from the\n"
454                "<A HREF=\"%s/index.html\">main page</A>.\n"
455                "</P>", progname);
456         resp_end_html();
457 }
458
459 static void
460 resp_error404(const char *page)
461 {
462
463         resp_begin_html(404, "Not Found");
464         puts("<H1>Page Not Found</H1>\n"
465              "<P>\n"
466              "The page you're looking for, ");
467         printf("<B>");
468         html_print(page);
469         printf("</B>,\n"
470                "could not be found.\n"
471                "Try searching from the\n"
472                "<A HREF=\"%s/index.html\">main page</A>.\n"
473                "</P>", progname);
474         resp_end_html();
475 }
476
/*
 * Respond with a complete "500 Internal Server Error" page for
 * failures that have no more specific response.
 */
static void
resp_bad(void)
{

        resp_begin_html(500, "Internal Server Error");
        puts("<P>Generic badness happened.</P>");
        resp_end_html();
}
484
/*
 * Respond with a complete "500 Internal Server Error" page blaming
 * the manual database.
 */
static void
resp_baddb(void)
{

        resp_begin_html(500, "Internal Server Error");
        puts("<P>Your database is broken.</P>");
        resp_end_html();
}
493
494 static void
495 resp_search(struct res *r, size_t sz, void *arg)
496 {
497         size_t           i, matched;
498         const struct req *req;
499
500         req = (const struct req *)arg;
501
502         if (sz > 0)
503                 assert(req->q.manroot >= 0);
504
505         for (matched = i = 0; i < sz; i++)
506                 if (r[i].matched)
507                         matched++;
508         
509         if (1 == matched) {
510                 for (i = 0; i < sz; i++)
511                         if (r[i].matched)
512                                 break;
513                 /*
514                  * If we have just one result, then jump there now
515                  * without any delay.
516                  */
517                 puts("Status: 303 See Other");
518                 printf("Location: http://%s%s/show/%d/%u/%u.html?",
519                                 host, progname, req->q.manroot,
520                                 r[i].volume, r[i].rec);
521                 http_printquery(req);
522                 puts("\n"
523                      "Content-Type: text/html; charset=utf-8\n");
524                 return;
525         }
526
527         resp_begin_html(200, NULL);
528         resp_searchform(req);
529
530         puts("<DIV CLASS=\"results\">");
531
532         if (0 == matched) {
533                 puts("<P>\n"
534                      "No results found.\n"
535                      "</P>\n"
536                      "</DIV>");
537                 resp_end_html();
538                 return;
539         }
540
541         qsort(r, sz, sizeof(struct res), cmp);
542
543         puts("<TABLE>");
544
545         for (i = 0; i < sz; i++) {
546                 if ( ! r[i].matched)
547                         continue;
548                 printf("<TR>\n"
549                        "<TD CLASS=\"title\">\n"
550                        "<A HREF=\"%s/show/%d/%u/%u.html?", 
551                                 progname, req->q.manroot,
552                                 r[i].volume, r[i].rec);
553                 html_printquery(req);
554                 printf("\">");
555                 html_print(r[i].title);
556                 putchar('(');
557                 html_print(r[i].cat);
558                 if (r[i].arch && '\0' != *r[i].arch) {
559                         putchar('/');
560                         html_print(r[i].arch);
561                 }
562                 printf(")</A>\n"
563                        "</TD>\n"
564                        "<TD CLASS=\"desc\">");
565                 html_print(r[i].desc);
566                 puts("</TD>\n"
567                      "</TR>");
568         }
569
570         puts("</TABLE>\n"
571              "</DIV>");
572         resp_end_html();
573 }
574
/*
 * Handler for the index page.  The path argument is not used.
 */
/* ARGSUSED */
static void
pg_index(const struct req *req, char *path)
{

        (void)path;
        resp_index(req);
}
582
/*
 * Whether overstriking character a with character b (that is, the
 * sequence a, backspace, b) is shown by catman() as a '*'.
 */
static int
catman_isstar(char a, char b)
{

        return((a == '+' && b == 'o') || (a == 'o' && b == '+') ||
               (a == '|' && b == '=') || (a == '=' && b == '|') ||
               (a == '*' && b == '=') || (a == '=' && b == '*') ||
               (a == '*' && b == '|') || (a == '|' && b == '*'));
}

/*
 * Whether overstriking character a with character b is shown by
 * catman() as a '+'.
 */
static int
catman_isplus(char a, char b)
{

        return((a == '|' && b == '-') || (a == '-' && b == '|') ||
               (a == '+' && b == '-') || (a == '-' && b == '+') ||
               (a == '+' && b == '|') || (a == '|' && b == '+'));
}

/*
 * Close whichever of the italic and bold elements is open and
 * record that neither is open any longer.
 */
static void
catman_endfont(int *italic, int *bold)
{

        if (*italic)
                printf("</I>");
        if (*bold)
                printf("</B>");
        *italic = *bold = 0;
}

/*
 * Show a preformatted manual page: write a complete HTML response
 * with the search form and the file's text inside a PRE element.
 * Backspace overstrike sequences are turned into markup: "_\bX"
 * becomes italic X, a few two-character combinations become '*' or
 * '+', and any other "X\bY" becomes bold Y.
 * Responds with resp_baddb() if the file cannot be opened.
 */
static void
catman(const struct req *req, const char *file)
{
        FILE            *f;
        size_t           len;
        int              i;
        char            *p;
        int              italic, bold;

        if (NULL == (f = fopen(file, "r"))) {
                resp_baddb();
                return;
        }

        resp_begin_html(200, NULL);
        resp_searchform(req);
        puts("<DIV CLASS=\"catman\">\n"
             "<PRE>");

        /* fgetln(3) lines are not NUL-terminated: len bounds every access. */

        while (NULL != (p = fgetln(f, &len))) {
                bold = italic = 0;
                for (i = 0; i < (int)len - 1; i++) {
                        /*
                         * This means that the catpage is out of state.
                         * Ignore it and keep going (although the
                         * catpage is bogus).
                         */

                        if ('\b' == p[i] || '\n' == p[i])
                                continue;

                        /*
                         * Print a regular character.
                         * Close out any bold/italic scopes.
                         * If we're in back-space mode, make sure we'll
                         * have something to enter when we backspace.
                         */

                        if ('\b' != p[i + 1]) {
                                catman_endfont(&italic, &bold);
                                html_putchar(p[i]);
                                continue;
                        } else if (i + 2 >= (int)len)
                                continue;

                        /* From here on, p[i + 1] is a backspace. */

                        /* Italic mode. */

                        if ('_' == p[i]) {
                                if (bold)
                                        printf("</B>");
                                if ( ! italic)
                                        printf("<I>");
                                bold = 0;
                                italic = 1;
                                i += 2;
                                html_putchar(p[i]);
                                continue;
                        }

                        /*
                         * Handle funny behaviour troff-isms.
                         * These grok'd from the original man2html.c.
                         * Both groups look at the character after
                         * the backspace, p[i + 2].
                         */

                        if (catman_isstar(p[i], p[i + 2])) {
                                catman_endfont(&italic, &bold);
                                putchar('*');
                                i += 2;
                                continue;
                        } else if (catman_isplus(p[i], p[i + 2])) {
                                catman_endfont(&italic, &bold);
                                putchar('+');
                                i += 2;
                                continue;
                        }

                        /* Bold mode. */

                        if (italic)
                                printf("</I>");
                        if ( ! bold)
                                printf("<B>");
                        bold = 1;
                        italic = 0;
                        i += 2;
                        html_putchar(p[i]);
                }

                /*
                 * Clean up the last character.
                 * We can get to a newline; don't print that.
                 */

                catman_endfont(&italic, &bold);

                if (i == (int)len - 1 && '\n' != p[i])
                        html_putchar(p[i]);

                putchar('\n');
        }

        puts("</PRE>\n"
             "</DIV>\n"
             "</BODY>\n"
             "</HTML>");

        fclose(f);
}
718
719 static void
720 format(const struct req *req, const char *file)
721 {
722         struct mparse   *mp;
723         int              fd;
724         struct mdoc     *mdoc;
725         struct man      *man;
726         void            *vp;
727         enum mandoclevel rc;
728         char             opts[PATH_MAX + 128];
729
730         if (-1 == (fd = open(file, O_RDONLY, 0))) {
731                 resp_baddb();
732                 return;
733         }
734
735         mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL, NULL);
736         rc = mparse_readfd(mp, fd, file);
737         close(fd);
738
739         if (rc >= MANDOCLEVEL_FATAL) {
740                 resp_baddb();
741                 return;
742         }
743
744         snprintf(opts, sizeof(opts), "fragment,"
745                         "man=%s/search.html?sec=%%S&expr=Nm~^%%N$,"
746                         /*"includes=/cgi-bin/man.cgi/usr/include/%%I"*/,
747                         progname);
748
749         mparse_result(mp, &mdoc, &man);
750         if (NULL == man && NULL == mdoc) {
751                 resp_baddb();
752                 mparse_free(mp);
753                 return;
754         }
755
756         resp_begin_html(200, NULL);
757         resp_searchform(req);
758
759         vp = html_alloc(opts);
760
761         if (NULL != mdoc)
762                 html_mdoc(vp, mdoc);
763         else
764                 html_man(vp, man);
765
766         puts("</BODY>\n"
767              "</HTML>");
768
769         html_free(vp);
770         mparse_free(mp);
771 }
772
773 static void
774 pg_show(const struct req *req, char *path)
775 {
776         struct manpaths  ps;
777         size_t           sz;
778         char            *sub;
779         char             file[PATH_MAX];
780         const char      *cp;
781         int              rc, catm;
782         unsigned int     vol, rec, mr;
783         DB              *idx;
784         DBT              key, val;
785
786         idx = NULL;
787
788         /* Parse out mroot, volume, and record from the path. */
789
790         if (NULL == path || NULL == (sub = strchr(path, '/'))) {
791                 resp_error400();
792                 return;
793         } 
794         *sub++ = '\0';
795         if ( ! atou(path, &mr)) {
796                 resp_error400();
797                 return;
798         }
799         path = sub;
800         if (NULL == (sub = strchr(path, '/'))) {
801                 resp_error400();
802                 return;
803         }
804         *sub++ = '\0';
805         if ( ! atou(path, &vol) || ! atou(sub, &rec)) {
806                 resp_error400();
807                 return;
808         } else if (mr >= (unsigned int)req->psz) {
809                 resp_error400();
810                 return;
811         }
812
813         /*
814          * Begin by chdir()ing into the manroot.
815          * This way we can pick up the database files, which are
816          * relative to the manpath root.
817          */
818
819         if (-1 == chdir(req->p[(int)mr].path)) {
820                 perror(req->p[(int)mr].path);
821                 resp_baddb();
822                 return;
823         }
824
825         memset(&ps, 0, sizeof(struct manpaths));
826         manpath_manconf(&ps, "etc/catman.conf");
827
828         if (vol >= (unsigned int)ps.sz) {
829                 resp_error400();
830                 goto out;
831         }
832
833         sz = strlcpy(file, ps.paths[vol], PATH_MAX);
834         assert(sz < PATH_MAX);
835         strlcat(file, "/", PATH_MAX);
836         strlcat(file, MANDOC_IDX, PATH_MAX);
837
838         /* Open the index recno(3) database. */
839
840         idx = dbopen(file, O_RDONLY, 0, DB_RECNO, NULL);
841         if (NULL == idx) {
842                 perror(file);
843                 resp_baddb();
844                 goto out;
845         }
846
847         key.data = &rec;
848         key.size = 4;
849
850         if (0 != (rc = (*idx->get)(idx, &key, &val, 0))) {
851                 rc < 0 ? resp_baddb() : resp_error400();
852                 goto out;
853         } else if (0 == val.size) {
854                 resp_baddb();
855                 goto out;
856         }
857
858         cp = (char *)val.data;
859         catm = 'c' == *cp++;
860
861         if (NULL == memchr(cp, '\0', val.size - 1)) 
862                 resp_baddb();
863         else {
864                 file[(int)sz] = '\0';
865                 strlcat(file, "/", PATH_MAX);
866                 strlcat(file, cp, PATH_MAX);
867                 if (catm) 
868                         catman(req, file);
869                 else
870                         format(req, file);
871         }
872 out:
873         if (idx)
874                 (*idx->close)(idx);
875         manpath_free(&ps);
876 }
877
878 static void
879 pg_search(const struct req *req, char *path)
880 {
881         size_t            tt, ressz;
882         struct manpaths   ps;
883         int               i, sz, rc;
884         const char       *ep, *start;
885         struct res      *res;
886         char            **cp;
887         struct opts       opt;
888         struct expr      *expr;
889
890         if (req->q.manroot < 0 || 0 == req->psz) {
891                 resp_search(NULL, 0, (void *)req);
892                 return;
893         }
894
895         memset(&opt, 0, sizeof(struct opts));
896
897         ep       = req->q.expr;
898         opt.arch = req->q.arch;
899         opt.cat  = req->q.sec;
900         rc       = -1;
901         sz       = 0;
902         cp       = NULL;
903         ressz    = 0;
904         res      = NULL;
905
906         /*
907          * Begin by chdir()ing into the root of the manpath.
908          * This way we can pick up the database files, which are
909          * relative to the manpath root.
910          */
911
912         assert(req->q.manroot < (int)req->psz);
913         if (-1 == (chdir(req->p[req->q.manroot].path))) {
914                 perror(req->p[req->q.manroot].path);
915                 resp_search(NULL, 0, (void *)req);
916                 return;
917         }
918
919         memset(&ps, 0, sizeof(struct manpaths));
920         manpath_manconf(&ps, "etc/catman.conf");
921
922         /*
923          * Poor man's tokenisation: just break apart by spaces.
924          * Yes, this is half-ass.  But it works for now.
925          */
926
927         while (ep && isspace((unsigned char)*ep))
928                 ep++;
929
930         while (ep && '\0' != *ep) {
931                 cp = mandoc_realloc(cp, (sz + 1) * sizeof(char *));
932                 start = ep;
933                 while ('\0' != *ep && ! isspace((unsigned char)*ep))
934                         ep++;
935                 cp[sz] = mandoc_malloc((ep - start) + 1);
936                 memcpy(cp[sz], start, ep - start);
937                 cp[sz++][ep - start] = '\0';
938                 while (isspace((unsigned char)*ep))
939                         ep++;
940         }
941
942         /*
943          * Pump down into apropos backend.
944          * The resp_search() function is called with the results.
945          */
946
947         expr = req->q.legacy ? 
948                 termcomp(sz, cp, &tt) : exprcomp(sz, cp, &tt);
949
950         if (NULL != expr)
951                 rc = apropos_search
952                         (ps.sz, ps.paths, &opt, expr, tt, 
953                          (void *)req, &ressz, &res, resp_search);
954
955         /* ...unless errors occurred. */
956
957         if (0 == rc)
958                 resp_baddb();
959         else if (-1 == rc)
960                 resp_search(NULL, 0, NULL);
961
962         for (i = 0; i < sz; i++)
963                 free(cp[i]);
964
965         free(cp);
966         resfree(res, ressz);
967         exprfree(expr);
968         manpath_free(&ps);
969 }
970
971 int
972 main(void)
973 {
974         int              i;
975         char             buf[PATH_MAX];
976         DIR             *cwd;
977         struct req       req;
978         char            *p, *path, *subpath;
979
980         /* Scan our run-time environment. */
981
982         if (NULL == (cache = getenv("CACHE_DIR")))
983                 cache = "/cache/man.cgi";
984
985         if (NULL == (progname = getenv("SCRIPT_NAME")))
986                 progname = "";
987
988         if (NULL == (css = getenv("CSS_DIR")))
989                 css = "";
990
991         if (NULL == (host = getenv("HTTP_HOST")))
992                 host = "localhost";
993
994         /*
995          * First we change directory into the cache directory so that
996          * subsequent scanning for manpath directories is rooted
997          * relative to the same position.
998          */
999
1000         if (-1 == chdir(cache)) {
1001                 perror(cache);
1002                 resp_bad();
1003                 return(EXIT_FAILURE);
1004         } else if (NULL == (cwd = opendir(cache))) {
1005                 perror(cache);
1006                 resp_bad();
1007                 return(EXIT_FAILURE);
1008         } 
1009
1010         memset(&req, 0, sizeof(struct req));
1011
1012         strlcpy(buf, ".", PATH_MAX);
1013         pathgen(cwd, buf, &req);
1014         closedir(cwd);
1015
1016         /* Next parse out the query string. */
1017
1018         if (NULL != (p = getenv("QUERY_STRING")))
1019                 http_parse(&req, p);
1020
1021         /*
1022          * Now juggle paths to extract information.
1023          * We want to extract our filetype (the file suffix), the
1024          * initial path component, then the trailing component(s).
1025          * Start with leading subpath component. 
1026          */
1027
1028         subpath = path = NULL;
1029         req.page = PAGE__MAX;
1030
1031         if (NULL == (path = getenv("PATH_INFO")) || '\0' == *path)
1032                 req.page = PAGE_INDEX;
1033
1034         if (NULL != path && '/' == *path && '\0' == *++path)
1035                 req.page = PAGE_INDEX;
1036
1037         /* Strip file suffix. */
1038
1039         if (NULL != path && NULL != (p = strrchr(path, '.')))
1040                 if (NULL != p && NULL == strchr(p, '/'))
1041                         *p++ = '\0';
1042
1043         /* Resolve subpath component. */
1044
1045         if (NULL != path && NULL != (subpath = strchr(path, '/')))
1046                 *subpath++ = '\0';
1047
1048         /* Map path into one we recognise. */
1049
1050         if (NULL != path && '\0' != *path)
1051                 for (i = 0; i < (int)PAGE__MAX; i++) 
1052                         if (0 == strcmp(pages[i], path)) {
1053                                 req.page = (enum page)i;
1054                                 break;
1055                         }
1056
1057         /* Route pages. */
1058
1059         switch (req.page) {
1060         case (PAGE_INDEX):
1061                 pg_index(&req, subpath);
1062                 break;
1063         case (PAGE_SEARCH):
1064                 pg_search(&req, subpath);
1065                 break;
1066         case (PAGE_SHOW):
1067                 pg_show(&req, subpath);
1068                 break;
1069         default:
1070                 resp_error404(path);
1071                 break;
1072         }
1073
1074         for (i = 0; i < (int)req.psz; i++) {
1075                 free(req.p[i].path);
1076                 free(req.p[i].name);
1077         }
1078
1079         free(req.p);
1080         return(EXIT_SUCCESS);
1081 }
1082
1083 static int
1084 cmp(const void *p1, const void *p2)
1085 {
1086
1087         return(strcasecmp(((const struct res *)p1)->title,
1088                                 ((const struct res *)p2)->title));
1089 }
1090
1091 /*
1092  * Check to see if an "etc" path consists of a catman.conf file.  If it
1093  * does, that means that the path contains a tree created by catman(8)
1094  * and should be used for indexing.
1095  */
1096 static int
1097 pathstop(DIR *dir)
1098 {
1099         struct dirent   *d;
1100
1101         while (NULL != (d = readdir(dir)))
1102                 if (DT_REG == d->d_type)
1103                         if (0 == strcmp(d->d_name, "catman.conf"))
1104                                 return(1);
1105
1106         return(0);
1107 }
1108
1109 /*
1110  * Scan for indexable paths.
1111  * This adds all paths with "etc/catman.conf" to the buffer.
1112  */
1113 static void
1114 pathgen(DIR *dir, char *path, struct req *req)
1115 {
1116         struct dirent   *d;
1117         char            *cp;
1118         DIR             *cd;
1119         int              rc;
1120         size_t           sz, ssz;
1121
1122         sz = strlcat(path, "/", PATH_MAX);
1123         if (sz >= PATH_MAX) {
1124                 fprintf(stderr, "%s: Path too long", path);
1125                 return;
1126         } 
1127
1128         /* 
1129          * First, scan for the "etc" directory.
1130          * If it's found, then see if it should cause us to stop.  This
1131          * happens when a catman.conf is found in the directory.
1132          */
1133
1134         rc = 0;
1135         while (0 == rc && NULL != (d = readdir(dir))) {
1136                 if (DT_DIR != d->d_type || strcmp(d->d_name, "etc"))
1137                         continue;
1138
1139                 path[(int)sz] = '\0';
1140                 ssz = strlcat(path, d->d_name, PATH_MAX);
1141
1142                 if (ssz >= PATH_MAX) {
1143                         fprintf(stderr, "%s: Path too long", path);
1144                         return;
1145                 } else if (NULL == (cd = opendir(path))) {
1146                         perror(path);
1147                         return;
1148                 } 
1149                 
1150                 rc = pathstop(cd);
1151                 closedir(cd);
1152         }
1153
1154         if (rc > 0) {
1155                 /* This also strips the trailing slash. */
1156                 path[(int)--sz] = '\0';
1157                 req->p = mandoc_realloc
1158                         (req->p, 
1159                          (req->psz + 1) * sizeof(struct paths));
1160                 /*
1161                  * Strip out the leading "./" unless we're just a ".",
1162                  * in which case use an empty string as our name.
1163                  */
1164                 req->p[(int)req->psz].path = mandoc_strdup(path);
1165                 req->p[(int)req->psz].name = 
1166                         cp = mandoc_strdup(path + (1 == sz ? 1 : 2));
1167                 req->psz++;
1168                 /* 
1169                  * The name is just the path with all the slashes taken
1170                  * out of it.  Simple but effective. 
1171                  */
1172                 for ( ; '\0' != *cp; cp++) 
1173                         if ('/' == *cp)
1174                                 *cp = ' ';
1175                 return;
1176         } 
1177
1178         /*
1179          * If no etc/catman.conf was found, recursively enter child
1180          * directory and continue scanning.
1181          */
1182
1183         rewinddir(dir);
1184         while (NULL != (d = readdir(dir))) {
1185                 if (DT_DIR != d->d_type || '.' == d->d_name[0])
1186                         continue;
1187
1188                 path[(int)sz] = '\0';
1189                 ssz = strlcat(path, d->d_name, PATH_MAX);
1190
1191                 if (ssz >= PATH_MAX) {
1192                         fprintf(stderr, "%s: Path too long", path);
1193                         return;
1194                 } else if (NULL == (cd = opendir(path))) {
1195                         perror(path);
1196                         return;
1197                 }
1198
1199                 pathgen(cd, path, req);
1200                 closedir(cd);
1201         }
1202 }