Merge branch 'vendor/OPENSSL'
[dragonfly.git] / contrib / mdocml / cgi.c
1 /*      $Id: cgi.c,v 1.46 2013/10/11 00:06:48 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/wait.h>
22
23 #include <assert.h>
24 #include <ctype.h>
25 #include <errno.h>
26 #include <dirent.h>
27 #include <fcntl.h>
28 #include <limits.h>
29 #include <regex.h>
30 #include <stdio.h>
31 #include <stdarg.h>
32 #include <stdint.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36
37 #if defined(__sun)
38 /* for stat() */
39 #include <fcntl.h>
40 #include <sys/types.h>
41 #include <sys/stat.h>
42 #endif
43
44 #include "apropos_db.h"
45 #include "mandoc.h"
46 #include "mdoc.h"
47 #include "man.h"
48 #include "main.h"
49 #include "manpath.h"
50 #include "mandocdb.h"
51
52 #if defined(__linux__) || defined(__sun)
53 # include <db_185.h>
54 #else
55 # include <db.h>
56 #endif
57
/*
 * The kinds of page this script knows by name (see pages[]).
 * PAGE__MAX is not a page: it is the number of entries before it.
 */
enum    page {
        PAGE_INDEX = 0,
        PAGE_SEARCH,
        PAGE_SHOW,
        PAGE__MAX
};
64
/*
 * One manroot: "name" is what the "manpath" query value is compared
 * against and what the search form offers; "path" is the directory
 * that gets chdir()ed into before the databases are opened.
 */
struct  paths {
        char            *name; /* label of the manroot */
        char            *path; /* directory of the manroot */
};
69
/*
 * The search parameters of one request, as filled in by http_parse()
 * and consumed by the search and response functions.
 * String members are NULL when the request did not supply them.
 */
struct  query {
        const char      *arch;    /* architecture */
        const char      *sec;     /* manual section */
        const char      *expr;    /* unparsed expression string */
        int              manroot; /* manroot index (or -1) */
        int              legacy;  /* whether legacy mode */
};
80
81 struct  req {
82         struct query     q;
83         struct paths    *p;
84         size_t           psz;
85         enum page        page;
86 };
87
/*
 * Forward declarations of the file-local functions.
 * Parameter names are spelled out where the definition is at hand.
 */
static  int              atou(const char *buf, unsigned *v);
static  void             catman(const struct req *req, const char *file);
static  int              cmp(const void *, const void *);
static  void             format(const struct req *req, const char *file);
static  void             html_print(const char *p);
static  void             html_printquery(const struct req *req);
static  void             html_putchar(char c);
static  int              http_decode(char *p);
static  void             http_parse(struct req *req, char *p);
static  void             http_print(const char *p);
static  void             http_putchar(char c);
static  void             http_printquery(const struct req *req);
static  int              pathstop(DIR *);
static  void             pathgen(DIR *, char *, struct req *);
static  void             pg_index(const struct req *req, char *path);
static  void             pg_search(const struct req *req, char *path);
static  void             pg_show(const struct req *req, char *path);
static  void             resp_bad(void);
static  void             resp_baddb(void);
static  void             resp_error400(void);
static  void             resp_error404(const char *page);
static  void             resp_begin_html(int code, const char *msg);
static  void             resp_begin_http(int code, const char *msg);
static  void             resp_end_html(void);
static  void             resp_index(const struct req *req);
static  void             resp_search(struct res *r, size_t sz, void *arg);
static  void             resp_searchform(const struct req *req);
115
/*
 * Run-time settings.  They start out NULL; main() fills them in from
 * the environment, falling back to built-in defaults.
 */
static  const char       *progname = NULL; /* cgi script name */
static  const char       *cache = NULL;    /* cache directory */
static  const char       *css = NULL;      /* css directory */
static  const char       *host = NULL;     /* hostname */
120
121 static  const char * const pages[PAGE__MAX] = {
122         "index", /* PAGE_INDEX */ 
123         "search", /* PAGE_SEARCH */
124         "show", /* PAGE_SHOW */
125 };
126
/*
 * Convert a decimal string into an unsigned number, following the
 * checked strtol(3) idiom suggested by OpenBSD; strtonum(3) is avoided
 * for portability's sake.
 * The whole string must be consumed and the value must lie within
 * [0, INT_MAX].
 * Returns 1 and stores the value in *v on success.
 * Returns 0 and leaves *v alone otherwise.
 */
static int
atou(const char *buf, unsigned *v)
{
        char            *end;
        long             num;

        errno = 0;
        num = strtol(buf, &end, 10);

        /* Reject the empty string and trailing garbage. */
        if ('\0' == buf[0] || '\0' != *end)
                return(0);

        /* Reject values strtol(3) had to clamp. */
        if (ERANGE == errno && (LONG_MAX == num || LONG_MIN == num))
                return(0);

        /* Reject whatever does not fit the permitted range. */
        if (num < 0 || num > INT_MAX)
                return(0);

        *v = (unsigned int)num;
        return(1);
}
149
/*
 * Print one character to standard output, escaping HTML along the way:
 * the double quote, ampersand, greater-than and less-than characters
 * are replaced by their entity references.
 * This will pass non-ASCII straight to output: be warned!
 */
static void
html_putchar(char c)
{

        switch (c) {
        case ('"'):
                /* The HTML entity is "quot"; "quote" is not defined. */
                printf("&quot;");
                break;
        case ('&'):
                printf("&amp;");
                break;
        case ('>'):
                printf("&gt;");
                break;
        case ('<'):
                printf("&lt;");
                break;
        default:
                putchar((unsigned char)c);
                break;
        }
}
176 static void
177 http_printquery(const struct req *req)
178 {
179
180         printf("&expr=");
181         http_print(req->q.expr ? req->q.expr : "");
182         printf("&sec=");
183         http_print(req->q.sec ? req->q.sec : "");
184         printf("&arch=");
185         http_print(req->q.arch ? req->q.arch : "");
186 }
187
188
189 static void
190 html_printquery(const struct req *req)
191 {
192
193         printf("&amp;expr=");
194         html_print(req->q.expr ? req->q.expr : "");
195         printf("&amp;sec=");
196         html_print(req->q.sec ? req->q.sec : "");
197         printf("&amp;arch=");
198         html_print(req->q.arch ? req->q.arch : "");
199 }
200
/*
 * Call through to http_putchar() for every character of the string.
 * Accepts NULL strings, for which nothing is printed.
 */
static void
http_print(const char *p)
{
        const char      *cp;

        if (NULL == p)
                return;

        for (cp = p; '\0' != *cp; cp++)
                http_putchar(*cp);
}
210
/*
 * Call through to html_putchar() for every character of the string.
 * Accepts NULL strings, for which nothing is printed.
 */
static void
html_print(const char *p)
{
        const char      *cp;

        if (NULL == p)
                return;

        for (cp = p; '\0' != *cp; cp++)
                html_putchar(*cp);
}
224
225 /*
226  * Parse out key-value pairs from an HTTP request variable.
227  * This can be either a cookie or a POST/GET string, although man.cgi
228  * uses only GET for simplicity.
229  */
230 static void
231 http_parse(struct req *req, char *p)
232 {
233         char            *key, *val, *manroot;
234         int              i, legacy;
235
236         memset(&req->q, 0, sizeof(struct query));
237
238         legacy = -1;
239         manroot = NULL;
240
241         while ('\0' != *p) {
242                 key = p;
243                 val = NULL;
244
245                 p += (int)strcspn(p, ";&");
246                 if ('\0' != *p)
247                         *p++ = '\0';
248                 if (NULL != (val = strchr(key, '=')))
249                         *val++ = '\0';
250
251                 if ('\0' == *key || NULL == val || '\0' == *val)
252                         continue;
253
254                 /* Just abort handling. */
255
256                 if ( ! http_decode(key))
257                         break;
258                 if (NULL != val && ! http_decode(val))
259                         break;
260
261                 if (0 == strcmp(key, "expr"))
262                         req->q.expr = val;
263                 else if (0 == strcmp(key, "query"))
264                         req->q.expr = val;
265                 else if (0 == strcmp(key, "sec"))
266                         req->q.sec = val;
267                 else if (0 == strcmp(key, "sektion"))
268                         req->q.sec = val;
269                 else if (0 == strcmp(key, "arch"))
270                         req->q.arch = val;
271                 else if (0 == strcmp(key, "manpath"))
272                         manroot = val;
273                 else if (0 == strcmp(key, "apropos"))
274                         legacy = 0 == strcmp(val, "0");
275         }
276
277         /* Test for old man.cgi compatibility mode. */
278
279         req->q.legacy = legacy > 0;
280
281         /* 
282          * Section "0" means no section when in legacy mode.
283          * For some man.cgi scripts, "default" arch is none.
284          */
285
286         if (req->q.legacy && NULL != req->q.sec)
287                 if (0 == strcmp(req->q.sec, "0"))
288                         req->q.sec = NULL;
289         if (req->q.legacy && NULL != req->q.arch)
290                 if (0 == strcmp(req->q.arch, "default"))
291                         req->q.arch = NULL;
292
293         /* Default to first manroot. */
294
295         if (NULL != manroot) {
296                 for (i = 0; i < (int)req->psz; i++)
297                         if (0 == strcmp(req->p[i].name, manroot))
298                                 break;
299                 req->q.manroot = i < (int)req->psz ? i : -1;
300         }
301 }
302
/*
 * Print one character to standard output, URL-escaped: alphanumerics
 * are printed as they are, a space becomes '+', and every other byte
 * becomes '%' followed by two lowercase hexadecimal digits.
 */
static void
http_putchar(char c)
{
        unsigned char    uc;

        /*
         * Work on the byte value: where char is signed, a byte with
         * the high bit set would otherwise be sign-extended and come
         * out as eight hexadecimal digits instead of two.
         */

        uc = (unsigned char)c;

        if (isalnum(uc)) {
                putchar(uc);
                return;
        } else if (' ' == c) {
                putchar('+');
                return;
        }
        printf("%%%.2x", (unsigned int)uc);
}
316
317 /*
318  * HTTP-decode a string.  The standard explanation is that this turns
319  * "%4e+foo" into "n foo" in the regular way.  This is done in-place
320  * over the allocated string.
321  */
322 static int
323 http_decode(char *p)
324 {
325         char             hex[3];
326         int              c;
327
328         hex[2] = '\0';
329
330         for ( ; '\0' != *p; p++) {
331                 if ('%' == *p) {
332                         if ('\0' == (hex[0] = *(p + 1)))
333                                 return(0);
334                         if ('\0' == (hex[1] = *(p + 2)))
335                                 return(0);
336                         if (1 != sscanf(hex, "%x", &c))
337                                 return(0);
338                         if ('\0' == c)
339                                 return(0);
340
341                         *p = (char)c;
342                         memmove(p + 1, p + 3, strlen(p + 3) + 1);
343                 } else
344                         *p = '+' == *p ? ' ' : *p;
345         }
346
347         *p = '\0';
348         return(1);
349 }
350
/*
 * Print the HTTP header block of a response and flush it.
 * A "Status:" line made of "code" and "msg" is printed unless the
 * code is 200, in which case "msg" is not looked at.
 */
static void
resp_begin_http(int code, const char *msg)
{

        if (200 != code)
                printf("Status: %d %s\n", code, msg);

        /* The empty line at the end terminates the header block. */

        fputs("Content-Type: text/html; charset=utf-8\n"
              "Cache-Control: no-cache\n"
              "Pragma: no-cache\n"
              "\n", stdout);

        fflush(stdout);
}
365
366 static void
367 resp_begin_html(int code, const char *msg)
368 {
369
370         resp_begin_http(code, msg);
371
372         printf("<!DOCTYPE HTML PUBLIC "
373                " \"-//W3C//DTD HTML 4.01//EN\""
374                " \"http://www.w3.org/TR/html4/strict.dtd\">\n"
375                "<HTML>\n"
376                "<HEAD>\n"
377                "<META HTTP-EQUIV=\"Content-Type\""
378                " CONTENT=\"text/html; charset=utf-8\">\n"
379                "<LINK REL=\"stylesheet\" HREF=\"%s/man-cgi.css\""
380                " TYPE=\"text/css\" media=\"all\">\n"
381                "<LINK REL=\"stylesheet\" HREF=\"%s/man.css\""
382                " TYPE=\"text/css\" media=\"all\">\n"
383                "<TITLE>System Manpage Reference</TITLE>\n"
384                "</HEAD>\n"
385                "<BODY>\n"
386                "<!-- Begin page content. //-->\n", css, css);
387 }
388
/*
 * Close the BODY and HTML elements opened by resp_begin_html().
 */
static void
resp_end_html(void)
{

        fputs("</BODY>\n"
              "</HTML>\n", stdout);
}
396
397 static void
398 resp_searchform(const struct req *req)
399 {
400         int              i;
401
402         puts("<!-- Begin search form. //-->");
403         printf("<DIV ID=\"mancgi\">\n"
404                "<FORM ACTION=\"%s/search.html\" METHOD=\"get\">\n"
405                "<FIELDSET>\n"
406                "<LEGEND>Search Parameters</LEGEND>\n"
407                "<INPUT TYPE=\"submit\" "
408                " VALUE=\"Search\"> for manuals satisfying \n"
409                "<INPUT TYPE=\"text\" NAME=\"expr\" VALUE=\"",
410                progname);
411         html_print(req->q.expr ? req->q.expr : "");
412         printf("\">, section "
413                "<INPUT TYPE=\"text\""
414                " SIZE=\"4\" NAME=\"sec\" VALUE=\"");
415         html_print(req->q.sec ? req->q.sec : "");
416         printf("\">, arch "
417                "<INPUT TYPE=\"text\""
418                " SIZE=\"8\" NAME=\"arch\" VALUE=\"");
419         html_print(req->q.arch ? req->q.arch : "");
420         printf("\">");
421         if (req->psz > 1) {
422                 puts(", <SELECT NAME=\"manpath\">");
423                 for (i = 0; i < (int)req->psz; i++) {
424                         printf("<OPTION %s VALUE=\"",
425                                 (i == req->q.manroot) ||
426                                 (0 == i && -1 == req->q.manroot) ?
427                                 "SELECTED=\"selected\"" : "");
428                         html_print(req->p[i].name);
429                         printf("\">");
430                         html_print(req->p[i].name);
431                         puts("</OPTION>");
432                 }
433                 puts("</SELECT>");
434         }
435         puts(".\n"
436              "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n"
437              "</FIELDSET>\n"
438              "</FORM>\n"
439              "</DIV>");
440         puts("<!-- End search form. //-->");
441 }
442
/*
 * Respond with the index page: a complete HTML document that
 * contains nothing but the search form.
 */
static void
resp_index(const struct req *req)
{
        resp_begin_html(200, NULL);
        resp_searchform(req);
        resp_end_html();
}
451
452 static void
453 resp_error400(void)
454 {
455
456         resp_begin_html(400, "Query Malformed");
457         printf("<H1>Malformed Query</H1>\n"
458                "<P>\n"
459                "The query your entered was malformed.\n"
460                "Try again from the\n"
461                "<A HREF=\"%s/index.html\">main page</A>.\n"
462                "</P>", progname);
463         resp_end_html();
464 }
465
466 static void
467 resp_error404(const char *page)
468 {
469
470         resp_begin_html(404, "Not Found");
471         puts("<H1>Page Not Found</H1>\n"
472              "<P>\n"
473              "The page you're looking for, ");
474         printf("<B>");
475         html_print(page);
476         printf("</B>,\n"
477                "could not be found.\n"
478                "Try searching from the\n"
479                "<A HREF=\"%s/index.html\">main page</A>.\n"
480                "</P>", progname);
481         resp_end_html();
482 }
483
/*
 * Respond with status 500 and a generic error message.
 */
static void
resp_bad(void)
{

        resp_begin_html(500, "Internal Server Error");
        fputs("<P>Generic badness happened.</P>\n", stdout);
        resp_end_html();
}
491
/*
 * Respond with status 500 and a message blaming the database.
 */
static void
resp_baddb(void)
{

        resp_begin_html(500, "Internal Server Error");
        fputs("<P>Your database is broken.</P>\n", stdout);
        resp_end_html();
}
500
501 static void
502 resp_search(struct res *r, size_t sz, void *arg)
503 {
504         size_t           i, matched;
505         const struct req *req;
506
507         req = (const struct req *)arg;
508
509         if (sz > 0)
510                 assert(req->q.manroot >= 0);
511
512         for (matched = i = 0; i < sz; i++)
513                 if (r[i].matched)
514                         matched++;
515         
516         if (1 == matched) {
517                 for (i = 0; i < sz; i++)
518                         if (r[i].matched)
519                                 break;
520                 /*
521                  * If we have just one result, then jump there now
522                  * without any delay.
523                  */
524                 puts("Status: 303 See Other");
525                 printf("Location: http://%s%s/show/%d/%u/%u.html?",
526                                 host, progname, req->q.manroot,
527                                 r[i].volume, r[i].rec);
528                 http_printquery(req);
529                 puts("\n"
530                      "Content-Type: text/html; charset=utf-8\n");
531                 return;
532         }
533
534         resp_begin_html(200, NULL);
535         resp_searchform(req);
536
537         puts("<DIV CLASS=\"results\">");
538
539         if (0 == matched) {
540                 puts("<P>\n"
541                      "No results found.\n"
542                      "</P>\n"
543                      "</DIV>");
544                 resp_end_html();
545                 return;
546         }
547
548         qsort(r, sz, sizeof(struct res), cmp);
549
550         puts("<TABLE>");
551
552         for (i = 0; i < sz; i++) {
553                 if ( ! r[i].matched)
554                         continue;
555                 printf("<TR>\n"
556                        "<TD CLASS=\"title\">\n"
557                        "<A HREF=\"%s/show/%d/%u/%u.html?", 
558                                 progname, req->q.manroot,
559                                 r[i].volume, r[i].rec);
560                 html_printquery(req);
561                 printf("\">");
562                 html_print(r[i].title);
563                 putchar('(');
564                 html_print(r[i].cat);
565                 if (r[i].arch && '\0' != *r[i].arch) {
566                         putchar('/');
567                         html_print(r[i].arch);
568                 }
569                 printf(")</A>\n"
570                        "</TD>\n"
571                        "<TD CLASS=\"desc\">");
572                 html_print(r[i].desc);
573                 puts("</TD>\n"
574                      "</TR>");
575         }
576
577         puts("</TABLE>\n"
578              "</DIV>");
579         resp_end_html();
580 }
581
/*
 * Serve the index page.  The rest of the request path is not used.
 */
/* ARGSUSED */
static void
pg_index(const struct req *req, char *path)
{

        (void)path;
        resp_index(req);
}
589
/*
 * Decide whether the overstrike of the characters "a" and "b" (found
 * in the input as a, backspace, b) is one of the troff-isms that get
 * replaced by a single character.  These were grok'd from the original
 * man2html.c.
 * Returns '*' or '+' as the replacement, or '\0' if the pair is not
 * special.
 */
static char
catman_overstrike(char a, char b)
{

        if (('+' == a && 'o' == b) || ('o' == a && '+' == b) ||
            ('|' == a && '=' == b) || ('=' == a && '|' == b) ||
            ('*' == a && '=' == b) || ('=' == a && '*' == b) ||
            ('*' == a && '|' == b) || ('|' == a && '*' == b))
                return('*');

        if (('|' == a && '-' == b) || ('-' == a && '|' == b) ||
            ('+' == a && '-' == b) || ('-' == a && '+' == b) ||
            ('+' == a && '|' == b) || ('|' == a && '+' == b))
                return('+');

        return('\0');
}

/*
 * Print the preformatted manual "file" as an HTML page below the
 * search form.  Backspace sequences are translated line by line:
 * underscore, backspace, character becomes italic, the overstrikes
 * known to catman_overstrike() become a single character, and any
 * other overstrike becomes bold.
 * If the file cannot be opened, resp_baddb() is sent instead.
 */
static void
catman(const struct req *req, const char *file)
{
        FILE            *f;
        size_t           len;
        int              i;
        char            *p;
        char             sub;
        int              italic, bold;

        if (NULL == (f = fopen(file, "r"))) {
                resp_baddb();
                return;
        }

        resp_begin_html(200, NULL);
        resp_searchform(req);
        puts("<DIV CLASS=\"catman\">\n"
             "<PRE>");

        while (NULL != (p = fgetln(f, &len))) {
                bold = italic = 0;
                for (i = 0; i < (int)len - 1; i++) {
                        /* 
                         * This means that the catpage is out of state.
                         * Ignore it and keep going (although the
                         * catpage is bogus).
                         */

                        if ('\b' == p[i] || '\n' == p[i])
                                continue;

                        /*
                         * Print a regular character.
                         * Close out any bold/italic scopes.
                         * If we're in back-space mode, make sure we'll
                         * have something to enter when we backspace.
                         */

                        if ('\b' != p[i + 1]) {
                                if (italic)
                                        printf("</I>");
                                if (bold)
                                        printf("</B>");
                                italic = bold = 0;
                                html_putchar(p[i]);
                                continue;
                        } else if (i + 2 >= (int)len)
                                continue;

                        /* Italic mode. */

                        if ('_' == p[i]) {
                                if (bold)
                                        printf("</B>");
                                if ( ! italic)
                                        printf("<I>");
                                bold = 0;
                                italic = 1;
                                i += 2;
                                html_putchar(p[i]);
                                continue;
                        }

                        /* 
                         * Handle funny behaviour troff-isms.
                         * The partner of p[i] is p[i + 2]: at this
                         * point, p[i + 1] is always the backspace.
                         */

                        sub = catman_overstrike(p[i], p[i + 2]);
                        if ('\0' != sub) {
                                if (italic)
                                        printf("</I>");
                                if (bold)
                                        printf("</B>");
                                italic = bold = 0;
                                putchar(sub);
                                i += 2;
                                continue;
                        }

                        /* Bold mode. */
                        
                        if (italic)
                                printf("</I>");
                        if ( ! bold)
                                printf("<B>");
                        bold = 1;
                        italic = 0;
                        i += 2;
                        html_putchar(p[i]);
                }

                /* 
                 * Clean up the last character.
                 * We can get to a newline; don't print that. 
                 */

                if (italic)
                        printf("</I>");
                if (bold)
                        printf("</B>");

                if (i == (int)len - 1 && '\n' != p[i])
                        html_putchar(p[i]);

                putchar('\n');
        }

        puts("</PRE>\n"
             "</DIV>\n"
             "</BODY>\n"
             "</HTML>");

        fclose(f);
}
725
726 static void
727 format(const struct req *req, const char *file)
728 {
729         struct mparse   *mp;
730         int              fd;
731         struct mdoc     *mdoc;
732         struct man      *man;
733         void            *vp;
734         enum mandoclevel rc;
735         char             opts[PATH_MAX + 128];
736
737         if (-1 == (fd = open(file, O_RDONLY, 0))) {
738                 resp_baddb();
739                 return;
740         }
741
742         mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL, NULL);
743         rc = mparse_readfd(mp, fd, file);
744         close(fd);
745
746         if (rc >= MANDOCLEVEL_FATAL) {
747                 resp_baddb();
748                 return;
749         }
750
751         snprintf(opts, sizeof(opts), "fragment,"
752                         "man=%s/search.html?sec=%%S&expr=Nm~^%%N$,"
753                         /*"includes=/cgi-bin/man.cgi/usr/include/%%I"*/,
754                         progname);
755
756         mparse_result(mp, &mdoc, &man);
757         if (NULL == man && NULL == mdoc) {
758                 resp_baddb();
759                 mparse_free(mp);
760                 return;
761         }
762
763         resp_begin_html(200, NULL);
764         resp_searchform(req);
765
766         vp = html_alloc(opts);
767
768         if (NULL != mdoc)
769                 html_mdoc(vp, mdoc);
770         else
771                 html_man(vp, man);
772
773         puts("</BODY>\n"
774              "</HTML>");
775
776         html_free(vp);
777         mparse_free(mp);
778 }
779
780 static void
781 pg_show(const struct req *req, char *path)
782 {
783         struct manpaths  ps;
784         size_t           sz;
785         char            *sub;
786         char             file[PATH_MAX];
787         const char      *cp;
788         int              rc, catm;
789         unsigned int     vol, rec, mr;
790         DB              *idx;
791         DBT              key, val;
792
793         idx = NULL;
794
795         /* Parse out mroot, volume, and record from the path. */
796
797         if (NULL == path || NULL == (sub = strchr(path, '/'))) {
798                 resp_error400();
799                 return;
800         } 
801         *sub++ = '\0';
802         if ( ! atou(path, &mr)) {
803                 resp_error400();
804                 return;
805         }
806         path = sub;
807         if (NULL == (sub = strchr(path, '/'))) {
808                 resp_error400();
809                 return;
810         }
811         *sub++ = '\0';
812         if ( ! atou(path, &vol) || ! atou(sub, &rec)) {
813                 resp_error400();
814                 return;
815         } else if (mr >= (unsigned int)req->psz) {
816                 resp_error400();
817                 return;
818         }
819
820         /*
821          * Begin by chdir()ing into the manroot.
822          * This way we can pick up the database files, which are
823          * relative to the manpath root.
824          */
825
826         if (-1 == chdir(req->p[(int)mr].path)) {
827                 perror(req->p[(int)mr].path);
828                 resp_baddb();
829                 return;
830         }
831
832         memset(&ps, 0, sizeof(struct manpaths));
833         manpath_manconf(&ps, "etc/catman.conf");
834
835         if (vol >= (unsigned int)ps.sz) {
836                 resp_error400();
837                 goto out;
838         }
839
840         sz = strlcpy(file, ps.paths[vol], PATH_MAX);
841         assert(sz < PATH_MAX);
842         strlcat(file, "/", PATH_MAX);
843         strlcat(file, MANDOC_IDX, PATH_MAX);
844
845         /* Open the index recno(3) database. */
846
847         idx = dbopen(file, O_RDONLY, 0, DB_RECNO, NULL);
848         if (NULL == idx) {
849                 perror(file);
850                 resp_baddb();
851                 goto out;
852         }
853
854         key.data = &rec;
855         key.size = 4;
856
857         if (0 != (rc = (*idx->get)(idx, &key, &val, 0))) {
858                 rc < 0 ? resp_baddb() : resp_error400();
859                 goto out;
860         } else if (0 == val.size) {
861                 resp_baddb();
862                 goto out;
863         }
864
865         cp = (char *)val.data;
866         catm = 'c' == *cp++;
867
868         if (NULL == memchr(cp, '\0', val.size - 1)) 
869                 resp_baddb();
870         else {
871                 file[(int)sz] = '\0';
872                 strlcat(file, "/", PATH_MAX);
873                 strlcat(file, cp, PATH_MAX);
874                 if (catm) 
875                         catman(req, file);
876                 else
877                         format(req, file);
878         }
879 out:
880         if (idx)
881                 (*idx->close)(idx);
882         manpath_free(&ps);
883 }
884
885 static void
886 pg_search(const struct req *req, char *path)
887 {
888         size_t            tt, ressz;
889         struct manpaths   ps;
890         int               i, sz, rc;
891         const char       *ep, *start;
892         struct res      *res;
893         char            **cp;
894         struct opts       opt;
895         struct expr      *expr;
896
897         if (req->q.manroot < 0 || 0 == req->psz) {
898                 resp_search(NULL, 0, (void *)req);
899                 return;
900         }
901
902         memset(&opt, 0, sizeof(struct opts));
903
904         ep       = req->q.expr;
905         opt.arch = req->q.arch;
906         opt.cat  = req->q.sec;
907         rc       = -1;
908         sz       = 0;
909         cp       = NULL;
910         ressz    = 0;
911         res      = NULL;
912
913         /*
914          * Begin by chdir()ing into the root of the manpath.
915          * This way we can pick up the database files, which are
916          * relative to the manpath root.
917          */
918
919         assert(req->q.manroot < (int)req->psz);
920         if (-1 == (chdir(req->p[req->q.manroot].path))) {
921                 perror(req->p[req->q.manroot].path);
922                 resp_search(NULL, 0, (void *)req);
923                 return;
924         }
925
926         memset(&ps, 0, sizeof(struct manpaths));
927         manpath_manconf(&ps, "etc/catman.conf");
928
929         /*
930          * Poor man's tokenisation: just break apart by spaces.
931          * Yes, this is half-ass.  But it works for now.
932          */
933
934         while (ep && isspace((unsigned char)*ep))
935                 ep++;
936
937         while (ep && '\0' != *ep) {
938                 cp = mandoc_realloc(cp, (sz + 1) * sizeof(char *));
939                 start = ep;
940                 while ('\0' != *ep && ! isspace((unsigned char)*ep))
941                         ep++;
942                 cp[sz] = mandoc_malloc((ep - start) + 1);
943                 memcpy(cp[sz], start, ep - start);
944                 cp[sz++][ep - start] = '\0';
945                 while (isspace((unsigned char)*ep))
946                         ep++;
947         }
948
949         /*
950          * Pump down into apropos backend.
951          * The resp_search() function is called with the results.
952          */
953
954         expr = req->q.legacy ? 
955                 termcomp(sz, cp, &tt) : exprcomp(sz, cp, &tt);
956
957         if (NULL != expr)
958                 rc = apropos_search
959                         (ps.sz, ps.paths, &opt, expr, tt, 
960                          (void *)req, &ressz, &res, resp_search);
961
962         /* ...unless errors occured. */
963
964         if (0 == rc)
965                 resp_baddb();
966         else if (-1 == rc)
967                 resp_search(NULL, 0, NULL);
968
969         for (i = 0; i < sz; i++)
970                 free(cp[i]);
971
972         free(cp);
973         resfree(res, ressz);
974         exprfree(expr);
975         manpath_free(&ps);
976 }
977
978 int
979 main(void)
980 {
981         int              i;
982         char             buf[PATH_MAX];
983         DIR             *cwd;
984         struct req       req;
985         char            *p, *path, *subpath;
986
987         /* Scan our run-time environment. */
988
989         if (NULL == (cache = getenv("CACHE_DIR")))
990                 cache = "/cache/man.cgi";
991
992         if (NULL == (progname = getenv("SCRIPT_NAME")))
993                 progname = "";
994
995         if (NULL == (css = getenv("CSS_DIR")))
996                 css = "";
997
998         if (NULL == (host = getenv("HTTP_HOST")))
999                 host = "localhost";
1000
1001         /*
1002          * First we change directory into the cache directory so that
1003          * subsequent scanning for manpath directories is rooted
1004          * relative to the same position.
1005          */
1006
1007         if (-1 == chdir(cache)) {
1008                 perror(cache);
1009                 resp_bad();
1010                 return(EXIT_FAILURE);
1011         } else if (NULL == (cwd = opendir(cache))) {
1012                 perror(cache);
1013                 resp_bad();
1014                 return(EXIT_FAILURE);
1015         } 
1016
1017         memset(&req, 0, sizeof(struct req));
1018
1019         strlcpy(buf, ".", PATH_MAX);
1020         pathgen(cwd, buf, &req);
1021         closedir(cwd);
1022
1023         /* Next parse out the query string. */
1024
1025         if (NULL != (p = getenv("QUERY_STRING")))
1026                 http_parse(&req, p);
1027
1028         /*
1029          * Now juggle paths to extract information.
1030          * We want to extract our filetype (the file suffix), the
1031          * initial path component, then the trailing component(s).
1032          * Start with leading subpath component. 
1033          */
1034
1035         subpath = path = NULL;
1036         req.page = PAGE__MAX;
1037
1038         if (NULL == (path = getenv("PATH_INFO")) || '\0' == *path)
1039                 req.page = PAGE_INDEX;
1040
1041         if (NULL != path && '/' == *path && '\0' == *++path)
1042                 req.page = PAGE_INDEX;
1043
1044         /* Strip file suffix. */
1045
1046         if (NULL != path && NULL != (p = strrchr(path, '.')))
1047                 if (NULL != p && NULL == strchr(p, '/'))
1048                         *p++ = '\0';
1049
1050         /* Resolve subpath component. */
1051
1052         if (NULL != path && NULL != (subpath = strchr(path, '/')))
1053                 *subpath++ = '\0';
1054
1055         /* Map path into one we recognise. */
1056
1057         if (NULL != path && '\0' != *path)
1058                 for (i = 0; i < (int)PAGE__MAX; i++) 
1059                         if (0 == strcmp(pages[i], path)) {
1060                                 req.page = (enum page)i;
1061                                 break;
1062                         }
1063
1064         /* Route pages. */
1065
1066         switch (req.page) {
1067         case (PAGE_INDEX):
1068                 pg_index(&req, subpath);
1069                 break;
1070         case (PAGE_SEARCH):
1071                 pg_search(&req, subpath);
1072                 break;
1073         case (PAGE_SHOW):
1074                 pg_show(&req, subpath);
1075                 break;
1076         default:
1077                 resp_error404(path);
1078                 break;
1079         }
1080
1081         for (i = 0; i < (int)req.psz; i++) {
1082                 free(req.p[i].path);
1083                 free(req.p[i].name);
1084         }
1085
1086         free(req.p);
1087         return(EXIT_SUCCESS);
1088 }
1089
1090 static int
1091 cmp(const void *p1, const void *p2)
1092 {
1093
1094         return(strcasecmp(((const struct res *)p1)->title,
1095                                 ((const struct res *)p2)->title));
1096 }
1097
/*
 * Check to see if an "etc" path consists of a catman.conf file.  If it
 * does, that means that the path contains a tree created by catman(8)
 * and should be used for indexing.
 *
 * "dir" is an open stream on the candidate directory; it is read from
 * its current position and is not rewound or closed here.
 * Returns 1 if a regular file named "catman.conf" is found, else 0.
 */
static int
pathstop(DIR *dir)
{
        struct dirent   *d;
#if defined(__sun)
        struct stat      sb;
#endif

        while (NULL != (d = readdir(dir))) {
                /* Cheap name test first; only then look at the type. */
                if (0 != strcmp(d->d_name, "catman.conf"))
                        continue;
#if defined(__sun)
                /*
                 * No d_type here, so ask the file system.  The lookup
                 * has to be relative to the directory being read, not
                 * to the working directory, and a failed lookup must
                 * not leave us inspecting an uninitialised buffer.
                 */
                if (-1 == fstatat(dirfd(dir), d->d_name, &sb, 0))
                        continue;
                if (S_ISREG(sb.st_mode))
                        return(1);
#else
                if (DT_REG == d->d_type)
                        return(1);
#endif
        }

        return(0);
}
1124
1125 /*
1126  * Scan for indexable paths.
1127  * This adds all paths with "etc/catman.conf" to the buffer.
1128  */
1129 static void
1130 pathgen(DIR *dir, char *path, struct req *req)
1131 {
1132         struct dirent   *d;
1133         char            *cp;
1134         DIR             *cd;
1135         int              rc;
1136         size_t           sz, ssz;
1137 #if defined(__sun)
1138         struct stat      sb;
1139 #endif
1140
1141         sz = strlcat(path, "/", PATH_MAX);
1142         if (sz >= PATH_MAX) {
1143                 fprintf(stderr, "%s: Path too long", path);
1144                 return;
1145         } 
1146
1147         /* 
1148          * First, scan for the "etc" directory.
1149          * If it's found, then see if it should cause us to stop.  This
1150          * happens when a catman.conf is found in the directory.
1151          */
1152
1153         rc = 0;
1154         while (0 == rc && NULL != (d = readdir(dir))) {
1155 #if defined(__sun)
1156                 stat(d->d_name, &sb);
1157                 if (!(S_IFDIR & sb.st_mode)
1158 #else
1159                 if (DT_DIR != d->d_type
1160 #endif
1161         || strcmp(d->d_name, "etc"))
1162                         continue;
1163
1164                 path[(int)sz] = '\0';
1165                 ssz = strlcat(path, d->d_name, PATH_MAX);
1166
1167                 if (ssz >= PATH_MAX) {
1168                         fprintf(stderr, "%s: Path too long", path);
1169                         return;
1170                 } else if (NULL == (cd = opendir(path))) {
1171                         perror(path);
1172                         return;
1173                 } 
1174                 
1175                 rc = pathstop(cd);
1176                 closedir(cd);
1177         }
1178
1179         if (rc > 0) {
1180                 /* This also strips the trailing slash. */
1181                 path[(int)--sz] = '\0';
1182                 req->p = mandoc_realloc
1183                         (req->p, 
1184                          (req->psz + 1) * sizeof(struct paths));
1185                 /*
1186                  * Strip out the leading "./" unless we're just a ".",
1187                  * in which case use an empty string as our name.
1188                  */
1189                 req->p[(int)req->psz].path = mandoc_strdup(path);
1190                 req->p[(int)req->psz].name = 
1191                         cp = mandoc_strdup(path + (1 == sz ? 1 : 2));
1192                 req->psz++;
1193                 /* 
1194                  * The name is just the path with all the slashes taken
1195                  * out of it.  Simple but effective. 
1196                  */
1197                 for ( ; '\0' != *cp; cp++) 
1198                         if ('/' == *cp)
1199                                 *cp = ' ';
1200                 return;
1201         } 
1202
1203         /*
1204          * If no etc/catman.conf was found, recursively enter child
1205          * directory and continue scanning.
1206          */
1207
1208         rewinddir(dir);
1209         while (NULL != (d = readdir(dir))) {
1210 #if defined(__sun)
1211                 stat(d->d_name, &sb);
1212                 if (!(S_IFDIR & sb.st_mode)
1213 #else
1214                 if (DT_DIR != d->d_type
1215 #endif
1216         || '.' == d->d_name[0])
1217                         continue;
1218
1219                 path[(int)sz] = '\0';
1220                 ssz = strlcat(path, d->d_name, PATH_MAX);
1221
1222                 if (ssz >= PATH_MAX) {
1223                         fprintf(stderr, "%s: Path too long", path);
1224                         return;
1225                 } else if (NULL == (cd = opendir(path))) {
1226                         perror(path);
1227                         return;
1228                 }
1229
1230                 pathgen(cd, path, req);
1231                 closedir(cd);
1232         }
1233 }