Import mdocml-1.12.3
[dragonfly.git] / contrib / mdocml / cgi.c
CommitLineData
7888c61d 1/* $Id: cgi.c,v 1.46 2013/10/11 00:06:48 schwarze Exp $ */
36342e81
SW
2/*
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17#ifdef HAVE_CONFIG_H
18#include "config.h"
19#endif
20
36342e81
SW
21#include <sys/wait.h>
22
23#include <assert.h>
24#include <ctype.h>
25#include <errno.h>
26#include <dirent.h>
27#include <fcntl.h>
28#include <limits.h>
29#include <regex.h>
30#include <stdio.h>
31#include <stdarg.h>
32#include <stdint.h>
33#include <stdlib.h>
34#include <string.h>
35#include <unistd.h>
36
7888c61d
FF
37#if defined(__sun)
38/* for stat() */
39#include <fcntl.h>
40#include <sys/types.h>
41#include <sys/stat.h>
42#endif
43
36342e81
SW
44#include "apropos_db.h"
45#include "mandoc.h"
46#include "mdoc.h"
47#include "man.h"
48#include "main.h"
49#include "manpath.h"
50#include "mandocdb.h"
51
7888c61d 52#if defined(__linux__) || defined(__sun)
36342e81
SW
53# include <db_185.h>
54#else
55# include <db.h>
56#endif
57
/*
 * Pages this CGI can serve.  The values double as indices into the
 * pages[] name table; PAGE__MAX is the table size and is also what
 * main() leaves in req.page when no known page name matched.
 */
enum	page {
	PAGE_INDEX,	/* "index": the search form only */
	PAGE_SEARCH,	/* "search": run a query */
	PAGE_SHOW,	/* "show": render one manual */
	PAGE__MAX
};
64
/*
 * One manpath root found by pathgen().  Both strings are heap
 * allocated and released at the end of main().
 */
struct	paths {
	char		*name;	/* path sans leading "./", slashes as spaces */
	char		*path;	/* directory, relative to the cache directory */
};
69
/*
 * A query as passed to the search function.
 * The string members point into the (decoded in place) query string
 * handed to http_parse(); they are not separately allocated.
 */
struct	query {
	const char	*arch;		/* architecture */
	const char	*sec;		/* manual section */
	const char	*expr;		/* unparsed expression string */
	int		 manroot;	/* manroot index (or -1) */
	int		 legacy;	/* whether legacy mode */
};
80
81struct req {
82 struct query q;
83 struct paths *p;
84 size_t psz;
85 enum page page;
86};
87
/* Forward declarations for the file-local functions. */
static	int	 atou(const char *buf, unsigned *v);
static	void	 catman(const struct req *req, const char *file);
static	int	 cmp(const void *p1, const void *p2);
static	void	 format(const struct req *req, const char *file);
static	void	 html_print(const char *p);
static	void	 html_printquery(const struct req *req);
static	void	 html_putchar(char c);
static	int	 http_decode(char *p);
static	void	 http_parse(struct req *req, char *p);
static	void	 http_print(const char *p);
static	void	 http_putchar(char c);
static	void	 http_printquery(const struct req *req);
static	int	 pathstop(DIR *dir);
static	void	 pathgen(DIR *dir, char *path, struct req *req);
static	void	 pg_index(const struct req *req, char *path);
static	void	 pg_search(const struct req *req, char *path);
static	void	 pg_show(const struct req *req, char *path);
static	void	 resp_bad(void);
static	void	 resp_baddb(void);
static	void	 resp_error400(void);
static	void	 resp_error404(const char *page);
static	void	 resp_begin_html(int code, const char *msg);
static	void	 resp_begin_http(int code, const char *msg);
static	void	 resp_end_html(void);
static	void	 resp_index(const struct req *req);
static	void	 resp_search(struct res *r, size_t sz, void *arg);
static	void	 resp_searchform(const struct req *req);
115
116static const char *progname; /* cgi script name */
117static const char *cache; /* cache directory */
118static const char *css; /* css directory */
119static const char *host; /* hostname */
120
121static const char * const pages[PAGE__MAX] = {
122 "index", /* PAGE_INDEX */
123 "search", /* PAGE_SEARCH */
124 "show", /* PAGE_SHOW */
125};
126
/*
 * Parse a decimal string into an unsigned value, following the
 * error-checking recipe from OpenBSD's strtol(3) manual.
 * Used instead of strtonum(3) for portability's sake.
 *
 * Returns 1 and stores the result in *v on success.  Returns 0, and
 * leaves *v alone, if the string is empty, has trailing garbage, is
 * negative, or does not fit into the range [0, INT_MAX].
 */
static int
atou(const char *buf, unsigned *v)
{
	char	*end;
	long	 num;

	errno = 0;
	num = strtol(buf, &end, 10);

	/* Nothing to parse, or something left over after the digits. */
	if (*buf == '\0' || *end != '\0')
		return(0);

	/* Overflow or underflow as reported by strtol(3) itself. */
	if (errno == ERANGE && (num == LONG_MAX || num == LONG_MIN))
		return(0);

	/* Representable as a long, but outside of what we accept. */
	if (num < 0 || num > INT_MAX)
		return(0);

	*v = (unsigned int)num;
	return(1);
}
149
/*
 * Print a character, escaping HTML along the way.
 * The four characters with special meaning in HTML text and in
 * double-quoted attribute values are replaced by their entities.
 * This will pass non-ASCII straight to output: be warned!
 */
static void
html_putchar(char c)
{

	switch (c) {
	case ('"'):
		/* The entity is "quot"; "quote" is not defined by HTML. */
		printf("&quot;");
		break;
	case ('&'):
		printf("&amp;");
		break;
	case ('>'):
		printf("&gt;");
		break;
	case ('<'):
		printf("&lt;");
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
176static void
177http_printquery(const struct req *req)
178{
179
180 printf("&expr=");
181 http_print(req->q.expr ? req->q.expr : "");
182 printf("&sec=");
183 http_print(req->q.sec ? req->q.sec : "");
184 printf("&arch=");
185 http_print(req->q.arch ? req->q.arch : "");
186}
187
188
189static void
190html_printquery(const struct req *req)
191{
192
193 printf("&amp;expr=");
194 html_print(req->q.expr ? req->q.expr : "");
195 printf("&amp;sec=");
196 html_print(req->q.sec ? req->q.sec : "");
197 printf("&amp;arch=");
198 html_print(req->q.arch ? req->q.arch : "");
199}
200
/*
 * Call through to http_putchar() for every character of the string.
 * Accepts NULL strings, for which nothing is printed.
 */
static void
http_print(const char *p)
{
	const char	*cur;

	if (p == NULL)
		return;

	for (cur = p; *cur != '\0'; cur++)
		http_putchar(*cur);
}
210
/*
 * Call through to html_putchar() for every character of the string.
 * Accepts NULL strings, for which nothing is printed.
 */
static void
html_print(const char *p)
{
	const char	*cur;

	if (p == NULL)
		return;

	for (cur = p; *cur != '\0'; cur++)
		html_putchar(*cur);
}
224
225/*
226 * Parse out key-value pairs from an HTTP request variable.
227 * This can be either a cookie or a POST/GET string, although man.cgi
228 * uses only GET for simplicity.
229 */
230static void
231http_parse(struct req *req, char *p)
232{
233 char *key, *val, *manroot;
234 int i, legacy;
235
236 memset(&req->q, 0, sizeof(struct query));
237
238 legacy = -1;
239 manroot = NULL;
240
241 while ('\0' != *p) {
242 key = p;
243 val = NULL;
244
245 p += (int)strcspn(p, ";&");
246 if ('\0' != *p)
247 *p++ = '\0';
248 if (NULL != (val = strchr(key, '=')))
249 *val++ = '\0';
250
251 if ('\0' == *key || NULL == val || '\0' == *val)
252 continue;
253
254 /* Just abort handling. */
255
256 if ( ! http_decode(key))
257 break;
258 if (NULL != val && ! http_decode(val))
259 break;
260
261 if (0 == strcmp(key, "expr"))
262 req->q.expr = val;
263 else if (0 == strcmp(key, "query"))
264 req->q.expr = val;
265 else if (0 == strcmp(key, "sec"))
266 req->q.sec = val;
267 else if (0 == strcmp(key, "sektion"))
268 req->q.sec = val;
269 else if (0 == strcmp(key, "arch"))
270 req->q.arch = val;
271 else if (0 == strcmp(key, "manpath"))
272 manroot = val;
273 else if (0 == strcmp(key, "apropos"))
274 legacy = 0 == strcmp(val, "0");
275 }
276
277 /* Test for old man.cgi compatibility mode. */
278
279 req->q.legacy = legacy > 0;
280
281 /*
282 * Section "0" means no section when in legacy mode.
283 * For some man.cgi scripts, "default" arch is none.
284 */
285
286 if (req->q.legacy && NULL != req->q.sec)
287 if (0 == strcmp(req->q.sec, "0"))
288 req->q.sec = NULL;
289 if (req->q.legacy && NULL != req->q.arch)
290 if (0 == strcmp(req->q.arch, "default"))
291 req->q.arch = NULL;
292
293 /* Default to first manroot. */
294
295 if (NULL != manroot) {
296 for (i = 0; i < (int)req->psz; i++)
297 if (0 == strcmp(req->p[i].name, manroot))
298 break;
299 req->q.manroot = i < (int)req->psz ? i : -1;
300 }
301}
302
/*
 * Print a character, URL-encoding it along the way: alphanumerics go
 * out unchanged, a space becomes '+', and everything else is written
 * as '%' followed by two lower-case hex digits.
 */
static void
http_putchar(char c)
{

	if (isalnum((unsigned char)c)) {
		putchar((unsigned char)c);
		return;
	} else if (' ' == c) {
		putchar('+');
		return;
	}

	/*
	 * Convert through unsigned char: where plain char is signed, a
	 * byte >= 0x80 would otherwise be sign-extended and come out
	 * as eight hex digits instead of two.
	 */
	printf("%%%.2x", (unsigned int)(unsigned char)c);
}
316
/*
 * HTTP-decode a string.  The standard explanation is that this turns
 * "%4e+foo" into "N foo" in the regular way.  This is done in-place
 * over the allocated string.
 *
 * Returns 1 on success.  Returns 0 if a '%' is not followed by
 * exactly two hexadecimal digits or if an escape decodes to NUL; the
 * string may then be left partially decoded.
 */
static int
http_decode(char *p)
{
	char		 hex[3];
	int		 c;

	hex[2] = '\0';

	for ( ; '\0' != *p; p++) {
		if ('+' == *p) {
			*p = ' ';
			continue;
		} else if ('%' != *p)
			continue;

		/*
		 * Insist on two hex digits.  This also catches a
		 * string ending early, and the second test is only
		 * reached if the first character is not the NUL.
		 */
		if ( ! isxdigit((unsigned char)p[1]) ||
		    ! isxdigit((unsigned char)p[2]))
			return(0);

		hex[0] = p[1];
		hex[1] = p[2];
		c = (int)strtol(hex, NULL, 16);

		/* An embedded NUL would silently truncate the value. */
		if (0 == c)
			return(0);

		/*
		 * Store the character and close the gap.  The loop
		 * increment then steps over it, so a decoded '+' or '%'
		 * is not interpreted a second time.
		 */
		*p = (char)c;
		memmove(p + 1, p + 3, strlen(p + 3) + 1);
	}

	return(1);
}
350
/*
 * Write the CGI response header block and flush it.
 * A "Status:" line is only written for codes other than 200, so msg
 * is not looked at (and may be NULL) for a 200 response.
 */
static void
resp_begin_http(int code, const char *msg)
{
	const int	 ok = 200;

	if (code != ok)
		printf("Status: %d %s\n", code, msg);

	/* puts() supplies the newline that ends the header block. */
	puts("Content-Type: text/html; charset=utf-8\n"
	     "Cache-Control: no-cache\n"
	     "Pragma: no-cache\n"
	     "");

	fflush(stdout);
}
365
366static void
367resp_begin_html(int code, const char *msg)
368{
369
370 resp_begin_http(code, msg);
371
372 printf("<!DOCTYPE HTML PUBLIC "
373 " \"-//W3C//DTD HTML 4.01//EN\""
374 " \"http://www.w3.org/TR/html4/strict.dtd\">\n"
375 "<HTML>\n"
376 "<HEAD>\n"
377 "<META HTTP-EQUIV=\"Content-Type\""
378 " CONTENT=\"text/html; charset=utf-8\">\n"
379 "<LINK REL=\"stylesheet\" HREF=\"%s/man-cgi.css\""
380 " TYPE=\"text/css\" media=\"all\">\n"
381 "<LINK REL=\"stylesheet\" HREF=\"%s/man.css\""
382 " TYPE=\"text/css\" media=\"all\">\n"
383 "<TITLE>System Manpage Reference</TITLE>\n"
384 "</HEAD>\n"
385 "<BODY>\n"
386 "<!-- Begin page content. //-->\n", css, css);
387}
388
/*
 * Close the BODY and HTML elements opened by resp_begin_html().
 */
static void
resp_end_html(void)
{
	static const char	 tail[] = "</BODY>\n"
					  "</HTML>";

	puts(tail);
}
396
397static void
398resp_searchform(const struct req *req)
399{
400 int i;
401
402 puts("<!-- Begin search form. //-->");
403 printf("<DIV ID=\"mancgi\">\n"
404 "<FORM ACTION=\"%s/search.html\" METHOD=\"get\">\n"
405 "<FIELDSET>\n"
406 "<LEGEND>Search Parameters</LEGEND>\n"
407 "<INPUT TYPE=\"submit\" "
408 " VALUE=\"Search\"> for manuals satisfying \n"
409 "<INPUT TYPE=\"text\" NAME=\"expr\" VALUE=\"",
410 progname);
411 html_print(req->q.expr ? req->q.expr : "");
412 printf("\">, section "
413 "<INPUT TYPE=\"text\""
414 " SIZE=\"4\" NAME=\"sec\" VALUE=\"");
415 html_print(req->q.sec ? req->q.sec : "");
416 printf("\">, arch "
417 "<INPUT TYPE=\"text\""
418 " SIZE=\"8\" NAME=\"arch\" VALUE=\"");
419 html_print(req->q.arch ? req->q.arch : "");
420 printf("\">");
421 if (req->psz > 1) {
422 puts(", <SELECT NAME=\"manpath\">");
423 for (i = 0; i < (int)req->psz; i++) {
424 printf("<OPTION %s VALUE=\"",
425 (i == req->q.manroot) ||
426 (0 == i && -1 == req->q.manroot) ?
427 "SELECTED=\"selected\"" : "");
428 html_print(req->p[i].name);
429 printf("\">");
430 html_print(req->p[i].name);
431 puts("</OPTION>");
432 }
433 puts("</SELECT>");
434 }
435 puts(".\n"
436 "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n"
437 "</FIELDSET>\n"
438 "</FORM>\n"
439 "</DIV>");
440 puts("<!-- End search form. //-->");
441}
442
/*
 * The index page: a complete HTML document holding nothing but the
 * search form.
 */
static void
resp_index(const struct req *req)
{
	resp_begin_html(200, NULL);	/* no status text needed for 200 */
	resp_searchform(req);
	resp_end_html();
}
451
452static void
453resp_error400(void)
454{
455
456 resp_begin_html(400, "Query Malformed");
457 printf("<H1>Malformed Query</H1>\n"
458 "<P>\n"
459 "The query your entered was malformed.\n"
460 "Try again from the\n"
461 "<A HREF=\"%s/index.html\">main page</A>.\n"
462 "</P>", progname);
463 resp_end_html();
464}
465
466static void
467resp_error404(const char *page)
468{
469
470 resp_begin_html(404, "Not Found");
471 puts("<H1>Page Not Found</H1>\n"
472 "<P>\n"
473 "The page you're looking for, ");
474 printf("<B>");
475 html_print(page);
476 printf("</B>,\n"
477 "could not be found.\n"
478 "Try searching from the\n"
479 "<A HREF=\"%s/index.html\">main page</A>.\n"
480 "</P>", progname);
481 resp_end_html();
482}
483
/*
 * Respond with "500 Internal Server Error" for failures that are not
 * related to a database.
 */
static void
resp_bad(void)
{
	static const char	 body[] = "<P>Generic badness happened.</P>";

	resp_begin_html(500, "Internal Server Error");
	puts(body);
	resp_end_html();
}
491
/*
 * Respond with "500 Internal Server Error" when a database or a
 * manual it refers to cannot be opened or read.
 */
static void
resp_baddb(void)
{
	static const char	 body[] = "<P>Your database is broken.</P>";

	resp_begin_html(500, "Internal Server Error");
	puts(body);
	resp_end_html();
}
500
501static void
502resp_search(struct res *r, size_t sz, void *arg)
503{
504 size_t i, matched;
505 const struct req *req;
506
507 req = (const struct req *)arg;
508
509 if (sz > 0)
510 assert(req->q.manroot >= 0);
511
512 for (matched = i = 0; i < sz; i++)
513 if (r[i].matched)
514 matched++;
515
516 if (1 == matched) {
517 for (i = 0; i < sz; i++)
518 if (r[i].matched)
519 break;
520 /*
521 * If we have just one result, then jump there now
522 * without any delay.
523 */
524 puts("Status: 303 See Other");
525 printf("Location: http://%s%s/show/%d/%u/%u.html?",
526 host, progname, req->q.manroot,
527 r[i].volume, r[i].rec);
528 http_printquery(req);
529 puts("\n"
530 "Content-Type: text/html; charset=utf-8\n");
531 return;
532 }
533
534 resp_begin_html(200, NULL);
535 resp_searchform(req);
536
537 puts("<DIV CLASS=\"results\">");
538
539 if (0 == matched) {
540 puts("<P>\n"
541 "No results found.\n"
542 "</P>\n"
543 "</DIV>");
544 resp_end_html();
545 return;
546 }
547
548 qsort(r, sz, sizeof(struct res), cmp);
549
550 puts("<TABLE>");
551
552 for (i = 0; i < sz; i++) {
553 if ( ! r[i].matched)
554 continue;
555 printf("<TR>\n"
556 "<TD CLASS=\"title\">\n"
557 "<A HREF=\"%s/show/%d/%u/%u.html?",
558 progname, req->q.manroot,
559 r[i].volume, r[i].rec);
560 html_printquery(req);
561 printf("\">");
562 html_print(r[i].title);
563 putchar('(');
564 html_print(r[i].cat);
565 if (r[i].arch && '\0' != *r[i].arch) {
566 putchar('/');
567 html_print(r[i].arch);
568 }
569 printf(")</A>\n"
570 "</TD>\n"
571 "<TD CLASS=\"desc\">");
572 html_print(r[i].desc);
573 puts("</TD>\n"
574 "</TR>");
575 }
576
577 puts("</TABLE>\n"
578 "</DIV>");
579 resp_end_html();
580}
581
/*
 * Page handler for the index page.
 * The sub-path is not used; the parameter only exists so that all
 * page handlers share one signature.
 */
/* ARGSUSED */
static void
pg_index(const struct req *req, char *path)
{
	resp_index(req);
}
589
/*
 * Print a preformatted ("cat") manual page as HTML.
 *
 * The file is read line by line and wrapped into a PRE element.
 * Backspace overstrike sequences "X\bY" are translated: an underscore
 * struck over becomes italic, a few known character combinations
 * become '*' or '+', and everything else becomes bold.  All other
 * characters are HTML-escaped and printed as they are.
 *
 * If the file cannot be opened, a database error page is sent
 * instead.
 */
static void
catman(const struct req *req, const char *file)
{
	FILE		*f;
	size_t		 len;
	int		 i;
	char		*p;
	int		 italic, bold;

	if (NULL == (f = fopen(file, "r"))) {
		resp_baddb();
		return;
	}

	resp_begin_html(200, NULL);
	resp_searchform(req);
	puts("<DIV CLASS=\"catman\">\n"
	     "<PRE>");

	while (NULL != (p = fgetln(f, &len))) {
		bold = italic = 0;

		/* The last character of the line is handled below. */
		for (i = 0; i < (int)len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= (int)len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the character struck over p[i].
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</B>");
				if ( ! italic)
					printf("<I>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both groups compare p[i] with p[i + 2], the
			 * two characters sharing one output position.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
			    ('o' == p[i] && '+' == p[i + 2]) ||
			    ('|' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '|' == p[i + 2]) ||
			    ('*' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '*' == p[i + 2]) ||
			    ('*' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '*' == p[i + 2])) {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '|' == p[i + 2]) ||
			    ('+' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '+' == p[i + 2]) ||
			    ('+' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '+' == p[i + 2])) {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</I>");
			if ( ! bold)
				printf("<B>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 * If an overstrike consumed the end of the line, i is
		 * already past len - 1 and nothing is left to print.
		 */

		if (italic)
			printf("</I>");
		if (bold)
			printf("</B>");

		if (i == (int)len - 1 && '\n' != p[i])
			html_putchar(p[i]);

		putchar('\n');
	}

	puts("</PRE>\n"
	     "</DIV>\n"
	     "</BODY>\n"
	     "</HTML>");

	fclose(f);
}
725
726static void
727format(const struct req *req, const char *file)
728{
729 struct mparse *mp;
730 int fd;
731 struct mdoc *mdoc;
732 struct man *man;
733 void *vp;
734 enum mandoclevel rc;
f88b6c16 735 char opts[PATH_MAX + 128];
36342e81
SW
736
737 if (-1 == (fd = open(file, O_RDONLY, 0))) {
738 resp_baddb();
739 return;
740 }
741
f88b6c16 742 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL, NULL);
36342e81
SW
743 rc = mparse_readfd(mp, fd, file);
744 close(fd);
745
746 if (rc >= MANDOCLEVEL_FATAL) {
747 resp_baddb();
748 return;
749 }
750
751 snprintf(opts, sizeof(opts), "fragment,"
f88b6c16 752 "man=%s/search.html?sec=%%S&expr=Nm~^%%N$,"
36342e81
SW
753 /*"includes=/cgi-bin/man.cgi/usr/include/%%I"*/,
754 progname);
755
756 mparse_result(mp, &mdoc, &man);
757 if (NULL == man && NULL == mdoc) {
758 resp_baddb();
759 mparse_free(mp);
760 return;
761 }
762
763 resp_begin_html(200, NULL);
764 resp_searchform(req);
765
766 vp = html_alloc(opts);
767
768 if (NULL != mdoc)
769 html_mdoc(vp, mdoc);
770 else
771 html_man(vp, man);
772
773 puts("</BODY>\n"
774 "</HTML>");
775
776 html_free(vp);
777 mparse_free(mp);
778}
779
780static void
781pg_show(const struct req *req, char *path)
782{
783 struct manpaths ps;
784 size_t sz;
785 char *sub;
f88b6c16 786 char file[PATH_MAX];
36342e81
SW
787 const char *cp;
788 int rc, catm;
789 unsigned int vol, rec, mr;
790 DB *idx;
791 DBT key, val;
792
793 idx = NULL;
794
795 /* Parse out mroot, volume, and record from the path. */
796
797 if (NULL == path || NULL == (sub = strchr(path, '/'))) {
798 resp_error400();
799 return;
800 }
801 *sub++ = '\0';
802 if ( ! atou(path, &mr)) {
803 resp_error400();
804 return;
805 }
806 path = sub;
807 if (NULL == (sub = strchr(path, '/'))) {
808 resp_error400();
809 return;
810 }
811 *sub++ = '\0';
812 if ( ! atou(path, &vol) || ! atou(sub, &rec)) {
813 resp_error400();
814 return;
815 } else if (mr >= (unsigned int)req->psz) {
816 resp_error400();
817 return;
818 }
819
820 /*
821 * Begin by chdir()ing into the manroot.
822 * This way we can pick up the database files, which are
823 * relative to the manpath root.
824 */
825
826 if (-1 == chdir(req->p[(int)mr].path)) {
827 perror(req->p[(int)mr].path);
828 resp_baddb();
829 return;
830 }
831
832 memset(&ps, 0, sizeof(struct manpaths));
833 manpath_manconf(&ps, "etc/catman.conf");
834
835 if (vol >= (unsigned int)ps.sz) {
836 resp_error400();
837 goto out;
838 }
839
f88b6c16
FF
840 sz = strlcpy(file, ps.paths[vol], PATH_MAX);
841 assert(sz < PATH_MAX);
842 strlcat(file, "/", PATH_MAX);
843 strlcat(file, MANDOC_IDX, PATH_MAX);
36342e81
SW
844
845 /* Open the index recno(3) database. */
846
847 idx = dbopen(file, O_RDONLY, 0, DB_RECNO, NULL);
848 if (NULL == idx) {
849 perror(file);
850 resp_baddb();
851 goto out;
852 }
853
854 key.data = &rec;
855 key.size = 4;
856
857 if (0 != (rc = (*idx->get)(idx, &key, &val, 0))) {
858 rc < 0 ? resp_baddb() : resp_error400();
859 goto out;
860 } else if (0 == val.size) {
861 resp_baddb();
862 goto out;
863 }
864
865 cp = (char *)val.data;
866 catm = 'c' == *cp++;
867
868 if (NULL == memchr(cp, '\0', val.size - 1))
869 resp_baddb();
870 else {
871 file[(int)sz] = '\0';
f88b6c16
FF
872 strlcat(file, "/", PATH_MAX);
873 strlcat(file, cp, PATH_MAX);
36342e81
SW
874 if (catm)
875 catman(req, file);
876 else
877 format(req, file);
878 }
879out:
880 if (idx)
881 (*idx->close)(idx);
882 manpath_free(&ps);
883}
884
885static void
886pg_search(const struct req *req, char *path)
887{
888 size_t tt, ressz;
889 struct manpaths ps;
890 int i, sz, rc;
891 const char *ep, *start;
892 struct res *res;
893 char **cp;
894 struct opts opt;
895 struct expr *expr;
896
897 if (req->q.manroot < 0 || 0 == req->psz) {
898 resp_search(NULL, 0, (void *)req);
899 return;
900 }
901
902 memset(&opt, 0, sizeof(struct opts));
903
904 ep = req->q.expr;
905 opt.arch = req->q.arch;
906 opt.cat = req->q.sec;
907 rc = -1;
908 sz = 0;
909 cp = NULL;
910 ressz = 0;
911 res = NULL;
912
913 /*
914 * Begin by chdir()ing into the root of the manpath.
915 * This way we can pick up the database files, which are
916 * relative to the manpath root.
917 */
918
919 assert(req->q.manroot < (int)req->psz);
920 if (-1 == (chdir(req->p[req->q.manroot].path))) {
921 perror(req->p[req->q.manroot].path);
922 resp_search(NULL, 0, (void *)req);
923 return;
924 }
925
926 memset(&ps, 0, sizeof(struct manpaths));
927 manpath_manconf(&ps, "etc/catman.conf");
928
929 /*
930 * Poor man's tokenisation: just break apart by spaces.
931 * Yes, this is half-ass. But it works for now.
932 */
933
934 while (ep && isspace((unsigned char)*ep))
935 ep++;
936
937 while (ep && '\0' != *ep) {
938 cp = mandoc_realloc(cp, (sz + 1) * sizeof(char *));
939 start = ep;
940 while ('\0' != *ep && ! isspace((unsigned char)*ep))
941 ep++;
942 cp[sz] = mandoc_malloc((ep - start) + 1);
943 memcpy(cp[sz], start, ep - start);
944 cp[sz++][ep - start] = '\0';
945 while (isspace((unsigned char)*ep))
946 ep++;
947 }
948
949 /*
950 * Pump down into apropos backend.
951 * The resp_search() function is called with the results.
952 */
953
954 expr = req->q.legacy ?
955 termcomp(sz, cp, &tt) : exprcomp(sz, cp, &tt);
956
957 if (NULL != expr)
958 rc = apropos_search
959 (ps.sz, ps.paths, &opt, expr, tt,
960 (void *)req, &ressz, &res, resp_search);
961
962 /* ...unless errors occured. */
963
964 if (0 == rc)
965 resp_baddb();
966 else if (-1 == rc)
967 resp_search(NULL, 0, NULL);
968
969 for (i = 0; i < sz; i++)
970 free(cp[i]);
971
972 free(cp);
973 resfree(res, ressz);
974 exprfree(expr);
975 manpath_free(&ps);
976}
977
978int
979main(void)
980{
981 int i;
f88b6c16 982 char buf[PATH_MAX];
36342e81
SW
983 DIR *cwd;
984 struct req req;
985 char *p, *path, *subpath;
986
987 /* Scan our run-time environment. */
988
989 if (NULL == (cache = getenv("CACHE_DIR")))
990 cache = "/cache/man.cgi";
991
992 if (NULL == (progname = getenv("SCRIPT_NAME")))
993 progname = "";
994
995 if (NULL == (css = getenv("CSS_DIR")))
996 css = "";
997
998 if (NULL == (host = getenv("HTTP_HOST")))
999 host = "localhost";
1000
1001 /*
1002 * First we change directory into the cache directory so that
1003 * subsequent scanning for manpath directories is rooted
1004 * relative to the same position.
1005 */
1006
1007 if (-1 == chdir(cache)) {
1008 perror(cache);
1009 resp_bad();
1010 return(EXIT_FAILURE);
1011 } else if (NULL == (cwd = opendir(cache))) {
1012 perror(cache);
1013 resp_bad();
1014 return(EXIT_FAILURE);
1015 }
1016
1017 memset(&req, 0, sizeof(struct req));
1018
f88b6c16 1019 strlcpy(buf, ".", PATH_MAX);
36342e81
SW
1020 pathgen(cwd, buf, &req);
1021 closedir(cwd);
1022
1023 /* Next parse out the query string. */
1024
1025 if (NULL != (p = getenv("QUERY_STRING")))
1026 http_parse(&req, p);
1027
1028 /*
1029 * Now juggle paths to extract information.
1030 * We want to extract our filetype (the file suffix), the
1031 * initial path component, then the trailing component(s).
1032 * Start with leading subpath component.
1033 */
1034
1035 subpath = path = NULL;
1036 req.page = PAGE__MAX;
1037
1038 if (NULL == (path = getenv("PATH_INFO")) || '\0' == *path)
1039 req.page = PAGE_INDEX;
1040
1041 if (NULL != path && '/' == *path && '\0' == *++path)
1042 req.page = PAGE_INDEX;
1043
1044 /* Strip file suffix. */
1045
1046 if (NULL != path && NULL != (p = strrchr(path, '.')))
1047 if (NULL != p && NULL == strchr(p, '/'))
1048 *p++ = '\0';
1049
1050 /* Resolve subpath component. */
1051
1052 if (NULL != path && NULL != (subpath = strchr(path, '/')))
1053 *subpath++ = '\0';
1054
1055 /* Map path into one we recognise. */
1056
1057 if (NULL != path && '\0' != *path)
1058 for (i = 0; i < (int)PAGE__MAX; i++)
1059 if (0 == strcmp(pages[i], path)) {
1060 req.page = (enum page)i;
1061 break;
1062 }
1063
1064 /* Route pages. */
1065
1066 switch (req.page) {
1067 case (PAGE_INDEX):
1068 pg_index(&req, subpath);
1069 break;
1070 case (PAGE_SEARCH):
1071 pg_search(&req, subpath);
1072 break;
1073 case (PAGE_SHOW):
1074 pg_show(&req, subpath);
1075 break;
1076 default:
1077 resp_error404(path);
1078 break;
1079 }
1080
1081 for (i = 0; i < (int)req.psz; i++) {
1082 free(req.p[i].path);
1083 free(req.p[i].name);
1084 }
1085
1086 free(req.p);
1087 return(EXIT_SUCCESS);
1088}
1089
1090static int
1091cmp(const void *p1, const void *p2)
1092{
1093
1094 return(strcasecmp(((const struct res *)p1)->title,
1095 ((const struct res *)p2)->title));
1096}
1097
1098/*
1099 * Check to see if an "etc" path consists of a catman.conf file. If it
1100 * does, that means that the path contains a tree created by catman(8)
1101 * and should be used for indexing.
1102 */
1103static int
1104pathstop(DIR *dir)
1105{
1106 struct dirent *d;
7888c61d
FF
1107#if defined(__sun)
1108 struct stat sb;
1109#endif
36342e81 1110
7888c61d
FF
1111 while (NULL != (d = readdir(dir))) {
1112#if defined(__sun)
1113 stat(d->d_name, &sb);
1114 if (S_IFREG & sb.st_mode)
1115#else
36342e81 1116 if (DT_REG == d->d_type)
7888c61d 1117#endif
36342e81
SW
1118 if (0 == strcmp(d->d_name, "catman.conf"))
1119 return(1);
7888c61d 1120 }
36342e81
SW
1121
1122 return(0);
1123}
1124
1125/*
1126 * Scan for indexable paths.
1127 * This adds all paths with "etc/catman.conf" to the buffer.
1128 */
1129static void
1130pathgen(DIR *dir, char *path, struct req *req)
1131{
1132 struct dirent *d;
1133 char *cp;
1134 DIR *cd;
1135 int rc;
1136 size_t sz, ssz;
7888c61d
FF
1137#if defined(__sun)
1138 struct stat sb;
1139#endif
36342e81 1140
f88b6c16
FF
1141 sz = strlcat(path, "/", PATH_MAX);
1142 if (sz >= PATH_MAX) {
36342e81
SW
1143 fprintf(stderr, "%s: Path too long", path);
1144 return;
1145 }
1146
1147 /*
1148 * First, scan for the "etc" directory.
1149 * If it's found, then see if it should cause us to stop. This
1150 * happens when a catman.conf is found in the directory.
1151 */
1152
1153 rc = 0;
1154 while (0 == rc && NULL != (d = readdir(dir))) {
7888c61d
FF
1155#if defined(__sun)
1156 stat(d->d_name, &sb);
1157 if (!(S_IFDIR & sb.st_mode)
1158#else
1159 if (DT_DIR != d->d_type
1160#endif
1161 || strcmp(d->d_name, "etc"))
36342e81
SW
1162 continue;
1163
1164 path[(int)sz] = '\0';
f88b6c16 1165 ssz = strlcat(path, d->d_name, PATH_MAX);
36342e81 1166
f88b6c16 1167 if (ssz >= PATH_MAX) {
36342e81
SW
1168 fprintf(stderr, "%s: Path too long", path);
1169 return;
1170 } else if (NULL == (cd = opendir(path))) {
1171 perror(path);
1172 return;
1173 }
1174
1175 rc = pathstop(cd);
1176 closedir(cd);
1177 }
1178
1179 if (rc > 0) {
1180 /* This also strips the trailing slash. */
1181 path[(int)--sz] = '\0';
1182 req->p = mandoc_realloc
1183 (req->p,
1184 (req->psz + 1) * sizeof(struct paths));
1185 /*
1186 * Strip out the leading "./" unless we're just a ".",
1187 * in which case use an empty string as our name.
1188 */
1189 req->p[(int)req->psz].path = mandoc_strdup(path);
1190 req->p[(int)req->psz].name =
1191 cp = mandoc_strdup(path + (1 == sz ? 1 : 2));
1192 req->psz++;
1193 /*
1194 * The name is just the path with all the slashes taken
1195 * out of it. Simple but effective.
1196 */
1197 for ( ; '\0' != *cp; cp++)
1198 if ('/' == *cp)
1199 *cp = ' ';
1200 return;
1201 }
1202
1203 /*
1204 * If no etc/catman.conf was found, recursively enter child
1205 * directory and continue scanning.
1206 */
1207
1208 rewinddir(dir);
1209 while (NULL != (d = readdir(dir))) {
7888c61d
FF
1210#if defined(__sun)
1211 stat(d->d_name, &sb);
1212 if (!(S_IFDIR & sb.st_mode)
1213#else
1214 if (DT_DIR != d->d_type
1215#endif
1216 || '.' == d->d_name[0])
36342e81
SW
1217 continue;
1218
1219 path[(int)sz] = '\0';
f88b6c16 1220 ssz = strlcat(path, d->d_name, PATH_MAX);
36342e81 1221
f88b6c16 1222 if (ssz >= PATH_MAX) {
36342e81
SW
1223 fprintf(stderr, "%s: Path too long", path);
1224 return;
1225 } else if (NULL == (cd = opendir(path))) {
1226 perror(path);
1227 return;
1228 }
1229
1230 pathgen(cd, path, req);
1231 closedir(cd);
1232 }
1233}