1 /* $Id: cgi.c,v 1.42 2012/03/24 01:46:25 kristaps Exp $ */
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21 #include <sys/param.h>
38 #include "apropos_db.h"
65 * A query as passed to the search function.
68 const char *arch; /* architecture */
69 const char *sec; /* manual section */
70 const char *expr; /* unparsed expression string */
71 int manroot; /* manroot index (or -1)*/
72 int legacy; /* whether legacy mode */
82 static int atou(const char *, unsigned *);
83 static void catman(const struct req *, const char *);
84 static int cmp(const void *, const void *);
85 static void format(const struct req *, const char *);
86 static void html_print(const char *);
87 static void html_printquery(const struct req *);
88 static void html_putchar(char);
89 static int http_decode(char *);
90 static void http_parse(struct req *, char *);
91 static void http_print(const char *);
92 static void http_putchar(char);
93 static void http_printquery(const struct req *);
94 static int pathstop(DIR *);
95 static void pathgen(DIR *, char *, struct req *);
96 static void pg_index(const struct req *, char *);
97 static void pg_search(const struct req *, char *);
98 static void pg_show(const struct req *, char *);
99 static void resp_bad(void);
100 static void resp_baddb(void);
101 static void resp_error400(void);
102 static void resp_error404(const char *);
103 static void resp_begin_html(int, const char *);
104 static void resp_begin_http(int, const char *);
105 static void resp_end_html(void);
106 static void resp_index(const struct req *);
107 static void resp_search(struct res *, size_t, void *);
108 static void resp_searchform(const struct req *);
110 static const char *progname; /* cgi script name */
111 static const char *cache; /* cache directory */
112 static const char *css; /* css directory */
113 static const char *host; /* hostname */
115 static const char * const pages[PAGE__MAX] = {
116 "index", /* PAGE_INDEX */
117 "search", /* PAGE_SEARCH */
118 "show", /* PAGE_SHOW */
122 * This is just OpenBSD's strtol(3) suggestion.
123 * I use it instead of strtonum(3) for portability's sake.
126 atou(const char *buf, unsigned *v)
132 lval = strtol(buf, &ep, 10);
133 if (buf[0] == '\0' || *ep != '\0')
135 if ((errno == ERANGE && (lval == LONG_MAX ||
136 lval == LONG_MIN)) ||
137 (lval > INT_MAX || lval < 0))
140 *v = (unsigned int)lval;
145 * Print a character, escaping HTML along the way.
146 * This will pass non-ASCII straight to output: be warned!
166 putchar((unsigned char)c);
171 http_printquery(const struct req *req)
175 http_print(req->q.expr ? req->q.expr : "");
177 http_print(req->q.sec ? req->q.sec : "");
179 http_print(req->q.arch ? req->q.arch : "");
184 html_printquery(const struct req *req)
187 printf("&expr=");
188 html_print(req->q.expr ? req->q.expr : "");
190 html_print(req->q.sec ? req->q.sec : "");
191 printf("&arch=");
192 html_print(req->q.arch ? req->q.arch : "");
196 http_print(const char *p)
206 * Call through to html_putchar().
207 * Accepts NULL strings.
210 html_print(const char *p)
220 * Parse out key-value pairs from an HTTP request variable.
221 * This can be either a cookie or a POST/GET string, although man.cgi
222 * uses only GET for simplicity.
225 http_parse(struct req *req, char *p)
227 char *key, *val, *manroot;
230 memset(&req->q, 0, sizeof(struct query));
239 p += (int)strcspn(p, ";&");
242 if (NULL != (val = strchr(key, '=')))
245 if ('\0' == *key || NULL == val || '\0' == *val)
248 /* Just abort handling. */
250 if ( ! http_decode(key))
252 if (NULL != val && ! http_decode(val))
255 if (0 == strcmp(key, "expr"))
257 else if (0 == strcmp(key, "query"))
259 else if (0 == strcmp(key, "sec"))
261 else if (0 == strcmp(key, "sektion"))
263 else if (0 == strcmp(key, "arch"))
265 else if (0 == strcmp(key, "manpath"))
267 else if (0 == strcmp(key, "apropos"))
268 legacy = 0 == strcmp(val, "0");
271 /* Test for old man.cgi compatibility mode. */
273 req->q.legacy = legacy > 0;
276 * Section "0" means no section when in legacy mode.
277 * For some man.cgi scripts, "default" arch is none.
280 if (req->q.legacy && NULL != req->q.sec)
281 if (0 == strcmp(req->q.sec, "0"))
283 if (req->q.legacy && NULL != req->q.arch)
284 if (0 == strcmp(req->q.arch, "default"))
287 /* Default to first manroot. */
289 if (NULL != manroot) {
290 for (i = 0; i < (int)req->psz; i++)
291 if (0 == strcmp(req->p[i].name, manroot))
293 req->q.manroot = i < (int)req->psz ? i : -1;
301 if (isalnum((unsigned char)c)) {
302 putchar((unsigned char)c);
304 } else if (' ' == c) {
312 * HTTP-decode a string. The standard explanation is that this turns
313 * "%4e+foo" into "n foo" in the regular way. This is done in-place
314 * over the allocated string.
324 for ( ; '\0' != *p; p++) {
326 if ('\0' == (hex[0] = *(p + 1)))
328 if ('\0' == (hex[1] = *(p + 2)))
330 if (1 != sscanf(hex, "%x", &c))
336 memmove(p + 1, p + 3, strlen(p + 3) + 1);
338 *p = '+' == *p ? ' ' : *p;
346 resp_begin_http(int code, const char *msg)
350 printf("Status: %d %s\n", code, msg);
352 puts("Content-Type: text/html; charset=utf-8\n"
353 "Cache-Control: no-cache\n"
361 resp_begin_html(int code, const char *msg)
364 resp_begin_http(code, msg);
366 printf("<!DOCTYPE HTML PUBLIC "
367 " \"-//W3C//DTD HTML 4.01//EN\""
368 " \"http://www.w3.org/TR/html4/strict.dtd\">\n"
371 "<META HTTP-EQUIV=\"Content-Type\""
372 " CONTENT=\"text/html; charset=utf-8\">\n"
373 "<LINK REL=\"stylesheet\" HREF=\"%s/man-cgi.css\""
374 " TYPE=\"text/css\" media=\"all\">\n"
375 "<LINK REL=\"stylesheet\" HREF=\"%s/man.css\""
376 " TYPE=\"text/css\" media=\"all\">\n"
377 "<TITLE>System Manpage Reference</TITLE>\n"
380 "<!-- Begin page content. //-->\n", css, css);
392 resp_searchform(const struct req *req)
396 puts("<!-- Begin search form. //-->");
397 printf("<DIV ID=\"mancgi\">\n"
398 "<FORM ACTION=\"%s/search.html\" METHOD=\"get\">\n"
400 "<LEGEND>Search Parameters</LEGEND>\n"
401 "<INPUT TYPE=\"submit\" "
402 " VALUE=\"Search\"> for manuals satisfying \n"
403 "<INPUT TYPE=\"text\" NAME=\"expr\" VALUE=\"",
405 html_print(req->q.expr ? req->q.expr : "");
406 printf("\">, section "
407 "<INPUT TYPE=\"text\""
408 " SIZE=\"4\" NAME=\"sec\" VALUE=\"");
409 html_print(req->q.sec ? req->q.sec : "");
411 "<INPUT TYPE=\"text\""
412 " SIZE=\"8\" NAME=\"arch\" VALUE=\"");
413 html_print(req->q.arch ? req->q.arch : "");
416 puts(", <SELECT NAME=\"manpath\">");
417 for (i = 0; i < (int)req->psz; i++) {
418 printf("<OPTION %s VALUE=\"",
419 (i == req->q.manroot) ||
420 (0 == i && -1 == req->q.manroot) ?
421 "SELECTED=\"selected\"" : "");
422 html_print(req->p[i].name);
424 html_print(req->p[i].name);
430 "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n"
434 puts("<!-- End search form. //-->");
438 resp_index(const struct req *req)
441 resp_begin_html(200, NULL);
442 resp_searchform(req);
450 resp_begin_html(400, "Query Malformed");
451 printf("<H1>Malformed Query</H1>\n"
453 "The query your entered was malformed.\n"
454 "Try again from the\n"
455 "<A HREF=\"%s/index.html\">main page</A>.\n"
461 resp_error404(const char *page)
464 resp_begin_html(404, "Not Found");
465 puts("<H1>Page Not Found</H1>\n"
467 "The page you're looking for, ");
471 "could not be found.\n"
472 "Try searching from the\n"
473 "<A HREF=\"%s/index.html\">main page</A>.\n"
481 resp_begin_html(500, "Internal Server Error");
482 puts("<P>Generic badness happened.</P>");
490 resp_begin_html(500, "Internal Server Error");
491 puts("<P>Your database is broken.</P>");
496 resp_search(struct res *r, size_t sz, void *arg)
499 const struct req *req;
501 req = (const struct req *)arg;
504 assert(req->q.manroot >= 0);
506 for (matched = i = 0; i < sz; i++)
511 for (i = 0; i < sz; i++)
515 * If we have just one result, then jump there now
518 puts("Status: 303 See Other");
519 printf("Location: http://%s%s/show/%d/%u/%u.html?",
520 host, progname, req->q.manroot,
521 r[i].volume, r[i].rec);
522 http_printquery(req);
524 "Content-Type: text/html; charset=utf-8\n");
528 resp_begin_html(200, NULL);
529 resp_searchform(req);
531 puts("<DIV CLASS=\"results\">");
535 "No results found.\n"
542 qsort(r, sz, sizeof(struct res), cmp);
546 for (i = 0; i < sz; i++) {
550 "<TD CLASS=\"title\">\n"
551 "<A HREF=\"%s/show/%d/%u/%u.html?",
552 progname, req->q.manroot,
553 r[i].volume, r[i].rec);
554 html_printquery(req);
556 html_print(r[i].title);
558 html_print(r[i].cat);
559 if (r[i].arch && '\0' != *r[i].arch) {
561 html_print(r[i].arch);
565 "<TD CLASS=\"desc\">");
566 html_print(r[i].desc);
578 pg_index(const struct req *req, char *path)
585 catman(const struct req *req, const char *file)
593 if (NULL == (f = fopen(file, "r"))) {
598 resp_begin_html(200, NULL);
599 resp_searchform(req);
600 puts("<DIV CLASS=\"catman\">\n"
603 while (NULL != (p = fgetln(f, &len))) {
605 for (i = 0; i < (int)len - 1; i++) {
607 * This means that the catpage is out of state.
608 * Ignore it and keep going (although the
612 if ('\b' == p[i] || '\n' == p[i])
616 * Print a regular character.
617 * Close out any bold/italic scopes.
618 * If we're in back-space mode, make sure we'll
619 * have something to enter when we backspace.
622 if ('\b' != p[i + 1]) {
630 } else if (i + 2 >= (int)len)
648 * Handle funny behaviour troff-isms.
649 * These were grok'd from the original man2html.c.
652 if (('+' == p[i] && 'o' == p[i + 2]) ||
653 ('o' == p[i] && '+' == p[i + 2]) ||
654 ('|' == p[i] && '=' == p[i + 2]) ||
655 ('=' == p[i] && '|' == p[i + 2]) ||
656 ('*' == p[i] && '=' == p[i + 2]) ||
657 ('=' == p[i] && '*' == p[i + 2]) ||
658 ('*' == p[i] && '|' == p[i + 2]) ||
659 ('|' == p[i] && '*' == p[i + 2])) {
668 } else if (('|' == p[i] && '-' == p[i + 2]) ||
669 ('-' == p[i] && '|' == p[i + 1]) ||
670 ('+' == p[i] && '-' == p[i + 1]) ||
671 ('-' == p[i] && '+' == p[i + 1]) ||
672 ('+' == p[i] && '|' == p[i + 1]) ||
673 ('|' == p[i] && '+' == p[i + 1])) {
697 * Clean up the last character.
698 * We can get to a newline; don't print that.
706 if (i == (int)len - 1 && '\n' != p[i])
721 format(const struct req *req, const char *file)
729 char opts[MAXPATHLEN + 128];
731 if (-1 == (fd = open(file, O_RDONLY, 0))) {
736 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
737 rc = mparse_readfd(mp, fd, file);
740 if (rc >= MANDOCLEVEL_FATAL) {
745 snprintf(opts, sizeof(opts), "fragment,"
746 "man=%s/search.html?sec=%%S&expr=%%N,"
747 /*"includes=/cgi-bin/man.cgi/usr/include/%%I"*/,
750 mparse_result(mp, &mdoc, &man);
751 if (NULL == man && NULL == mdoc) {
757 resp_begin_html(200, NULL);
758 resp_searchform(req);
760 vp = html_alloc(opts);
775 pg_show(const struct req *req, char *path)
780 char file[MAXPATHLEN];
783 unsigned int vol, rec, mr;
789 /* Parse out mroot, volume, and record from the path. */
791 if (NULL == path || NULL == (sub = strchr(path, '/'))) {
796 if ( ! atou(path, &mr)) {
801 if (NULL == (sub = strchr(path, '/'))) {
806 if ( ! atou(path, &vol) || ! atou(sub, &rec)) {
809 } else if (mr >= (unsigned int)req->psz) {
815 * Begin by chdir()ing into the manroot.
816 * This way we can pick up the database files, which are
817 * relative to the manpath root.
820 if (-1 == chdir(req->p[(int)mr].path)) {
821 perror(req->p[(int)mr].path);
826 memset(&ps, 0, sizeof(struct manpaths));
827 manpath_manconf(&ps, "etc/catman.conf");
829 if (vol >= (unsigned int)ps.sz) {
834 sz = strlcpy(file, ps.paths[vol], MAXPATHLEN);
835 assert(sz < MAXPATHLEN);
836 strlcat(file, "/", MAXPATHLEN);
837 strlcat(file, MANDOC_IDX, MAXPATHLEN);
839 /* Open the index recno(3) database. */
841 idx = dbopen(file, O_RDONLY, 0, DB_RECNO, NULL);
851 if (0 != (rc = (*idx->get)(idx, &key, &val, 0))) {
852 rc < 0 ? resp_baddb() : resp_error400();
854 } else if (0 == val.size) {
859 cp = (char *)val.data;
862 if (NULL == memchr(cp, '\0', val.size - 1))
865 file[(int)sz] = '\0';
866 strlcat(file, "/", MAXPATHLEN);
867 strlcat(file, cp, MAXPATHLEN);
880 pg_search(const struct req *req, char *path)
885 const char *ep, *start;
891 if (req->q.manroot < 0 || 0 == req->psz) {
892 resp_search(NULL, 0, (void *)req);
896 memset(&opt, 0, sizeof(struct opts));
899 opt.arch = req->q.arch;
900 opt.cat = req->q.sec;
908 * Begin by chdir()ing into the root of the manpath.
909 * This way we can pick up the database files, which are
910 * relative to the manpath root.
913 assert(req->q.manroot < (int)req->psz);
914 if (-1 == (chdir(req->p[req->q.manroot].path))) {
915 perror(req->p[req->q.manroot].path);
916 resp_search(NULL, 0, (void *)req);
920 memset(&ps, 0, sizeof(struct manpaths));
921 manpath_manconf(&ps, "etc/catman.conf");
924 * Poor man's tokenisation: just break apart by spaces.
925 * Yes, this is half-ass. But it works for now.
928 while (ep && isspace((unsigned char)*ep))
931 while (ep && '\0' != *ep) {
932 cp = mandoc_realloc(cp, (sz + 1) * sizeof(char *));
934 while ('\0' != *ep && ! isspace((unsigned char)*ep))
936 cp[sz] = mandoc_malloc((ep - start) + 1);
937 memcpy(cp[sz], start, ep - start);
938 cp[sz++][ep - start] = '\0';
939 while (isspace((unsigned char)*ep))
944 * Pump down into apropos backend.
945 * The resp_search() function is called with the results.
948 expr = req->q.legacy ?
949 termcomp(sz, cp, &tt) : exprcomp(sz, cp, &tt);
953 (ps.sz, ps.paths, &opt, expr, tt,
954 (void *)req, &ressz, &res, resp_search);
956 /* ...unless errors occurred. */
961 resp_search(NULL, 0, NULL);
963 for (i = 0; i < sz; i++)
976 char buf[MAXPATHLEN];
979 char *p, *path, *subpath;
981 /* Scan our run-time environment. */
983 if (NULL == (cache = getenv("CACHE_DIR")))
984 cache = "/cache/man.cgi";
986 if (NULL == (progname = getenv("SCRIPT_NAME")))
989 if (NULL == (css = getenv("CSS_DIR")))
992 if (NULL == (host = getenv("HTTP_HOST")))
996 * First we change directory into the cache directory so that
997 * subsequent scanning for manpath directories is rooted
998 * relative to the same position.
1001 if (-1 == chdir(cache)) {
1004 return(EXIT_FAILURE);
1005 } else if (NULL == (cwd = opendir(cache))) {
1008 return(EXIT_FAILURE);
1011 memset(&req, 0, sizeof(struct req));
1013 strlcpy(buf, ".", MAXPATHLEN);
1014 pathgen(cwd, buf, &req);
1017 /* Next parse out the query string. */
1019 if (NULL != (p = getenv("QUERY_STRING")))
1020 http_parse(&req, p);
1023 * Now juggle paths to extract information.
1024 * We want to extract our filetype (the file suffix), the
1025 * initial path component, then the trailing component(s).
1026 * Start with leading subpath component.
1029 subpath = path = NULL;
1030 req.page = PAGE__MAX;
1032 if (NULL == (path = getenv("PATH_INFO")) || '\0' == *path)
1033 req.page = PAGE_INDEX;
1035 if (NULL != path && '/' == *path && '\0' == *++path)
1036 req.page = PAGE_INDEX;
1038 /* Strip file suffix. */
1040 if (NULL != path && NULL != (p = strrchr(path, '.')))
1041 if (NULL != p && NULL == strchr(p, '/'))
1044 /* Resolve subpath component. */
1046 if (NULL != path && NULL != (subpath = strchr(path, '/')))
1049 /* Map path into one we recognise. */
1051 if (NULL != path && '\0' != *path)
1052 for (i = 0; i < (int)PAGE__MAX; i++)
1053 if (0 == strcmp(pages[i], path)) {
1054 req.page = (enum page)i;
1062 pg_index(&req, subpath);
1065 pg_search(&req, subpath);
1068 pg_show(&req, subpath);
1071 resp_error404(path);
1075 for (i = 0; i < (int)req.psz; i++) {
1076 free(req.p[i].path);
1077 free(req.p[i].name);
1081 return(EXIT_SUCCESS);
1085 cmp(const void *p1, const void *p2)
1088 return(strcasecmp(((const struct res *)p1)->title,
1089 ((const struct res *)p2)->title));
1093 * Check to see if an "etc" path contains a catman.conf file. If it
1094 * does, that means that the path contains a tree created by catman(8)
1095 * and should be used for indexing.
1102 while (NULL != (d = readdir(dir)))
1103 if (DT_REG == d->d_type)
1104 if (0 == strcmp(d->d_name, "catman.conf"))
1111 * Scan for indexable paths.
1112 * This adds all paths with "etc/catman.conf" to the buffer.
1115 pathgen(DIR *dir, char *path, struct req *req)
1123 sz = strlcat(path, "/", MAXPATHLEN);
1124 if (sz >= MAXPATHLEN) {
1125 fprintf(stderr, "%s: Path too long", path);
1130 * First, scan for the "etc" directory.
1131 * If it's found, then see if it should cause us to stop. This
1132 * happens when a catman.conf is found in the directory.
1136 while (0 == rc && NULL != (d = readdir(dir))) {
1137 if (DT_DIR != d->d_type || strcmp(d->d_name, "etc"))
1140 path[(int)sz] = '\0';
1141 ssz = strlcat(path, d->d_name, MAXPATHLEN);
1143 if (ssz >= MAXPATHLEN) {
1144 fprintf(stderr, "%s: Path too long", path);
1146 } else if (NULL == (cd = opendir(path))) {
1156 /* This also strips the trailing slash. */
1157 path[(int)--sz] = '\0';
1158 req->p = mandoc_realloc
1160 (req->psz + 1) * sizeof(struct paths));
1162 * Strip out the leading "./" unless we're just a ".",
1163 * in which case use an empty string as our name.
1165 req->p[(int)req->psz].path = mandoc_strdup(path);
1166 req->p[(int)req->psz].name =
1167 cp = mandoc_strdup(path + (1 == sz ? 1 : 2));
1170 * The name is just the path with all the slashes taken
1171 * out of it. Simple but effective.
1173 for ( ; '\0' != *cp; cp++)
1180 * If no etc/catman.conf was found, recursively enter child
1181 * directory and continue scanning.
1185 while (NULL != (d = readdir(dir))) {
1186 if (DT_DIR != d->d_type || '.' == d->d_name[0])
1189 path[(int)sz] = '\0';
1190 ssz = strlcat(path, d->d_name, MAXPATHLEN);
1192 if (ssz >= MAXPATHLEN) {
1193 fprintf(stderr, "%s: Path too long", path);
1195 } else if (NULL == (cd = opendir(path))) {
1200 pathgen(cd, path, req);