1 /* $Id: makewhatis.c,v 1.2 2011/05/15 02:47:17 kristaps Exp $ */
3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21 #include <sys/param.h>
/*
 * File names of the keyword database and of the index, the size used for
 * the initial allocation and growth of entry buffers, and the flags that
 * main() hands to dbopen().
 * NOTE(review): MANDOC_FLAGS expands to an unparenthesized bitwise-or
 * expression; that is only safe while it is used as a bare function
 * argument, as it is in the dbopen() calls in this file.
 */
40 #define MANDOC_DB "mandoc.db"
41 #define MANDOC_IDX "mandoc.index"
42 #define MANDOC_BUFSZ BUFSIZ
43 #define MANDOC_FLAGS O_CREAT|O_TRUNC|O_RDWR
/*
 * Shared parameter lists for the -man and -mdoc node handlers.
 * NOTE(review): callers in this file pass eight arguments
 * (db, dbn, key, ksz, val, rval, rsz, n), so these lists presumably also
 * carry dbn and val on continuation lines that are not visible in this
 * excerpt -- confirm against the full file.
 */
57 #define MAN_ARGS DB *db, \
59 DBT *key, size_t *ksz, \
61 DBT *rval, size_t *rsz, \
62 const struct man_node *n
63 #define MDOC_ARGS DB *db, \
65 DBT *key, size_t *ksz, \
67 DBT *rval, size_t *rsz, \
68 const struct mdoc_node *n
/*
 * Forward declarations for the file-local helpers.  The dbt_*() routines
 * build and store DBT entries, pman*() and pmdoc*() walk parsed manuals,
 * and every pmdoc_Xx() handler shares the MDOC_ARGS parameter list so that
 * it matches the pmdoc_nf function-pointer type used by the mdocs table.
 * progname is assigned in main() and used in its error messages.
 */
70 static void dbt_append(DBT *, size_t *, const char *);
71 static void dbt_appendb(DBT *, size_t *,
72 const void *, size_t);
73 static void dbt_init(DBT *, size_t *);
74 static void dbt_put(DB *, const char *, DBT *, DBT *);
75 static void usage(void);
76 static void pman(DB *, const char *, DBT *, size_t *,
77 DBT *, DBT *, size_t *, struct man *);
78 static int pman_node(MAN_ARGS);
79 static void pmdoc(DB *, const char *, DBT *, size_t *,
80 DBT *, DBT *, size_t *, struct mdoc *);
81 static void pmdoc_node(MDOC_ARGS);
82 static void pmdoc_An(MDOC_ARGS);
83 static void pmdoc_Cd(MDOC_ARGS);
84 static void pmdoc_Fd(MDOC_ARGS);
85 static void pmdoc_In(MDOC_ARGS);
86 static void pmdoc_Fn(MDOC_ARGS);
87 static void pmdoc_Fo(MDOC_ARGS);
88 static void pmdoc_Nd(MDOC_ARGS);
89 static void pmdoc_Nm(MDOC_ARGS);
90 static void pmdoc_St(MDOC_ARGS);
91 static void pmdoc_Vt(MDOC_ARGS);
93 typedef void (*pmdoc_nf)(MDOC_ARGS);
95 static const char *progname;
97 static const pmdoc_nf mdocs[MDOC_MAX] = {
/*
 * Program entry point.  Opens a btree keyword database and a recno index
 * under temporary tilde-suffixed names, runs every manual named on the
 * command line through the parser and the pmdoc() or pman() walker, writes
 * one index record per manual, and finally renames the temporary files
 * onto the real names.  Returns MANDOCLEVEL_OK on the visible success path.
 */
223 main(int argc, char *argv[])
225 struct mparse *mp; /* parse sequence */
226 struct mdoc *mdoc; /* resulting mdoc */
227 struct man *man; /* resulting man */
228 char *fn; /* current file being parsed */
229 const char *msec, /* manual section */
230 *mtitle, /* manual title */
231 *arch, /* manual architecture */
232 *dir; /* result dir (default: cwd) */
233 char ibuf[MAXPATHLEN], /* index fname */
234 ibbuf[MAXPATHLEN], /* index backup fname */
235 fbuf[MAXPATHLEN], /* btree fname */
236 fbbuf[MAXPATHLEN]; /* btree backup fname */
238 DB *idx, /* index database */
239 *db; /* keyword database */
240 DBT rkey, rval, /* recno entries */
241 key, val; /* persistent keyword entries */
243 ksz, rsz; /* entry buffer size */
244 char vbuf[8]; /* stringified record number */
245 BTREEINFO info; /* btree configuration */
246 recno_t rec; /* current record number */
/* Program name: strrchr() locates the last slash in argv[0]. */
250 progname = strrchr(argv[0], '/');
251 if (progname == NULL)
/* The option string accepts only -d, which takes an argument. */
258 while (-1 != (ch = getopt(argc, argv, "d:")))
265 return((int)MANDOCLEVEL_BADARG);
272 * Set up temporary file-names into which we're going to write
273 * all of our data (both for the index and database). These
274 * will be securely renamed to the real file-names after we've
275 * written all of our data.
/*
 * Byte 0 is cleared so that strlcat() starts from an empty string; byte
 * MAXPATHLEN - 2 is cleared to act as an overflow sentinel (checked below).
 */
278 ibuf[0] = ibuf[MAXPATHLEN - 2] =
279 ibbuf[0] = ibbuf[MAXPATHLEN - 2] =
280 fbuf[0] = fbuf[MAXPATHLEN - 2] =
281 fbbuf[0] = fbbuf[MAXPATHLEN - 2] = '\0';
283 strlcat(fbuf, dir, MAXPATHLEN);
284 strlcat(fbuf, MANDOC_DB, MAXPATHLEN);
286 strlcat(fbbuf, fbuf, MAXPATHLEN);
287 strlcat(fbbuf, "~", MAXPATHLEN);
289 strlcat(ibuf, dir, MAXPATHLEN);
290 strlcat(ibuf, MANDOC_IDX, MAXPATHLEN);
292 strlcat(ibbuf, ibuf, MAXPATHLEN);
293 strlcat(ibbuf, "~", MAXPATHLEN);
/*
 * A non-NUL sentinel means strlcat() filled that buffer completely, so
 * the path may have been truncated; this is reported as a fatal error.
 */
295 if ('\0' != fbuf[MAXPATHLEN - 2] ||
296 '\0' != fbbuf[MAXPATHLEN - 2] ||
297 '\0' != ibuf[MAXPATHLEN - 2] ||
298 '\0' != ibbuf[MAXPATHLEN - 2]) {
299 fprintf(stderr, "%s: Path too long\n", progname);
300 exit((int)MANDOCLEVEL_SYSERR);
304 * For the keyword database, open a BTREE database that allows
305 * duplicates. For the index database, use a standard RECNO
309 memset(&info, 0, sizeof(BTREEINFO));
/* Both databases are opened on the backup names, not the final ones. */
311 db = dbopen(fbbuf, MANDOC_FLAGS, 0644, DB_BTREE, &info);
315 exit((int)MANDOCLEVEL_SYSERR);
318 idx = dbopen(ibbuf, MANDOC_FLAGS, 0644, DB_RECNO, NULL);
323 exit((int)MANDOCLEVEL_SYSERR);
327 * Try parsing the manuals given on the command line. If we
328 * totally fail, then just keep on going. Take resulting trees
329 * and push them down into the database code.
330 * Use the auto-parser and don't report any errors.
333 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
335 memset(&key, 0, sizeof(DBT));
336 memset(&val, 0, sizeof(DBT));
337 memset(&rkey, 0, sizeof(DBT));
338 memset(&rval, 0, sizeof(DBT));
340 val.size = sizeof(vbuf);
342 rkey.size = sizeof(recno_t);
347 while (NULL != (fn = *argv++)) {
350 /* Parse and get (non-empty) AST. */
352 if (mparse_readfd(mp, -1, fn) >= MANDOCLEVEL_FATAL) {
353 fprintf(stderr, "%s: Parse failure\n", fn);
356 mparse_result(mp, &mdoc, &man);
357 if (NULL == mdoc && NULL == man)
360 /* Manual section: can be empty string. */
362 msec = NULL != mdoc ?
363 mdoc_meta(mdoc)->msec :
365 mtitle = NULL != mdoc ?
366 mdoc_meta(mdoc)->title :
367 man_meta(man)->title;
368 arch = NULL != mdoc ? mdoc_meta(mdoc)->arch : NULL;
374 * The index record value consists of a nil-terminated
375 * filename, a nil-terminated manual section, and a
376 * nil-terminated description. Since the description
377 * may not be set, we set a sentinel to see if we're
378 * going to write a nil byte in its place.
381 dbt_init(&rval, &rsz);
382 dbt_appendb(&rval, &rsz, fn, strlen(fn) + 1);
383 dbt_appendb(&rval, &rsz, msec, strlen(msec) + 1);
384 dbt_appendb(&rval, &rsz, mtitle, strlen(mtitle) + 1);
385 dbt_appendb(&rval, &rsz, arch ? arch : "",
386 arch ? strlen(arch) + 1 : 1);
390 /* Fix the record number in the btree value. */
/*
 * The first four bytes of the value are zeroed here and the record number
 * is copied into the next four; the per-macro handlers later overwrite
 * the first four bytes with a type flag.
 * NOTE(review): the second call does arithmetic on the data member of a
 * DBT; if that member is a void pointer this relies on a compiler
 * extension -- confirm against the db.h in use.
 */
392 memset(val.data, 0, sizeof(uint32_t));
393 memcpy(val.data + 4, &rec, sizeof(uint32_t));
396 pmdoc(db, fbbuf, &key, &ksz,
397 &val, &rval, &rsz, mdoc);
399 pman(db, fbbuf, &key, &ksz,
400 &val, &rval, &rsz, man);
403 * Apply this to the index. If we haven't had a
404 * description set, put an empty one in now.
408 dbt_appendb(&rval, &rsz, "", 1);
411 dbt_put(idx, ibbuf, &rkey, &rval);
413 printf("Indexed: %s\n", fn);
425 /* Atomically replace the file with our temporary one. */
427 if (-1 == rename(fbbuf, fbuf))
429 if (-1 == rename(ibbuf, ibuf))
432 return((int)MANDOCLEVEL_OK);
436 * Initialise the stored database key whose data buffer is shared
437 * between uses (as the key must sometimes be constructed from an array
/*
 * Requires an empty entry (size 0 and NULL data, enforced by assert) and
 * gives it a fresh MANDOC_BUFSZ-byte buffer from mandoc_malloc().
 * NOTE(review): the assignment to *ksz is not visible in this excerpt;
 * presumably it records the allocated capacity -- confirm.
 */
441 dbt_init(DBT *key, size_t *ksz)
445 assert(0 == key->size);
446 assert(NULL == key->data);
447 key->data = mandoc_malloc(MANDOC_BUFSZ);
455 * Append a binary value to a database entry. This can be invoked
456 * multiple times; the buffer is automatically resized.
/*
 * key: entry being extended; ksz: its current capacity, updated on growth;
 * cp and sz: the bytes to copy and their count.  While the new content
 * would reach the capacity, the buffer is regrown to the needed size plus
 * MANDOC_BUFSZ, then sz bytes are copied at offset key->size.
 * NOTE(review): the memcpy destination is arithmetic on key->data; if that
 * member is a void pointer this relies on a compiler extension -- confirm.
 * The update of key->size is not visible in this excerpt.
 */
459 dbt_appendb(DBT *key, size_t *ksz, const void *cp, size_t sz)
464 /* Overshoot by MANDOC_BUFSZ. */
466 while (key->size + sz >= *ksz) {
467 *ksz = key->size + sz + MANDOC_BUFSZ;
468 key->data = mandoc_realloc(key->data, *ksz);
471 memcpy(key->data + (int)key->size, cp, sz);
476 * Append a nil-terminated string to the database entry. This can be
477 * invoked multiple times. The database entry will be nil-terminated as
478 * well; if invoked multiple times, a space is put between strings.
/*
 * The string length is measured first and the empty string is singled out
 * (the action taken for it is not visible in this excerpt).  The last
 * stored byte is overwritten with a space -- presumably only when the
 * entry already holds data, replacing the previous terminator; the guard
 * is not visible, confirm.  The string is then appended together with its
 * terminating NUL (sz + 1 bytes) through dbt_appendb().
 */
481 dbt_append(DBT *key, size_t *ksz, const char *cp)
485 if (0 == (sz = strlen(cp)))
491 ((char *)key->data)[(int)key->size - 1] = ' ';
493 dbt_appendb(key, ksz, cp, sz + 1);
/*
 * NOTE(review): the signature line is not visible in this excerpt;
 * presumably this is the body of pmdoc_An().
 * Nodes outside the AUTHORS section take the early branch (its statement
 * is not visible).  Otherwise every text child is appended to the key via
 * dbt_append() and the leading four bytes of the value are set to
 * MANDOC_AUTHOR.
 */
502 if (SEC_AUTHORS != n->sec)
505 for (n = n->child; n; n = n->next)
506 if (MDOC_TEXT == n->type)
507 dbt_append(key, ksz, n->string);
509 fl = (uint32_t)MANDOC_AUTHOR;
510 memcpy(val->data, &fl, 4);
/*
 * NOTE(review): the signature line is not visible in this excerpt;
 * presumably this is the body of pmdoc_Fd().
 * Considers only SYNOPSIS nodes whose first child is the text #include
 * followed by a second text child.  A leading angle bracket or double
 * quote and the matching trailing delimiter of that second child are
 * detected (the pointer adjustments themselves are on lines not visible
 * here), and the bytes from start through end are stored as a
 * NUL-terminated key flagged MANDOC_INCLUDES.
 */
518 const char *start, *end;
521 if (SEC_SYNOPSIS != n->sec)
523 if (NULL == (n = n->child) || MDOC_TEXT != n->type)
527 * Only consider those `Fd' macro fields that begin with an
528 * "inclusion" token (versus, e.g., #define).
530 if (strcmp("#include", n->string))
533 if (NULL == (n = n->next) || MDOC_TEXT != n->type)
537 * Strip away the enclosing angle brackets and make sure we're
542 if ('<' == *start || '"' == *start)
545 if (0 == (sz = strlen(start)))
548 end = &start[(int)sz - 1];
549 if ('>' == *end || '"' == *end)
552 assert(end >= start);
553 dbt_appendb(key, ksz, start, (size_t)(end - start + 1));
554 dbt_appendb(key, ksz, "", 1);
556 fl = (uint32_t)MANDOC_INCLUDES;
557 memcpy(val->data, &fl, 4);
/*
 * NOTE(review): the signature line is not visible in this excerpt;
 * presumably this is the body of pmdoc_Cd().
 * For SYNOPSIS nodes, appends every text child to the key via
 * dbt_append() and sets the leading four bytes of the value to
 * MANDOC_CONFIG.
 */
566 if (SEC_SYNOPSIS != n->sec)
569 for (n = n->child; n; n = n->next)
570 if (MDOC_TEXT == n->type)
571 dbt_append(key, ksz, n->string);
573 fl = (uint32_t)MANDOC_CONFIG;
574 memcpy(val->data, &fl, 4);
/*
 * NOTE(review): the signature line is not visible in this excerpt;
 * presumably this is the body of pmdoc_In().
 * For SYNOPSIS nodes whose first child is text, uses that child string as
 * the key and sets the leading four bytes of the value to MANDOC_INCLUDES.
 */
583 if (SEC_SYNOPSIS != n->sec)
585 if (NULL == n->child || MDOC_TEXT != n->child->type)
588 dbt_append(key, ksz, n->child->string);
589 fl = (uint32_t)MANDOC_INCLUDES;
590 memcpy(val->data, &fl, 4);
/*
 * NOTE(review): the signature line is not visible in this excerpt;
 * presumably this is the body of pmdoc_Fn().
 * For SYNOPSIS nodes whose first child is text, locates the last space in
 * that string with strrchr(); the whole string is used instead on the
 * fallback branch (presumably when no space is found -- the condition is
 * not visible).  After pointer symbols are stripped (on lines not visible
 * here) the result becomes the key, flagged MANDOC_FUNCTION.
 */
600 if (SEC_SYNOPSIS != n->sec)
602 if (NULL == n->child || MDOC_TEXT != n->child->type)
605 /* .Fn "struct type *arg" "foo" */
607 cp = strrchr(n->child->string, ' ');
609 cp = n->child->string;
611 /* Strip away pointer symbol. */
616 dbt_append(key, ksz, cp);
617 fl = (uint32_t)MANDOC_FUNCTION;
618 memcpy(val->data, &fl, 4);
/*
 * NOTE(review): the signature line is not visible in this excerpt;
 * presumably this is the body of pmdoc_St().
 * For STANDARDS nodes whose first child is text, uses that child string as
 * the key and sets the leading four bytes of the value to MANDOC_STANDARD.
 */
627 if (SEC_STANDARDS != n->sec)
629 if (NULL == n->child || MDOC_TEXT != n->child->type)
632 dbt_append(key, ksz, n->child->string);
633 fl = (uint32_t)MANDOC_STANDARD;
634 memcpy(val->data, &fl, 4);
/*
 * NOTE(review): the signature line is not visible in this excerpt;
 * presumably this is the body of pmdoc_Vt().
 * For SYNOPSIS nodes (with a Vt token additionally filtered on the BODY
 * node type), takes the last child when it is text, skips leading
 * asterisks, checks for a trailing semicolon (the length adjustment is
 * not visible here), and stores sz bytes plus a NUL terminator as the
 * key, flagged MANDOC_VARIABLE.
 */
645 if (SEC_SYNOPSIS != n->sec)
647 if (MDOC_Vt == n->tok && MDOC_BODY != n->type)
649 if (NULL == n->last || MDOC_TEXT != n->last->type)
653 * Strip away leading pointer symbol '*' and trailing ';'.
656 start = n->last->string;
658 while ('*' == *start)
661 if (0 == (sz = strlen(start)))
664 if (';' == start[(int)sz - 1])
670 dbt_appendb(key, ksz, start, sz);
671 dbt_appendb(key, ksz, "", 1);
673 fl = (uint32_t)MANDOC_VARIABLE;
674 memcpy(val->data, &fl, 4);
/*
 * NOTE(review): the signature line is not visible in this excerpt;
 * presumably this is the body of pmdoc_Fo().
 * Only HEAD nodes in the SYNOPSIS section whose first child is text pass
 * the guards; that child string becomes the key, flagged MANDOC_FUNCTION.
 */
683 if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
685 if (NULL == n->child || MDOC_TEXT != n->child->type)
688 dbt_append(key, ksz, n->child->string);
689 fl = (uint32_t)MANDOC_FUNCTION;
690 memcpy(val->data, &fl, 4);
/*
 * NOTE(review): the signature line is not visible in this excerpt;
 * presumably this is the body of pmdoc_Nd().
 * Walks the text children and writes them to the index record (rval)
 * rather than to the keyword key.  Two append forms are used: a raw
 * NUL-terminated append and the space-joining dbt_append(); presumably the
 * first flag selects the raw form for the first text child only -- the
 * selecting condition is not visible, confirm.
 */
700 for (first = 1, n = n->child; n; n = n->next) {
701 if (MDOC_TEXT != n->type)
704 dbt_appendb(rval, rsz, n->string, strlen(n->string) + 1);
706 dbt_append(rval, rsz, n->string);
/*
 * NOTE(review): the signature line is not visible in this excerpt;
 * presumably this is the body of pmdoc_Nm().
 * In the NAME section every text child is appended to the key and the
 * value is flagged MANDOC_NAME.  Elsewhere only HEAD nodes in the
 * SYNOPSIS section continue past the guard; their text children are
 * appended to the key and the value is flagged MANDOC_UTILITY.
 */
717 if (SEC_NAME == n->sec) {
718 for (n = n->child; n; n = n->next) {
719 if (MDOC_TEXT != n->type)
721 dbt_append(key, ksz, n->string);
723 fl = (uint32_t)MANDOC_NAME;
724 memcpy(val->data, &fl, 4);
726 } else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
729 for (n = n->child; n; n = n->next) {
730 if (MDOC_TEXT != n->type)
732 dbt_append(key, ksz, n->string);
735 fl = (uint32_t)MANDOC_UTILITY;
736 memcpy(val->data, &fl, 4);
/*
 * Store one key/value pair through the put method of the given database,
 * passing 0 as the flags argument.  A zero return from put is the success
 * test; the visible tail of the function exits with MANDOCLEVEL_SYSERR.
 * dbn is the database file name supplied by the callers.
 * NOTE(review): the lines between the signature and the put call are not
 * visible in this excerpt, so any guard on empty keys cannot be confirmed.
 */
740 dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
750 if (0 == (*db->put)(db, key, val, 0))
754 exit((int)MANDOCLEVEL_SYSERR);
759 * Call out to per-macro handlers after clearing the persistent database
760 * key. If the macro sets the database key, flush it to the database.
/*
 * Looks up the handler for the token of the node in the mdocs table;
 * tokens with a NULL entry are singled out (the action taken is not
 * visible in this excerpt).  The handler is invoked with the full
 * argument set and the key/value pair is handed to dbt_put().  The walk
 * then recurses into the child and the next sibling of the node.
 * NOTE(review): the guard that stops recursion on a NULL node and the
 * conditions around these calls are on lines not visible here -- confirm.
 */
763 pmdoc_node(MDOC_ARGS)
779 if (NULL == mdocs[n->tok])
784 (*mdocs[n->tok])(db, dbn, key, ksz, val, rval, rsz, n);
785 dbt_put(db, dbn, key, val);
791 pmdoc_node(db, dbn, key, ksz, val, rval, rsz, n->child);
792 pmdoc_node(db, dbn, key, ksz, val, rval, rsz, n->next);
/*
 * NOTE(review): the signature line is not visible in this excerpt;
 * presumably this is the body of pman_node(), which is declared to
 * return int.
 * Looks for the BODY of an SH block whose head text is exactly NAME and
 * whose first body child is text.  That text is split on spaces and
 * commas into manual names, each stored as a key flagged MANDOC_NAME; the
 * remainder after an optional dash is appended to the index record
 * (rval).  The result of recursing into the child and then the next
 * sibling is tested, but the statements that act on it are not visible.
 */
798 const struct man_node *head, *body;
799 const char *start, *sv;
807 * We're only searching for one thing: the first text child in
808 * the BODY of a NAME section. Since we don't keep track of
809 * sections in -man, run some hoops to find out whether we're in
810 * the correct section or not.
813 if (MAN_BODY == n->type && MAN_SH == n->tok) {
815 assert(body->parent);
816 if (NULL != (head = body->parent->head) &&
818 NULL != (head = (head->child)) &&
819 MAN_TEXT == head->type &&
820 0 == strcmp(head->string, "NAME") &&
821 NULL != (body = body->child) &&
822 MAN_TEXT == body->type) {
824 fl = (uint32_t)MANDOC_NAME;
825 memcpy(val->data, &fl, 4);
827 assert(body->string);
828 start = sv = body->string;
831 * Go through a special heuristic dance here.
832 * This is why -man manuals are great!
833 * (I'm being sarcastic: my eyes are bleeding.)
834 * Conventionally, one or more manual names are
835 * comma-specified prior to a whitespace, then a
836 * dash, then a description. Try to puzzle out
837 * the name parts here.
/* sz is the length of the next name: the span up to a space or comma. */
841 sz = strcspn(start, " ,");
842 if ('\0' == start[(int)sz])
846 dbt_appendb(key, ksz, start, sz);
847 dbt_appendb(key, ksz, "", 1);
849 dbt_put(db, dbn, key, val);
851 if (' ' == start[(int)sz]) {
852 start += (int)sz + 1;
856 assert(',' == start[(int)sz]);
857 start += (int)sz + 1;
858 while (' ' == *start)
864 dbt_append(key, ksz, start);
868 while (' ' == *start)
/*
 * Recognised separators before the description: a plain hyphen, an
 * escaped hyphen, and the roff en-dash and em-dash escapes.  The pointer
 * advance for each case is on lines not visible in this excerpt.
 */
871 if (0 == strncmp(start, "-", 1))
873 else if (0 == strncmp(start, "\\-", 2))
875 else if (0 == strncmp(start, "\\(en", 4))
877 else if (0 == strncmp(start, "\\(em", 4))
880 while (' ' == *start)
883 dbt_appendb(rval, rsz, start, strlen(start) + 1);
887 if (pman_node(db, dbn, key, ksz, val, rval, rsz, n->child))
889 if (pman_node(db, dbn, key, ksz, val, rval, rsz, n->next))
/*
 * Entry point for a parsed -man document: starts the pman_node() walk at
 * the root node obtained from man_node(m), forwarding every database and
 * buffer argument unchanged.  The return value of pman_node() is not used
 * on the visible line.
 */
896 pman(DB *db, const char *dbn, DBT *key, size_t *ksz,
897 DBT *val, DBT *rval, size_t *rsz, struct man *m)
900 pman_node(db, dbn, key, ksz, val, rval, rsz, man_node(m));
/*
 * Entry point for a parsed -mdoc document: starts the pmdoc_node() walk
 * at the root node obtained from mdoc_node(m), forwarding every database
 * and buffer argument unchanged.
 */
905 pmdoc(DB *db, const char *dbn, DBT *key, size_t *ksz,
906 DBT *val, DBT *rval, size_t *rsz, struct mdoc *m)
909 pmdoc_node(db, dbn, key, ksz, val, rval, rsz, mdoc_node(m));
916 fprintf(stderr, "usage: %s "