2 * Copyright (c) 2002 John Rochester
3 * Copyright (c) 2013 Franco Fichtner <franco@lastsummer.de>
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer,
11 * in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 * $FreeBSD: src/usr.bin/makewhatis/makewhatis.c,v 1.9 2002/09/04 23:29:04 dwmalone Exp $
33 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/queue.h>
44 #include <stringlist.h>
#define DEFAULT_MANPATH "/usr/share/man"	/* used when MANPATH is unset */
#define LINE_ALLOC 4096				/* initial sbuf and line buffer size */

static char blank[] = "";
54 * Information collected about each man page alias.
57 RB_ENTRY(page_alias) entry;
65 * Information collected about each unique man page.
68 RB_HEAD(page_alias_tree, page_alias) head;
69 RB_ENTRY(page_info) entry;
73 static RB_HEAD(page_info_tree, page_info) page_head = RB_INITIALIZER(&page_head);
76 * Sorts page info by inode number.
79 infosort(const struct page_info *a, const struct page_info *b)
81 return (memcmp(&a->inode, &b->inode, sizeof(a->inode)));
84 RB_PROTOTYPE(page_info_tree, page_info, entry, infosort);
85 RB_GENERATE(page_info_tree, page_info, entry, infosort);
88 * Sorts page alias first by suffix, then name.
91 aliassort(const struct page_alias *a, const struct page_alias *b)
93 int ret = strcmp(a->suffix, b->suffix);
98 return (strcmp(a->name, b->name));
101 RB_PROTOTYPE(page_alias_tree, page_alias, entry, aliassort);
102 RB_GENERATE(page_alias_tree, page_alias, entry, aliassort);
/*
 * An entry kept for each visited directory.
 */
struct visited_dir {
	dev_t device;			/* st_dev of the directory */
	ino_t inode;			/* st_ino of the directory */
	SLIST_ENTRY(visited_dir) next;	/* linkage in visited_dirs */
};
/*
 * an expanding string
 */
struct sbuf {
	char *content;		/* the start of the buffer */
	char *end;		/* just past the end of the content */
	char *last;		/* the last allocated character */
};
/*
 * Removes the last amount characters from the sbuf.
 * The caller must ensure that at least amount characters are present.
 */
#define sbuf_retract(sbuf, amount)	\
	((sbuf)->end -= (amount))
/*
 * Returns the length of the sbuf content.
 */
#define sbuf_length(sbuf)	\
	((sbuf)->end - (sbuf)->content)
133 typedef char *edited_copy(char *from, char *to, int length);
135 static int append; /* -a flag: append to existing whatis */
136 static int verbose; /* -v flag: be verbose with warnings */
137 static int indent = 24; /* -i option: description indentation */
138 static const char *whatis_name="whatis";/* -n option: the name */
139 static char *common_output; /* -o option: the single output file */
140 static char *locale; /* user's locale if -L is used */
141 static char *lang_locale; /* short form of locale */
142 static const char *machine;
144 static int exit_code; /* exit code to use when finished */
145 static SLIST_HEAD(, visited_dir) visited_dirs =
146 SLIST_HEAD_INITIALIZER(visited_dirs);
149 * While the whatis line is being formed, it is stored in whatis_proto.
150 * When finished, it is reformatted into whatis_final and then appended
153 static struct sbuf *whatis_proto;
154 static struct sbuf *whatis_final;
155 static StringList *whatis_lines; /* collected output lines */
157 static char tmp_file[MAXPATHLEN]; /* path of temporary file, if any */
/*
 * A set of possible names for the NAME man page section.
 * The list is terminated by a NULL entry.
 */
static const char *name_section_titles[] = {
	"NAME", "Name", "NAMN", "BEZEICHNUNG", "\xcc\xbe\xbe\xce",
	"\xee\xe1\xfa\xf7\xe1\xee\xe9\xe5", NULL
};
/*
 * A subset of the mdoc(7) commands to ignore.
 * Two-letter macro names are concatenated and looked up with strstr().
 */
static char mdoc_commands[] = "ArDvErEvFlLiNmPa";
169 * Frees a struct page_info and its content.
172 free_page_info(struct page_info *info)
174 struct page_alias *alias;
176 while ((alias = RB_ROOT(&info->head))) {
177 RB_REMOVE(page_alias_tree, &info->head, alias);
178 free(alias->filename);
/*
 * new_page_alias: builds a struct page_alias from dirent->d_name and
 * links it into the alias tree info->head.
 *
 *   info     - page record whose alias tree receives the new entry
 *   filename - full path of the file; a copy is stored in the alias
 *   dirent   - directory entry of the file; d_name supplies name and suffix
 *
 * NOTE(review): the three strdup() results are stored without a NULL
 * check (already flagged XXX below), and aliassort() hands name and
 * suffix straight to strcmp().
 * NOTE(review): the result of RB_INSERT is discarded.  tree(3) documents
 * that it returns the existing element on a key collision; in that case
 * this alias and its strings would never be freed -- confirm whether two
 * files of one page can share name and suffix.
 * NOTE(review): isalnum() receives a plain char; consider a cast to
 * unsigned char.
 */
188 * Allocates and fills in a new struct page_alias given the
189 * full file name of the man page and its dirent.
190 * If the file is not a man page, nothing is added.
193 new_page_alias(struct page_info *info, char *filename, struct dirent *dirent)
195 int gzipped, basename_length;
196 struct page_alias *alias;
199 basename_length = strlen(dirent->d_name);
/* suffix starts at the terminating NUL and is walked backwards below */
200 suffix = &dirent->d_name[basename_length];
/* a name of at least four characters ending in .gz counts as compressed */
202 gzipped = basename_length >= 4 &&
203 strcmp(&dirent->d_name[basename_length - 3], ".gz") == 0;
210 if (--suffix == dirent->d_name || !isalnum(*suffix)) {
211 if (*suffix == '.') {
215 warnx("%s: invalid man page name", filename);
223 alias = malloc(sizeof(*alias));
228 alias->name = strdup(dirent->d_name); /* XXX unsafe */
229 alias->filename = strdup(filename); /* XXX unsafe */
230 alias->suffix = strdup(suffix); /* XXX unsafe */
231 alias->gzipped = gzipped;
233 RB_INSERT(page_alias_tree, &info->head, alias);
237 * Reset an sbuf's length to 0.
240 sbuf_clear(struct sbuf *sbuf)
242 sbuf->end = sbuf->content;
246 * Allocate a new sbuf.
251 struct sbuf *sbuf = (struct sbuf *) malloc(sizeof(struct sbuf));
252 sbuf->content = malloc(LINE_ALLOC);
253 sbuf->last = sbuf->content + LINE_ALLOC - 1;
259 * Ensure that there is enough room in the sbuf for nchars more characters.
262 sbuf_need(struct sbuf *sbuf, int nchars)
265 size_t size, cntsize;
267 /* double the size of the allocation until the buffer is big enough */
268 while (sbuf->end + nchars > sbuf->last) {
269 size = sbuf->last + 1 - sbuf->content;
271 cntsize = sbuf->end - sbuf->content;
273 new_content = malloc(size);
274 memcpy(new_content, sbuf->content, cntsize);
276 sbuf->content = new_content;
277 sbuf->end = new_content + cntsize;
278 sbuf->last = new_content + size - 1;
283 * Appends a string of a given length to the sbuf.
286 sbuf_append(struct sbuf *sbuf, const char *text, int length)
289 sbuf_need(sbuf, length);
290 memcpy(sbuf->end, text, length);
/*
 * Appends a null-terminated string to the sbuf.
 * The terminator itself is not copied.
 */
static void
sbuf_append_str(struct sbuf *sbuf, char *text)
{
	sbuf_append(sbuf, text, strlen(text));
}
305 * Appends an edited null-terminated string to the sbuf.
308 sbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy)
310 int length = strlen(text);
312 sbuf_need(sbuf, length);
313 sbuf->end = copy(text, sbuf->end, length);
318 * Strips any of a set of chars from the end of the sbuf.
321 sbuf_strip(struct sbuf *sbuf, const char *set)
323 while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL)
328 * Returns the null-terminated string built by the sbuf.
331 sbuf_content(struct sbuf *sbuf)
334 return(sbuf->content);
/*
 * trap_signal: handler that main() installs for SIGINT, SIGHUP, SIGQUIT
 * and SIGTERM.  The visible code tests whether tmp_file holds a path.
 * NOTE(review): the statements that act on that test are not visible
 * here -- presumably the temporary file is removed before the program
 * terminates; confirm, and keep to async-signal-safe calls in a handler.
 */
338 trap_signal(int sig __unused)
340 if (tmp_file[0] != '\0')
346 * Attempts to open an output file. Returns NULL if unsuccessful.
349 open_output(char *name)
353 whatis_lines = sl_init();
355 char line[LINE_ALLOC];
357 output = fopen(name, "r");
358 if (output == NULL) {
363 while (fgets(line, sizeof line, output) != NULL) {
364 line[strlen(line) - 1] = '\0';
365 sl_add(whatis_lines, strdup(line));
368 if (common_output == NULL) {
369 snprintf(tmp_file, sizeof tmp_file, "%s.tmp", name);
372 output = fopen(name, "w");
373 if (output == NULL) {
/*
 * qsort(3) comparison callback for an array of C strings: each argument
 * points at a char pointer, and the strings are ordered with strcmp().
 */
static int
linesort(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	return (strcmp(*lhs, *rhs));
}
388 * Writes the unique sorted lines to the output file.
391 finish_output(FILE *output, char *name)
396 qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof(char *),
398 for (i = 0; i < whatis_lines->sl_cur; i++) {
399 char *line = whatis_lines->sl_str[i];
400 if (i > 0 && strcmp(line, prev) == 0)
407 sl_free(whatis_lines, 1);
408 if (common_output == NULL) {
409 rename(tmp_file, name);
415 open_whatis(char *mandir)
417 char filename[MAXPATHLEN];
419 snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
420 return(open_output(filename));
424 finish_whatis(FILE *output, char *mandir)
426 char filename[MAXPATHLEN];
428 snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
429 finish_output(output, filename);
433 * Tests to see if the given directory has already been visited.
436 already_visited(char *dir)
439 struct visited_dir *visit;
441 if (stat(dir, &st) < 0) {
446 SLIST_FOREACH(visit, &visited_dirs, next) {
447 if (visit->inode == st.st_ino &&
448 visit->device == st.st_dev) {
449 warnx("already visited %s", dir);
453 visit = (struct visited_dir *) malloc(sizeof(struct visited_dir));
454 visit->device = st.st_dev;
455 visit->inode = st.st_ino;
456 SLIST_INSERT_HEAD(&visited_dirs, visit, next);
/*
 * Removes trailing spaces from a string, returning a pointer to just
 * beyond the new last character.
 *
 * The string is cut in place.  An empty or all-blank string becomes
 * empty and str itself is returned.
 */
static char *
trim_rhs(char *str)
{
	char *rhs = &str[strlen(str)];

	/* look at the character before rhs so we never step before str */
	while (rhs > str && isspace((unsigned char)rhs[-1]))
		rhs--;
	*rhs = '\0';
	return (rhs);
}
/*
 * Returns a pointer to the next non-space character in the string.
 * If only blanks remain, the result points at the terminating NUL.
 */
static char *
skip_spaces(char *s)
{
	/* the cast keeps isspace() defined for bytes with the high bit set */
	while (*s != '\0' && isspace((unsigned char)*s))
		s++;
	return (s);
}
/*
 * Returns whether the string contains only digits.
 * At least one digit is required: an empty string yields 0.
 */
static int
only_digits(char *line)
{
	/* the casts keep isdigit() defined for bytes with the high bit set */
	if (!isdigit((unsigned char)*line++))
		return (0);
	while (isdigit((unsigned char)*line))
		line++;
	return (*line == '\0');
}
/*
 * name_section_line: reports whether line begins with the
 * three-character macro section_start and whether the text after it,
 * with leading and trailing blanks removed, equals one of the entries
 * of name_section_titles.
 *
 *   line          - input line; shortened in place by trim_rhs()
 *   section_start - macro to look for; only its first three characters
 *                   are compared
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers use the result as a boolean.
 */
499 * Returns whether the line is of one of the forms:
503 * assuming that section_start is ".Sh".
506 name_section_line(char *line, const char *section_start)
511 if (strncmp(line, section_start, 3) != 0)
/* step over the macro, then isolate the title text */
513 line = skip_spaces(line + 3);
514 rhs = trim_rhs(line);
/* exact, case-sensitive match against each known title */
520 for (title = name_section_titles; *title != NULL; title++)
521 if (strcmp(*title, line) == 0)
/*
 * de_nroff_copy: edited_copy callback passed to sbuf_append_edited()
 * by add_nroff().  Scans fromlen bytes starting at from and writes the
 * filtered text to to.
 *
 * NOTE(review): presumably returns the position just past the last byte
 * written, since sbuf_append_edited() stores the result in sbuf->end --
 * confirm against the hidden return statement.
 * NOTE(review): the bracket-skipping loop dereferences the advanced
 * pointer before comparing it with from_end, and the digit loop has no
 * from_end test at all; both rely on the input being NUL-terminated at
 * from_end.
 * NOTE(review): isdigit() receives a plain char; consider a cast to
 * unsigned char.
 */
527 * Copies characters while removing the most common nroff/troff
529 * \(em, \(mi, \s[+-N], \&
530 * \fF, \f(fo, \f[font]
531 * \*s, \*(st, \*[stringvar]
534 de_nroff_copy(char *from, char *to, int fromlen)
536 char *from_end = &from[fromlen];
537 while (from < from_end) {
542 if (strncmp(&from[1], "em", 2) == 0 ||
543 strncmp(&from[1], "mi", 2) == 0) {
551 while (isdigit(*from))
558 else if (*from == '[') {
559 while (*++from != ']' && from < from_end)
577 * Appends a string with the nroff formatting removed.
580 add_nroff(char *text)
582 sbuf_append_edited(whatis_proto, text, de_nroff_copy);
586 * Appends "name(suffix), " to whatis_final.
589 add_whatis_name(char *name, char *suffix)
592 sbuf_append_str(whatis_final, name);
593 sbuf_append(whatis_final, "(", 1);
594 sbuf_append_str(whatis_final, suffix);
595 sbuf_append(whatis_final, "), ", 3);
/*
 * process_man_line: handles one line inside a man(7)-style NAME
 * section.  The visible code skips the alphabetic macro name and the
 * blanks after it, tests the remainder with only_digits(), and appends
 * a single blank to whatis_proto.
 *
 * NOTE(review): the branch structure around these statements is not
 * visible in this excerpt.
 * NOTE(review): isalpha() receives a plain char; consider a cast to
 * unsigned char.
 */
600 * Processes an old-style man(7) line. This ignores commands with only
601 * a single number argument.
604 process_man_line(char *line)
607 while (isalpha(*++line))
609 line = skip_spaces(line);
610 if (only_digits(line))
613 line = skip_spaces(line);
616 sbuf_append(whatis_proto, " ", 1);
/*
 * process_mdoc_line: handles one line inside an mdoc(7)-style NAME
 * section and appends the resulting text to whatis_proto.
 *
 * Visible behaviour:
 *  - a line that does not start with a dot followed by an upper-case and
 *    a lower-case letter is added through add_nroff() plus one blank;
 *  - xref records whether the macro is .Xr;
 *  - arguments are split at double quotes or at blanks and tabs;
 *  - a two-letter argument (upper-case then lower-case) is compared with
 *    Ns and looked up in mdoc_commands;
 *  - one trailing blank is appended if the line added anything.
 *
 * NOTE(review): line is modified in place (memmove below).
 * NOTE(review): isupper() and islower() receive plain char values;
 * consider casts to unsigned char.
 */
621 * Processes a new-style mdoc(7) line.
624 process_mdoc_line(char *line)
628 char *line_end = &line[strlen(line)];
629 int orig_length = sbuf_length(whatis_proto);
634 if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) {
635 add_nroff(skip_spaces(line));
636 sbuf_append(whatis_proto, " ", 1);
639 xref = strncmp(line, ".Xr", 3) == 0;
641 while ((line = skip_spaces(line)) < line_end) {
645 next = strchr(next, '"');
/* shifts the tail, including its NUL, one byte to the left */
648 memmove(next, next + 1, strlen(next));
655 next = strpbrk(line, " \t");
660 if (isupper(*line) && islower(line[1]) && line[2] == '\0') {
661 if (strcmp(line, "Ns") == 0) {
666 if (strstr(mdoc_commands, line) != NULL) {
671 if (arg > 0 && strchr(",.:;?!)]", *line) == 0) {
673 sbuf_append(whatis_proto, "(", 1);
675 sbuf_append(whatis_proto, ")", 1);
679 sbuf_append(whatis_proto, " ", 1);
685 if (sbuf_length(whatis_proto) > orig_length)
686 sbuf_append(whatis_proto, " ", 1);
/* Parser states used by process_page() while scanning a man page source. */
enum { STATE_UNKNOWN, STATE_MANSTYLE, STATE_MDOCNAME, STATE_MDOCDESC };
/*
 * process_page: reads the file of the first alias of info (RB_MIN of
 * info->head) through gzopen() and gzgets(), collects the NAME section
 * text in whatis_proto, and adds one formatted line to whatis_lines.
 *
 * The state variable starts at STATE_UNKNOWN and switches to
 * STATE_MANSTYLE on a .SH NAME line or to STATE_MDOCNAME on a .Sh NAME
 * line; STATE_MDOCDESC follows the .Nm lines of an mdoc page.
 *
 * The final line is built in whatis_final: every alias as name(suffix)
 * separated by a comma and a blank, padded with blanks up to indent,
 * then the separator and the description text.
 *
 * NOTE(review): an in-body comment below refers to page->list, but the
 * visible code uses the tree info->head; the comment looks stale.
 * NOTE(review): RB_MIN yields NULL for an empty alias tree, and
 * new_page_alias() documents that it may add nothing -- confirm that a
 * page without aliases cannot reach the alias->filename uses.
 * NOTE(review): sbuf_retract(whatis_final, 2) assumes at least one name
 * was appended by the RB_FOREACH loop.
 * NOTE(review): the strdup() result passed to sl_add() is not checked.
 */
692 * Processes a man page source into a single whatis line and adds it
696 process_page(struct page_info *info)
698 int state = STATE_UNKNOWN;
699 struct page_alias *alias;
705 * Only read the page once for each inode. It's
706 * safe to assume that page->list is set.
708 alias = RB_MIN(page_alias_tree, &info->head);
711 fprintf(stderr, "\treading %s\n", alias->filename);
714 sbuf_clear(whatis_proto);
715 if ((in = gzopen(alias->filename, "r")) == NULL) {
716 warn("%s", alias->filename);
720 while (gzgets(in, buffer, sizeof(buffer)) != NULL) {
722 if (strncmp(line, ".\\\"", 3) == 0) /* ignore comments */
726 * haven't reached the NAME section yet.
729 if (name_section_line(line, ".SH"))
730 state = STATE_MANSTYLE;
731 else if (name_section_line(line, ".Sh"))
732 state = STATE_MDOCNAME;
735 * Inside an old-style .SH NAME section.
738 if (strncmp(line, ".SH", 3) == 0)
740 if (strncmp(line, ".SS", 3) == 0)
743 if (strcmp(line, ".") == 0)
745 if (strncmp(line, ".IX", 3) == 0) {
747 line = skip_spaces(line);
749 process_man_line(line);
752 * Inside a new-style .Sh NAME section (the .Nm part).
756 if (strncmp(line, ".Nm", 3) == 0) {
757 process_mdoc_line(line);
760 if (strcmp(line, ".") == 0)
762 sbuf_append(whatis_proto, "- ", 2);
763 state = STATE_MDOCDESC;
767 * Inside a new-style .Sh NAME section (after the .Nm-s).
770 if (strncmp(line, ".Sh", 3) == 0)
773 if (strcmp(line, ".") == 0)
775 process_mdoc_line(line);
781 sbuf_strip(whatis_proto, " \t.-");
782 line = sbuf_content(whatis_proto);
784 * line now contains the appropriate data, but without
785 * the proper indentation or the section appended to each name.
787 descr = strstr(line, " - ");
789 descr = strchr(line, ' ');
793 "\tignoring junk description \"%s\"\n",
802 sbuf_clear(whatis_final);
803 RB_FOREACH(alias, page_alias_tree, &info->head) {
805 * This won't append names stored in `line'.
806 * The reason for that is that we cannot be sure
807 * which section they belong to unless we have
808 * a real alias (via MLINKS) in this list.
810 add_whatis_name(alias->name, alias->suffix);
812 sbuf_retract(whatis_final, 2); /* remove last ", " */
813 while (sbuf_length(whatis_final) < indent)
814 sbuf_append(whatis_final, " ", 1);
815 sbuf_append(whatis_final, " - ", 3);
816 sbuf_append_str(whatis_final, skip_spaces(descr));
817 sl_add(whatis_lines, strdup(sbuf_content(whatis_final)));
/*
 * process_section: scans section_dir with scandir() and, for every
 * regular file, finds or creates the struct page_info keyed by the
 * inode of the file in page_head, then calls new_page_alias() for it.
 *
 * Entries that cannot be examined with stat() are reported with warn();
 * entries that are neither regular files nor directories are reported
 * only when verbose is set.  Both kinds are skipped.
 *
 * NOTE(review): only st_ino is used as the key (ref.inode); st_dev is
 * not compared, and stat() follows symbolic links -- confirm that pages
 * on different file systems cannot be merged by accident.
 * NOTE(review): a new page_info is inserted into page_head before
 * new_page_alias() runs, and that function may add nothing; such a page
 * would keep an empty alias tree.
 */
821 * Processes a single man section.
824 process_section(char *section_dir)
826 struct dirent **entries;
827 struct page_info *info;
832 fprintf(stderr, " %s\n", section_dir);
836 * scan the man section directory for pages
838 nentries = scandir(section_dir, &entries, NULL, alphasort);
840 warn("%s", section_dir);
846 * collect information about man pages
848 for (i = 0; i < nentries; i++) {
/* lookup key only: infosort() reads nothing but the inode field */
849 struct page_info ref;
853 if (asprintf(&filename, "%s/%s", section_dir,
854 entries[i]->d_name) < 0) {
858 if (stat(filename, &st) < 0) {
859 warn("%s", filename);
860 goto process_section_next;
863 if (!S_ISREG(st.st_mode)) {
864 if (verbose && !S_ISDIR(st.st_mode))
865 warnx("%s: not a regular file", filename);
866 goto process_section_next;
869 ref.inode = st.st_ino;
871 info = RB_FIND(page_info_tree, &page_head, &ref);
873 info = malloc(sizeof(*info));
878 bzero(info, sizeof(*info));
879 info->inode = st.st_ino;
880 RB_INIT(&info->head);
882 RB_INSERT(page_info_tree, &page_head, info);
885 new_page_alias(info, filename, entries[i]);
887 process_section_next:
/*
 * select_sections: scandir() filter used by process_mandir().  The
 * visible code requires the entry name to start with the three letters
 * man; p addresses the text after that prefix.
 * NOTE(review): the checks applied to p are not visible in this excerpt.
 */
896 * Returns whether the directory entry is a man page section.
899 select_sections(const struct dirent *entry)
901 const char *p = &entry->d_name[3];
903 if (strncmp(entry->d_name, "man", 3) != 0)
/*
 * process_mandir: handles one top-level man directory.
 *
 * Steps visible here:
 *  - the directory is skipped when already_visited() reports it;
 *  - section directories are listed with scandir() and select_sections();
 *  - unless a common output file is in use, a per-directory whatis file
 *    is opened with open_whatis() and completed with finish_whatis();
 *  - each section directory is processed, and so is its subdirectory
 *    named after machine when that exists and is a directory;
 *  - finally every page is removed from page_head and freed.
 *
 * NOTE(review): fp is assigned and used only when common_output is NULL.
 * NOTE(review): the snprintf() results are not checked for truncation.
 */
913 * Processes a single top-level man directory by finding all the
914 * sub-directories named man* and processing each one in turn.
917 process_mandir(char *dir_name)
919 struct dirent **entries;
920 struct page_info *info;
926 if (already_visited(dir_name))
929 fprintf(stderr, "man directory %s\n", dir_name);
930 nsections = scandir(dir_name, &entries, select_sections, alphasort);
932 warn("%s", dir_name);
936 if (common_output == NULL && (fp = open_whatis(dir_name)) == NULL)
938 for (i = 0; i < nsections; i++) {
939 char section_dir[MAXPATHLEN];
940 snprintf(section_dir, sizeof section_dir, "%s/%s", dir_name,
942 process_section(section_dir);
943 snprintf(section_dir, sizeof section_dir, "%s/%s/%s", dir_name,
944 entries[i]->d_name, machine);
945 if (stat(section_dir, &st) == 0 && S_ISDIR(st.st_mode))
946 process_section(section_dir);
952 * process and free all pages
954 while ((info = RB_ROOT(&page_head))) {
955 RB_REMOVE(page_info_tree, &page_head, info);
957 free_page_info(info);
960 if (common_output == NULL)
961 finish_whatis(fp, dir_name);
/*
 * process_argument: splits arg at colons with strsep() and processes
 * the resulting directories.  When locale or lang_locale is set, the
 * matching subdirectory dir/locale or dir/lang_locale is passed to
 * process_mandir().
 *
 * NOTE(review): both asprintf() calls ignore the return value; on
 * failure the content of mandir is undefined -- check for a negative
 * result before use.
 * NOTE(review): how mandir is released is not visible in this excerpt.
 */
965 * Processes one argument, which may be a colon-separated list of
969 process_argument(const char *arg)
977 err(1, "out of memory");
978 while ((dir = strsep(&parg, ":")) != NULL) {
979 if (locale != NULL) {
980 asprintf(&mandir, "%s/%s", dir, locale);
981 process_mandir(mandir);
983 if (lang_locale != NULL) {
984 asprintf(&mandir, "%s/%s", dir, lang_locale);
985 process_mandir(mandir);
/*
 * main: parses the options a, i, n, o, v and L, installs trap_signal()
 * for SIGINT, SIGHUP, SIGQUIT and SIGTERM, allocates the two work
 * buffers, and processes either the directories named on the command
 * line or, without arguments, MANPATH (falling back to DEFAULT_MANPATH).
 * With a common output file the stream is opened once up front and
 * completed with finish_output() at the end.
 *
 * The locale is looked up in LC_ALL, LC_CTYPE and LANG.
 *
 * NOTE(review): atoi() accepts any text and reports no errors, so a
 * malformed or negative indent is not rejected.
 * NOTE(review): the asprintf() result for lang_locale is not checked.
 * NOTE(review): isupper() receives a plain char; consider a cast to
 * unsigned char.
 */
997 main(int argc, char **argv)
1002 while ((opt = getopt(argc, argv, "ai:n:o:vL")) != -1) {
1008 indent = atoi(optarg);
1011 whatis_name = optarg;
1014 common_output = optarg;
1020 locale = getenv("LC_ALL");
1022 locale = getenv("LC_CTYPE");
1024 locale = getenv("LANG");
1025 if (locale != NULL) {
1026 char *sep = strchr(locale, '_');
1027 if (sep != NULL && isupper(sep[1]) &&
1029 asprintf(&lang_locale, "%.*s%s",
1030 (int)(sep - locale),
1036 fprintf(stderr, "usage: %s [-a] [-i indent] [-n name] [-o output_file] [-v] [-L] [directories...]\n", argv[0]);
1041 signal(SIGINT, trap_signal);
1042 signal(SIGHUP, trap_signal);
1043 signal(SIGQUIT, trap_signal);
1044 signal(SIGTERM, trap_signal);
1045 SLIST_INIT(&visited_dirs);
1046 whatis_proto = new_sbuf();
1047 whatis_final = new_sbuf();
1049 if ((machine = getenv("MACHINE")) == NULL)
1052 if (common_output != NULL && (fp = open_output(common_output)) == NULL)
1053 err(1, "%s", common_output);
1054 if (optind == argc) {
1055 const char *manpath = getenv("MANPATH");
1056 if (manpath == NULL)
1057 manpath = DEFAULT_MANPATH;
1058 process_argument(manpath);
1060 while (optind < argc)
1061 process_argument(argv[optind++]);
1063 if (common_output != NULL)
1064 finish_output(fp, common_output);