| Commit | Line | Data |
|---|---|---|
| ae4803a1 JS |
1 | /*- |
| 2 | * Copyright (c) 2002 John Rochester | |
| 3 | * All rights reserved. | |
| 4 | * | |
| 5 | * Redistribution and use in source and binary forms, with or without | |
| 6 | * modification, are permitted provided that the following conditions | |
| 7 | * are met: | |
| 8 | * 1. Redistributions of source code must retain the above copyright | |
| 9 | * notice, this list of conditions and the following disclaimer, | |
| 10 | * in this position and unchanged. | |
| 11 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 12 | * notice, this list of conditions and the following disclaimer in the | |
| 13 | * documentation and/or other materials provided with the distribution. | |
| 14 | * 3. The name of the author may not be used to endorse or promote products | |
| 15 | * derived from this software without specific prior written permission | |
| 16 | * | |
| 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | |
| 18 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
| 19 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | |
| 20 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | |
| 21 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
| 22 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| 23 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| 24 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | |
| 26 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 27 | * | |
| 28 | * $FreeBSD: src/usr.bin/makewhatis/makewhatis.c,v 1.9 2002/09/04 23:29:04 dwmalone Exp $ | |
| ae4803a1 JS |
29 | */ |
| 30 | ||
#include <sys/types.h>
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/stat.h>

#include <ctype.h>
#include <dirent.h>
#include <err.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stringlist.h>
#include <unistd.h>
#include <zlib.h>
| 45 | ||
/* Man hierarchy processed when no directory argument and no MANPATH is given. */
#define DEFAULT_MANPATH	"/usr/share/man"
/* Initial sbuf capacity; also the line buffer size used when appending. */
#define LINE_ALLOC	4096

/* Writable empty string; process_mdoc_line() substitutes it for a consumed argument. */
static char blank[] = "";
| 50 | ||
/*
 * What is recorded about one candidate man page file found while
 * scanning a section directory.  The three strings are heap-allocated
 * and released by free_page_info().
 */
struct page_info {
	char	*filename;	/* "directory/entry" path of the file */
	char	*name;		/* entry name with ".suffix[.gz]" removed */
	char	*suffix;	/* text after the last '.', without ".gz" */
	int	 gzipped;	/* non-zero if the entry name ends in ".gz" */
	ino_t	 inode;		/* inode number reported by stat(2) */
};
| 61 | ||
/*
 * One node of the singly-linked list of directories that have already
 * been processed; a directory is identified by its device/inode pair.
 */
struct visited_dir {
	dev_t	device;			/* st_dev of the directory */
	ino_t	inode;			/* st_ino of the directory */
	SLIST_ENTRY(visited_dir) next;	/* link to the next list node */
};
| 70 | ||
/*
 * A growable character buffer.  The stored text is the range
 * [content, end); it is only NUL-terminated when sbuf_content() is called.
 */
struct sbuf {
	char	*content;	/* first byte of the allocation */
	char	*end;		/* one past the last stored character */
	char	*last;		/* last byte of the allocation */
};
| 79 | ||
/*
 * Drops the final `amount` characters from the sbuf.  No check is made
 * that the sbuf actually holds that many characters.
 */
#define sbuf_retract(sbuf, amount)	((sbuf)->end -= (amount))

/*
 * Yields the number of characters currently stored in the sbuf.
 */
#define sbuf_length(sbuf)		((sbuf)->end - (sbuf)->content)
| 90 | ||
| 91 | typedef char *edited_copy(char *from, char *to, int length); | |
| 92 | ||
| 93 | static int append; /* -a flag: append to existing whatis */ | |
| 94 | static int verbose; /* -v flag: be verbose with warnings */ | |
| 95 | static int indent = 24; /* -i option: description indentation */ | |
| 96 | static const char *whatis_name="whatis";/* -n option: the name */ | |
| 97 | static char *common_output; /* -o option: the single output file */ | |
| 98 | static char *locale; /* user's locale if -L is used */ | |
| 99 | static char *lang_locale; /* short form of locale */ | |
| 100 | static const char *machine; | |
| 101 | ||
| 102 | static int exit_code; /* exit code to use when finished */ | |
| 103 | static SLIST_HEAD(, visited_dir) visited_dirs = | |
| 104 | SLIST_HEAD_INITIALIZER(visited_dirs); | |
| 105 | ||
| 106 | /* | |
| 107 | * While the whatis line is being formed, it is stored in whatis_proto. | |
| 108 | * When finished, it is reformatted into whatis_final and then appended | |
| 109 | * to whatis_lines. | |
| 110 | */ | |
| 111 | static struct sbuf *whatis_proto; | |
| 112 | static struct sbuf *whatis_final; | |
| 113 | static StringList *whatis_lines; /* collected output lines */ | |
| 114 | ||
| 115 | static char tmp_file[MAXPATHLEN]; /* path of temporary file, if any */ | |
| 116 | ||
/*
 * Section headings accepted as the start of the NAME section.  The list
 * is NULL-terminated and is compared byte-for-byte by name_section_line().
 */
static const char *name_section_titles[] = {
	"NAME",
	"Name",
	"NAMN",
	"BEZEICHNUNG",
	"\xcc\xbe\xbe\xce",
	"\xee\xe1\xfa\xf7\xe1\xee\xe9\xe5",
	NULL
};

/*
 * Two-letter mdoc(7) macro names, run together, that process_mdoc_line()
 * drops when one of them shows up as an argument.
 */
static char mdoc_commands[] = "ArDvErEvFlLiNmPa";
| 125 | ||
| 126 | /* | |
| 127 | * Frees a struct page_info and its content. | |
| 128 | */ | |
| 129 | static void | |
| 130 | free_page_info(struct page_info *info) | |
| 131 | { | |
| 132 | free(info->filename); | |
| 133 | free(info->name); | |
| 134 | free(info->suffix); | |
| 135 | free(info); | |
| 136 | } | |
| 137 | ||
| 138 | /* | |
| 139 | * Allocates and fills in a new struct page_info given the | |
| 140 | * name of the man section directory and the dirent of the file. | |
| 141 | * If the file is not a man page, returns NULL. | |
| 142 | */ | |
| 143 | static struct page_info * | |
| 144 | new_page_info(char *dir, struct dirent *dirent) | |
| 145 | { | |
| 146 | struct page_info *info; | |
| 147 | int basename_length; | |
| 148 | char *suffix; | |
| 149 | struct stat st; | |
| 150 | ||
| 151 | info = malloc(sizeof(struct page_info)); | |
| 152 | if (info == NULL) | |
| 153 | err(1, "malloc"); | |
| 154 | basename_length = strlen(dirent->d_name); | |
| 155 | suffix = &dirent->d_name[basename_length]; | |
| 156 | asprintf(&info->filename, "%s/%s", dir, dirent->d_name); | |
| 157 | if ((info->gzipped = basename_length >= 4 && | |
| 158 | strcmp(&dirent->d_name[basename_length - 3], ".gz") == 0)) { | |
| 159 | suffix -= 3; | |
| 160 | *suffix = '\0'; | |
| 161 | } | |
| 162 | for (;;) { | |
| 163 | if (--suffix == dirent->d_name || !isalnum(*suffix)) { | |
| 164 | if (*suffix == '.') | |
| 165 | break; | |
| 166 | if (verbose) | |
| 167 | warnx("%s: invalid man page name", | |
| 168 | info->filename); | |
| 169 | free(info->filename); | |
| 170 | free(info); | |
| 171 | return(NULL); | |
| 172 | } | |
| 173 | } | |
| 174 | *suffix++ = '\0'; | |
| 175 | info->name = strdup(dirent->d_name); | |
| 176 | info->suffix = strdup(suffix); | |
| 177 | if (stat(info->filename, &st) < 0) { | |
| 178 | warn("%s", info->filename); | |
| 179 | free_page_info(info); | |
| 180 | return(NULL); | |
| 181 | } | |
| 182 | if (!S_ISREG(st.st_mode)) { | |
| 183 | if (verbose && !S_ISDIR(st.st_mode)) | |
| 184 | warnx("%s: not a regular file", info->filename); | |
| 185 | free_page_info(info); | |
| 186 | return(NULL); | |
| 187 | } | |
| 188 | info->inode = st.st_ino; | |
| 189 | return(info); | |
| 190 | } | |
| 191 | ||
| 192 | /* | |
| 193 | * Reset an sbuf's length to 0. | |
| 194 | */ | |
| 195 | static void | |
| 196 | sbuf_clear(struct sbuf *sbuf) | |
| 197 | { | |
| 198 | sbuf->end = sbuf->content; | |
| 199 | } | |
| 200 | ||
| 201 | /* | |
| 202 | * Allocate a new sbuf. | |
| 203 | */ | |
| 204 | static struct sbuf * | |
| 205 | new_sbuf(void) | |
| 206 | { | |
| 207 | struct sbuf *sbuf = (struct sbuf *) malloc(sizeof(struct sbuf)); | |
| 208 | sbuf->content = malloc(LINE_ALLOC); | |
| 209 | sbuf->last = sbuf->content + LINE_ALLOC - 1; | |
| 210 | sbuf_clear(sbuf); | |
| 211 | return(sbuf); | |
| 212 | } | |
| 213 | ||
| 214 | /* | |
| 215 | * Ensure that there is enough room in the sbuf for nchars more characters. | |
| 216 | */ | |
| 217 | static void | |
| 218 | sbuf_need(struct sbuf *sbuf, int nchars) | |
| 219 | { | |
| 220 | char *new_content; | |
| 221 | size_t size, cntsize; | |
| 222 | ||
| 223 | /* double the size of the allocation until the buffer is big enough */ | |
| 224 | while (sbuf->end + nchars > sbuf->last) { | |
| 225 | size = sbuf->last + 1 - sbuf->content; | |
| 226 | size *= 2; | |
| 227 | cntsize = sbuf->end - sbuf->content; | |
| 228 | ||
| 229 | new_content = malloc(size); | |
| 230 | memcpy(new_content, sbuf->content, cntsize); | |
| 231 | free(sbuf->content); | |
| 232 | sbuf->content = new_content; | |
| 233 | sbuf->end = new_content + cntsize; | |
| 234 | sbuf->last = new_content + size - 1; | |
| 235 | } | |
| 236 | } | |
| 237 | ||
| 238 | /* | |
| 239 | * Appends a string of a given length to the sbuf. | |
| 240 | */ | |
| 241 | static void | |
| 242 | sbuf_append(struct sbuf *sbuf, const char *text, int length) | |
| 243 | { | |
| 244 | if (length > 0) { | |
| 245 | sbuf_need(sbuf, length); | |
| 246 | memcpy(sbuf->end, text, length); | |
| 247 | sbuf->end += length; | |
| 248 | } | |
| 249 | } | |
| 250 | ||
/*
 * Adds a NUL-terminated string (without its terminator) to the sbuf.
 */
static void
sbuf_append_str(struct sbuf *sbuf, char *text)
{
	int length = strlen(text);

	sbuf_append(sbuf, text, length);
}
| 259 | ||
| 260 | /* | |
| 261 | * Appends an edited null-terminated string to the sbuf. | |
| 262 | */ | |
| 263 | static void | |
| 264 | sbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy) | |
| 265 | { | |
| 266 | int length = strlen(text); | |
| 267 | if (length > 0) { | |
| 268 | sbuf_need(sbuf, length); | |
| 269 | sbuf->end = copy(text, sbuf->end, length); | |
| 270 | } | |
| 271 | } | |
| 272 | ||
| 273 | /* | |
| 274 | * Strips any of a set of chars from the end of the sbuf. | |
| 275 | */ | |
| 276 | static void | |
| 277 | sbuf_strip(struct sbuf *sbuf, const char *set) | |
| 278 | { | |
| 279 | while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL) | |
| 280 | sbuf->end--; | |
| 281 | } | |
| 282 | ||
| 283 | /* | |
| 284 | * Returns the null-terminated string built by the sbuf. | |
| 285 | */ | |
| 286 | static char * | |
| 287 | sbuf_content(struct sbuf *sbuf) | |
| 288 | { | |
| 289 | *sbuf->end = '\0'; | |
| 290 | return(sbuf->content); | |
| 291 | } | |
| 292 | ||
| 293 | /* | |
| 294 | * Returns true if no man page exists in the directory with | |
| 295 | * any of the names in the StringList. | |
| 296 | */ | |
| 297 | static int | |
| 298 | no_page_exists(char *dir, StringList *names, char *suffix) | |
| 299 | { | |
| 300 | char path[MAXPATHLEN]; | |
| 301 | size_t i; | |
| 302 | ||
| 303 | for (i = 0; i < names->sl_cur; i++) { | |
| 304 | snprintf(path, sizeof path, "%s/%s.%s.gz", dir, names->sl_str[i], suffix); | |
| 305 | if (access(path, F_OK) < 0) { | |
| 306 | path[strlen(path) - 3] = '\0'; | |
| 307 | if (access(path, F_OK) < 0) | |
| 308 | continue; | |
| 309 | } | |
| 310 | return(0); | |
| 311 | } | |
| 312 | return(1); | |
| 313 | } | |
| 314 | ||
| 315 | static void | |
| 316 | trap_signal(int sig __unused) | |
| 317 | { | |
| 318 | if (tmp_file[0] != '\0') | |
| 319 | unlink(tmp_file); | |
| 320 | exit(1); | |
| 321 | } | |
| 322 | ||
| 323 | /* | |
| 324 | * Attempts to open an output file. Returns NULL if unsuccessful. | |
| 325 | */ | |
| 326 | static FILE * | |
| 327 | open_output(char *name) | |
| 328 | { | |
| 329 | FILE *output; | |
| 330 | ||
| 331 | whatis_lines = sl_init(); | |
| 332 | if (append) { | |
| 333 | char line[LINE_ALLOC]; | |
| 334 | ||
| 335 | output = fopen(name, "r"); | |
| 336 | if (output == NULL) { | |
| 337 | warn("%s", name); | |
| 338 | exit_code = 1; | |
| 339 | return(NULL); | |
| 340 | } | |
| 341 | while (fgets(line, sizeof line, output) != NULL) { | |
| 342 | line[strlen(line) - 1] = '\0'; | |
| 343 | sl_add(whatis_lines, strdup(line)); | |
| 344 | } | |
| 345 | } | |
| 346 | if (common_output == NULL) { | |
| 347 | snprintf(tmp_file, sizeof tmp_file, "%s.tmp", name); | |
| 348 | name = tmp_file; | |
| 349 | } | |
| 350 | output = fopen(name, "w"); | |
| 351 | if (output == NULL) { | |
| 352 | warn("%s", name); | |
| 353 | exit_code = 1; | |
| 354 | return(NULL); | |
| 355 | } | |
| 356 | return(output); | |
| 357 | } | |
| 358 | ||
/*
 * qsort(3) comparison routine for an array of C strings: orders the
 * strings the way strcmp(3) does.
 */
static int
linesort(const void *a, const void *b)
{
	const char *lhs = *(const char * const *)a;
	const char *rhs = *(const char * const *)b;

	return(strcmp(lhs, rhs));
}
| 364 | ||
| 365 | /* | |
| 366 | * Writes the unique sorted lines to the output file. | |
| 367 | */ | |
| 368 | static void | |
| 369 | finish_output(FILE *output, char *name) | |
| 370 | { | |
| 371 | size_t i; | |
| 372 | char *prev = NULL; | |
| 373 | ||
| 374 | qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof(char *), | |
| 375 | linesort); | |
| 376 | for (i = 0; i < whatis_lines->sl_cur; i++) { | |
| 377 | char *line = whatis_lines->sl_str[i]; | |
| 378 | if (i > 0 && strcmp(line, prev) == 0) | |
| 379 | continue; | |
| 380 | prev = line; | |
| 381 | fputs(line, output); | |
| 382 | putc('\n', output); | |
| 383 | } | |
| 384 | fclose(output); | |
| 385 | sl_free(whatis_lines, 1); | |
| 386 | if (common_output == NULL) { | |
| 387 | rename(tmp_file, name); | |
| 388 | unlink(tmp_file); | |
| 389 | } | |
| 390 | } | |
| 391 | ||
| 392 | static FILE * | |
| 393 | open_whatis(char *mandir) | |
| 394 | { | |
| 395 | char filename[MAXPATHLEN]; | |
| 396 | ||
| 397 | snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name); | |
| 398 | return(open_output(filename)); | |
| 399 | } | |
| 400 | ||
| 401 | static void | |
| 402 | finish_whatis(FILE *output, char *mandir) | |
| 403 | { | |
| 404 | char filename[MAXPATHLEN]; | |
| 405 | ||
| 406 | snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name); | |
| 407 | finish_output(output, filename); | |
| 408 | } | |
| 409 | ||
| 410 | /* | |
| 411 | * Tests to see if the given directory has already been visited. | |
| 412 | */ | |
| 413 | static int | |
| 414 | already_visited(char *dir) | |
| 415 | { | |
| 416 | struct stat st; | |
| 417 | struct visited_dir *visit; | |
| 418 | ||
| 419 | if (stat(dir, &st) < 0) { | |
| 420 | warn("%s", dir); | |
| 421 | exit_code = 1; | |
| 422 | return(1); | |
| 423 | } | |
| 424 | SLIST_FOREACH(visit, &visited_dirs, next) { | |
| 425 | if (visit->inode == st.st_ino && | |
| 426 | visit->device == st.st_dev) { | |
| 427 | warnx("already visited %s", dir); | |
| 428 | return(1); | |
| 429 | } | |
| 430 | } | |
| 431 | visit = (struct visited_dir *) malloc(sizeof(struct visited_dir)); | |
| 432 | visit->device = st.st_dev; | |
| 433 | visit->inode = st.st_ino; | |
| 434 | SLIST_INSERT_HEAD(&visited_dirs, visit, next); | |
| 435 | return(0); | |
| 436 | } | |
| 437 | ||
/*
 * Cuts trailing white space off a string in place and returns a pointer
 * to the new terminating NUL.
 *
 * The first character of the string is never examined, so a string that
 * consists only of white space keeps its first character.
 */
static char *
trim_rhs(char *str)
{
	char *rhs = &str[strlen(str)];

	/*
	 * Work on the character before rhs so that no pointer below str
	 * is ever formed; ctype functions need an unsigned char value.
	 */
	while (rhs - str > 1 && isspace((unsigned char)rhs[-1]))
		rhs--;
	*rhs = '\0';
	return(rhs);
}
| 451 | ||
/*
 * Returns a pointer to the first character of the string that is not
 * white space; this is the terminating NUL if there is no such character.
 */
static char *
skip_spaces(char *s)
{
	/* isspace() is false for NUL, so the scan stops at the terminator. */
	while (isspace((unsigned char)*s))
		s++;
	return(s);
}
| 462 | ||
/*
 * Returns 1 if the string is a non-empty run of decimal digits and
 * nothing else, 0 otherwise.
 */
static int
only_digits(char *line)
{
	/* ctype functions need values in the unsigned char range. */
	const unsigned char *p = (const unsigned char *)line;

	if (!isdigit(*p))
		return(0);
	do {
		p++;
	} while (isdigit(*p));
	return(*p == '\0');
}
| 475 | ||
| 476 | /* | |
| 477 | * Returns whether the line is of one of the forms: | |
| 478 | * .Sh NAME | |
| 479 | * .Sh "NAME" | |
| 480 | * etc. | |
| 481 | * assuming that section_start is ".Sh". | |
| 482 | */ | |
| 483 | static int | |
| 484 | name_section_line(char *line, const char *section_start) | |
| 485 | { | |
| 486 | char *rhs; | |
| 487 | const char **title; | |
| 488 | ||
| 489 | if (strncmp(line, section_start, 3) != 0) | |
| 490 | return(0); | |
| 491 | line = skip_spaces(line + 3); | |
| 492 | rhs = trim_rhs(line); | |
| 493 | if (*line == '"') { | |
| 494 | line++; | |
| 495 | if (*--rhs == '"') | |
| 496 | *rhs = '\0'; | |
| 497 | } | |
| 498 | for (title = name_section_titles; *title != NULL; title++) | |
| 499 | if (strcmp(*title, line) == 0) | |
| 500 | return(1); | |
| 501 | return(0); | |
| 502 | } | |
| 503 | ||
/*
 * Copies fromlen characters from `from` to `to`, leaving out the most
 * common nroff/troff escapes:
 *	\(em, \(mi, \s[+-]N, \&
 *	\fF, \f(fo, \f[font]
 *	\*s, \*(st, \*[stringvar]
 * For any other escape the backslash is dropped and the character after
 * it is copied.  A backslash that is the very last input character is
 * dropped.  Input is never read at or beyond from + fromlen.
 *
 * Returns the position in `to` just past the last character written.
 * The output is not NUL-terminated and is never longer than the input.
 */
static char *
de_nroff_copy(char *from, char *to, int fromlen)
{
	char *from_end = &from[fromlen];
	long left;

	while (from < from_end) {
		if (*from != '\\') {
			*to++ = *from++;
			continue;
		}
		/* Nothing follows the backslash: there is nothing to copy. */
		if (++from >= from_end)
			break;
		switch (*from) {
		case '(':
			if (from_end - from >= 3 &&
			    (strncmp(&from[1], "em", 2) == 0 ||
			    strncmp(&from[1], "mi", 2) == 0)) {
				from += 3;
				continue;
			}
			break;
		case 's':
			/* Point size change: optional sign, then digits. */
			from++;
			if (from < from_end && (*from == '-' || *from == '+'))
				from++;
			while (from < from_end && isdigit((unsigned char)*from))
				from++;
			continue;
		case 'f':
		case '*':
			from++;
			if (from >= from_end)
				continue;
			if (*from == '(') {
				/* '(' plus a two-character name. */
				left = from_end - from;
				from += left < 3 ? left : 3;
			} else if (*from == '[') {
				while (++from < from_end && *from != ']')
					;
				if (from < from_end)
					from++;	/* the closing ']' */
			} else
				from++;		/* one-character name */
			continue;
		case '&':
			from++;
			continue;
		}
		*to++ = *from++;
	}
	return(to);
}
| 553 | ||
| 554 | /* | |
| 555 | * Appends a string with the nroff formatting removed. | |
| 556 | */ | |
| 557 | static void | |
| 558 | add_nroff(char *text) | |
| 559 | { | |
| 560 | sbuf_append_edited(whatis_proto, text, de_nroff_copy); | |
| 561 | } | |
| 562 | ||
| 563 | /* | |
| 564 | * Appends "name(suffix), " to whatis_final. | |
| 565 | */ | |
| 566 | static void | |
| 567 | add_whatis_name(char *name, char *suffix) | |
| 568 | { | |
| 569 | if (*name != '\0') { | |
| 570 | sbuf_append_str(whatis_final, name); | |
| 571 | sbuf_append(whatis_final, "(", 1); | |
| 572 | sbuf_append_str(whatis_final, suffix); | |
| 573 | sbuf_append(whatis_final, "), ", 3); | |
| 574 | } | |
| 575 | } | |
| 576 | ||
| 577 | /* | |
| 578 | * Processes an old-style man(7) line. This ignores commands with only | |
| 579 | * a single number argument. | |
| 580 | */ | |
| 581 | static void | |
| 582 | process_man_line(char *line) | |
| 583 | { | |
| 584 | if (*line == '.') { | |
| 585 | while (isalpha(*++line)) | |
| 586 | ; | |
| 587 | line = skip_spaces(line); | |
| 588 | if (only_digits(line)) | |
| 589 | return; | |
| 590 | } else | |
| 591 | line = skip_spaces(line); | |
| 592 | if (*line != '\0') { | |
| 593 | add_nroff(line); | |
| 594 | sbuf_append(whatis_proto, " ", 1); | |
| 595 | } | |
| 596 | } | |
| 597 | ||
| 598 | /* | |
| 599 | * Processes a new-style mdoc(7) line. | |
| 600 | */ | |
| 601 | static void | |
| 602 | process_mdoc_line(char *line) | |
| 603 | { | |
| 604 | int xref; | |
| 605 | int arg = 0; | |
| 606 | char *line_end = &line[strlen(line)]; | |
| 607 | int orig_length = sbuf_length(whatis_proto); | |
| 608 | char *next; | |
| 609 | ||
| 610 | if (*line == '\0') | |
| 611 | return; | |
| 612 | if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) { | |
| 613 | add_nroff(skip_spaces(line)); | |
| 614 | sbuf_append(whatis_proto, " ", 1); | |
| 615 | return; | |
| 616 | } | |
| 617 | xref = strncmp(line, ".Xr", 3) == 0; | |
| 618 | line += 3; | |
| 619 | while ((line = skip_spaces(line)) < line_end) { | |
| 620 | if (*line == '"') { | |
| 621 | next = ++line; | |
| 622 | for (;;) { | |
| 623 | next = strchr(next, '"'); | |
| 624 | if (next == NULL) | |
| 625 | break; | |
| 626 | memmove(next, next + 1, strlen(next)); | |
| 627 | line_end--; | |
| 628 | if (*next != '"') | |
| 629 | break; | |
| 630 | next++; | |
| 631 | } | |
| 632 | } else | |
| 633 | next = strpbrk(line, " \t"); | |
| 634 | if (next != NULL) | |
| 635 | *next++ = '\0'; | |
| 636 | else | |
| 637 | next = line_end; | |
| 638 | if (isupper(*line) && islower(line[1]) && line[2] == '\0') { | |
| 639 | if (strcmp(line, "Ns") == 0) { | |
| 640 | arg = 0; | |
| 641 | line = next; | |
| 642 | continue; | |
| 643 | } | |
| 644 | if (strstr(mdoc_commands, line) != NULL) { | |
| 645 | line = next; | |
| 646 | continue; | |
| 647 | } | |
| 648 | } | |
| 649 | if (arg > 0 && strchr(",.:;?!)]", *line) == 0) { | |
| 650 | if (xref) { | |
| 651 | sbuf_append(whatis_proto, "(", 1); | |
| 652 | add_nroff(line); | |
| 653 | sbuf_append(whatis_proto, ")", 1); | |
| 654 | xref = 0; | |
| 655 | line = blank; | |
| 656 | } else | |
| 657 | sbuf_append(whatis_proto, " ", 1); | |
| 658 | } | |
| 659 | add_nroff(line); | |
| 660 | arg++; | |
| 661 | line = next; | |
| 662 | } | |
| 663 | if (sbuf_length(whatis_proto) > orig_length) | |
| 664 | sbuf_append(whatis_proto, " ", 1); | |
| 665 | } | |
| 666 | ||
| 667 | /* | |
| 668 | * Collects a list of comma-separated names from the text. | |
| 669 | */ | |
| 670 | static void | |
| 671 | collect_names(StringList *names, char *text) | |
| 672 | { | |
| 673 | char *arg; | |
| 674 | ||
| 675 | for (;;) { | |
| 676 | arg = text; | |
| 677 | text = strchr(text, ','); | |
| 678 | if (text != NULL) | |
| 679 | *text++ = '\0'; | |
| 680 | sl_add(names, arg); | |
| 681 | if (text == NULL) | |
| 682 | return; | |
| 683 | if (*text == ' ') | |
| 684 | text++; | |
| 685 | } | |
| 686 | } | |
| 687 | ||
| 688 | enum { STATE_UNKNOWN, STATE_MANSTYLE, STATE_MDOCNAME, STATE_MDOCDESC }; | |
| 689 | ||
| 690 | /* | |
| 691 | * Processes a man page source into a single whatis line and adds it | |
| 692 | * to whatis_lines. | |
| 693 | */ | |
| 694 | static void | |
| 695 | process_page(struct page_info *page, char *section_dir) | |
| 696 | { | |
| ab767382 | 697 | gzFile in; |
| ae4803a1 JS |
698 | char buffer[4096]; |
| 699 | char *line; | |
| 700 | StringList *names; | |
| 701 | char *descr; | |
| 702 | int state = STATE_UNKNOWN; | |
| 703 | size_t i; | |
| 704 | ||
| 705 | sbuf_clear(whatis_proto); | |
| 706 | if ((in = gzopen(page->filename, "r")) == NULL) { | |
| 707 | warn("%s", page->filename); | |
| 708 | exit_code = 1; | |
| 709 | return; | |
| 710 | } | |
| 711 | while (gzgets(in, buffer, sizeof buffer) != NULL) { | |
| 712 | line = buffer; | |
| 713 | if (strncmp(line, ".\\\"", 3) == 0) /* ignore comments */ | |
| 714 | continue; | |
| 715 | switch (state) { | |
| 716 | /* | |
| 717 | * haven't reached the NAME section yet. | |
| 718 | */ | |
| 719 | case STATE_UNKNOWN: | |
| 720 | if (name_section_line(line, ".SH")) | |
| 721 | state = STATE_MANSTYLE; | |
| 722 | else if (name_section_line(line, ".Sh")) | |
| 723 | state = STATE_MDOCNAME; | |
| 724 | continue; | |
| 725 | /* | |
| 726 | * Inside an old-style .SH NAME section. | |
| 727 | */ | |
| 728 | case STATE_MANSTYLE: | |
| 729 | if (strncmp(line, ".SH", 3) == 0) | |
| 730 | break; | |
| 731 | trim_rhs(line); | |
| 732 | if (strcmp(line, ".") == 0) | |
| 733 | continue; | |
| 734 | if (strncmp(line, ".IX", 3) == 0) { | |
| 735 | line += 3; | |
| 736 | line = skip_spaces(line); | |
| 737 | } | |
| 738 | process_man_line(line); | |
| 739 | continue; | |
| 740 | /* | |
| 741 | * Inside a new-style .Sh NAME section (the .Nm part). | |
| 742 | */ | |
| 743 | case STATE_MDOCNAME: | |
| 744 | trim_rhs(line); | |
| 745 | if (strncmp(line, ".Nm", 3) == 0) { | |
| 746 | process_mdoc_line(line); | |
| 747 | continue; | |
| 748 | } else { | |
| 749 | if (strcmp(line, ".") == 0) | |
| 750 | continue; | |
| 751 | sbuf_append(whatis_proto, "- ", 2); | |
| 752 | state = STATE_MDOCDESC; | |
| 753 | } | |
| 754 | /* fall through */ | |
| 755 | /* | |
| 756 | * Inside a new-style .Sh NAME section (after the .Nm-s). | |
| 757 | */ | |
| 758 | case STATE_MDOCDESC: | |
| 759 | if (strncmp(line, ".Sh", 3) == 0) | |
| 760 | break; | |
| 761 | trim_rhs(line); | |
| 762 | if (strcmp(line, ".") == 0) | |
| 763 | continue; | |
| 764 | process_mdoc_line(line); | |
| 765 | continue; | |
| 766 | } | |
| 767 | break; | |
| 768 | } | |
| 769 | gzclose(in); | |
| 770 | sbuf_strip(whatis_proto, " \t.-"); | |
| 771 | line = sbuf_content(whatis_proto); | |
| 772 | /* | |
| 773 | * line now contains the appropriate data, but without | |
| 774 | * the proper indentation or the section appended to each name. | |
| 775 | */ | |
| 776 | descr = strstr(line, " - "); | |
| 777 | if (descr == NULL) { | |
| 778 | descr = strchr(line, ' '); | |
| 779 | if (descr == NULL) { | |
| 780 | if (verbose) | |
| 781 | fprintf(stderr, | |
| 782 | "\tignoring junk description \"%s\"\n", | |
| 783 | line); | |
| 784 | return; | |
| 785 | } | |
| 786 | *descr++ = '\0'; | |
| 787 | } else { | |
| 788 | *descr = '\0'; | |
| 789 | descr += 3; | |
| 790 | } | |
| 791 | names = sl_init(); | |
| 792 | collect_names(names, line); | |
| 793 | sbuf_clear(whatis_final); | |
| 794 | if (!sl_find(names, page->name) && | |
| 795 | no_page_exists(section_dir, names, page->suffix)) { | |
| 796 | /* | |
| 797 | * Add the page name since that's the only thing that | |
| 798 | * man(1) will find. | |
| 799 | */ | |
| 800 | add_whatis_name(page->name, page->suffix); | |
| 801 | } | |
| 802 | for (i = 0; i < names->sl_cur; i++) | |
| 803 | add_whatis_name(names->sl_str[i], page->suffix); | |
| 804 | sl_free(names, 0); | |
| 805 | sbuf_retract(whatis_final, 2); /* remove last ", " */ | |
| 806 | while (sbuf_length(whatis_final) < indent) | |
| 807 | sbuf_append(whatis_final, " ", 1); | |
| 808 | sbuf_append(whatis_final, " - ", 3); | |
| 809 | sbuf_append_str(whatis_final, skip_spaces(descr)); | |
| 810 | sl_add(whatis_lines, strdup(sbuf_content(whatis_final))); | |
| 811 | } | |
| 812 | ||
| 813 | /* | |
| 814 | * Sorts pages first by inode number, then by name. | |
| 815 | */ | |
| 816 | static int | |
| 817 | pagesort(const void *a, const void *b) | |
| 818 | { | |
| 7ac00cc9 CP |
819 | const struct page_info *p1 = *(const struct page_info * const *)a; |
| 820 | const struct page_info *p2 = *(const struct page_info * const *)b; | |
| ae4803a1 JS |
821 | if (p1->inode == p2->inode) |
| 822 | return(strcmp(p1->name, p2->name)); | |
| 823 | return(p1->inode - p2->inode); | |
| 824 | } | |
| 825 | ||
| 826 | /* | |
| 827 | * Processes a single man section. | |
| 828 | */ | |
| 829 | static void | |
| 830 | process_section(char *section_dir) | |
| 831 | { | |
| 832 | struct dirent **entries; | |
| 833 | int nentries; | |
| 834 | struct page_info **pages; | |
| 835 | int npages = 0; | |
| 836 | int i; | |
| 837 | ino_t prev_inode = 0; | |
| 838 | ||
| 839 | if (verbose) | |
| 840 | fprintf(stderr, " %s\n", section_dir); | |
| 841 | ||
| 842 | /* | |
| 843 | * scan the man section directory for pages | |
| 844 | */ | |
| 845 | nentries = scandir(section_dir, &entries, NULL, alphasort); | |
| 846 | if (nentries < 0) { | |
| 847 | warn("%s", section_dir); | |
| 848 | exit_code = 1; | |
| 849 | return; | |
| 850 | } | |
| 851 | /* | |
| 852 | * collect information about man pages | |
| 853 | */ | |
| 854 | pages = calloc(nentries, sizeof(struct page_info *)); | |
| 855 | for (i = 0; i < nentries; i++) { | |
| 856 | struct page_info *info = new_page_info(section_dir, entries[i]); | |
| 857 | if (info != NULL) | |
| 858 | pages[npages++] = info; | |
| 859 | free(entries[i]); | |
| 860 | } | |
| 861 | free(entries); | |
| 862 | qsort(pages, npages, sizeof(struct page_info *), pagesort); | |
| 863 | /* | |
| 864 | * process each unique page | |
| 865 | */ | |
| 866 | for (i = 0; i < npages; i++) { | |
| 867 | struct page_info *page = pages[i]; | |
| 868 | if (page->inode != prev_inode) { | |
| 869 | prev_inode = page->inode; | |
| 870 | if (verbose) | |
| 871 | fprintf(stderr, "\treading %s\n", | |
| 872 | page->filename); | |
| 873 | process_page(page, section_dir); | |
| 874 | } else if (verbose) | |
| 875 | fprintf(stderr, "\tskipping %s, duplicate\n", | |
| 876 | page->filename); | |
| 877 | free_page_info(page); | |
| 878 | } | |
| 879 | free(pages); | |
| 880 | } | |
| 881 | ||
/*
 * scandir(3) filter: returns 1 when the entry name is "man" followed
 * only by letters and digits (possibly none), 0 otherwise.
 */
static int
select_sections(const struct dirent *entry)
{
	const char *p;

	if (strncmp(entry->d_name, "man", 3) != 0)
		return(0);
	/* ctype functions need values in the unsigned char range. */
	for (p = &entry->d_name[3]; *p != '\0'; p++) {
		if (!isalnum((unsigned char)*p))
			return(0);
	}
	return(1);
}
| 898 | ||
| 899 | /* | |
| 900 | * Processes a single top-level man directory by finding all the | |
| 901 | * sub-directories named man* and processing each one in turn. | |
| 902 | */ | |
| 903 | static void | |
| 904 | process_mandir(char *dir_name) | |
| 905 | { | |
| 906 | struct dirent **entries; | |
| 907 | int nsections; | |
| 908 | FILE *fp = NULL; | |
| 909 | int i; | |
| 910 | struct stat st; | |
| 911 | ||
| 912 | if (already_visited(dir_name)) | |
| 913 | return; | |
| 914 | if (verbose) | |
| 915 | fprintf(stderr, "man directory %s\n", dir_name); | |
| 916 | nsections = scandir(dir_name, &entries, select_sections, alphasort); | |
| 917 | if (nsections < 0) { | |
| 918 | warn("%s", dir_name); | |
| 919 | exit_code = 1; | |
| 920 | return; | |
| 921 | } | |
| 922 | if (common_output == NULL && (fp = open_whatis(dir_name)) == NULL) | |
| 923 | return; | |
| 924 | for (i = 0; i < nsections; i++) { | |
| 925 | char section_dir[MAXPATHLEN]; | |
| 926 | snprintf(section_dir, sizeof section_dir, "%s/%s", dir_name, | |
| 927 | entries[i]->d_name); | |
| 928 | process_section(section_dir); | |
| 929 | snprintf(section_dir, sizeof section_dir, "%s/%s/%s", dir_name, | |
| 930 | entries[i]->d_name, machine); | |
| 931 | if (stat(section_dir, &st) == 0 && S_ISDIR(st.st_mode)) | |
| 932 | process_section(section_dir); | |
| 933 | free(entries[i]); | |
| 934 | } | |
| 935 | free(entries); | |
| 936 | if (common_output == NULL) | |
| 937 | finish_whatis(fp, dir_name); | |
| 938 | } | |
| 939 | ||
| 940 | /* | |
| 941 | * Processes one argument, which may be a colon-separated list of | |
| 942 | * directories. | |
| 943 | */ | |
| 944 | static void | |
| 945 | process_argument(const char *arg) | |
| 946 | { | |
| 947 | char *dir; | |
| 948 | char *mandir; | |
| 949 | char *parg; | |
| 950 | ||
| 951 | parg = strdup(arg); | |
| 952 | if (parg == NULL) | |
| 953 | err(1, "out of memory"); | |
| 954 | while ((dir = strsep(&parg, ":")) != NULL) { | |
| 955 | if (locale != NULL) { | |
| 956 | asprintf(&mandir, "%s/%s", dir, locale); | |
| 957 | process_mandir(mandir); | |
| 958 | free(mandir); | |
| 959 | if (lang_locale != NULL) { | |
| 960 | asprintf(&mandir, "%s/%s", dir, lang_locale); | |
| 961 | process_mandir(mandir); | |
| 962 | free(mandir); | |
| 963 | } | |
| 964 | } else { | |
| 965 | process_mandir(dir); | |
| 966 | } | |
| 967 | } | |
| 968 | free(parg); | |
| 969 | } | |
| 970 | ||
| 971 | ||
| 972 | int | |
| 973 | main(int argc, char **argv) | |
| 974 | { | |
| 975 | int opt; | |
| 976 | FILE *fp = NULL; | |
| 977 | ||
| 978 | while ((opt = getopt(argc, argv, "ai:n:o:vL")) != -1) { | |
| 979 | switch (opt) { | |
| 980 | case 'a': | |
| 981 | append++; | |
| 982 | break; | |
| 983 | case 'i': | |
| 984 | indent = atoi(optarg); | |
| 985 | break; | |
| 986 | case 'n': | |
| 987 | whatis_name = optarg; | |
| 988 | break; | |
| 989 | case 'o': | |
| 990 | common_output = optarg; | |
| 991 | break; | |
| 992 | case 'v': | |
| 993 | verbose++; | |
| 994 | break; | |
| 995 | case 'L': | |
| 996 | locale = getenv("LC_ALL"); | |
| 997 | if (locale == NULL) | |
| 998 | locale = getenv("LC_CTYPE"); | |
| 999 | if (locale == NULL) | |
| 1000 | locale = getenv("LANG"); | |
| 1001 | if (locale != NULL) { | |
| 1002 | char *sep = strchr(locale, '_'); | |
| 1003 | if (sep != NULL && isupper(sep[1]) && | |
| 1004 | isupper(sep[2])) { | |
| b8d548b2 SW |
1005 | asprintf(&lang_locale, "%.*s%s", |
| 1006 | (int)(sep - locale), | |
| 1007 | locale, &sep[3]); | |
| ae4803a1 JS |
1008 | } |
| 1009 | } | |
| 1010 | break; | |
| 1011 | default: | |
| 1012 | fprintf(stderr, "usage: %s [-a] [-i indent] [-n name] [-o output_file] [-v] [-L] [directories...]\n", argv[0]); | |
| 1013 | exit(1); | |
| 1014 | } | |
| 1015 | } | |
| 1016 | ||
| 1017 | signal(SIGINT, trap_signal); | |
| 1018 | signal(SIGHUP, trap_signal); | |
| 1019 | signal(SIGQUIT, trap_signal); | |
| 1020 | signal(SIGTERM, trap_signal); | |
| 1021 | SLIST_INIT(&visited_dirs); | |
| 1022 | whatis_proto = new_sbuf(); | |
| 1023 | whatis_final = new_sbuf(); | |
| 1024 | ||
| 1025 | if ((machine = getenv("MACHINE")) == NULL) | |
| 1026 | machine = MACHINE; | |
| 1027 | ||
| 1028 | if (common_output != NULL && (fp = open_output(common_output)) == NULL) | |
| 1029 | err(1, "%s", common_output); | |
| 1030 | if (optind == argc) { | |
| 1031 | const char *manpath = getenv("MANPATH"); | |
| 1032 | if (manpath == NULL) | |
| 1033 | manpath = DEFAULT_MANPATH; | |
| 1034 | process_argument(manpath); | |
| 1035 | } else { | |
| 1036 | while (optind < argc) | |
| 1037 | process_argument(argv[optind++]); | |
| 1038 | } | |
| 1039 | if (common_output != NULL) | |
| 1040 | finish_output(fp, common_output); | |
| 1041 | exit(exit_code); | |
| 1042 | } |