| Commit | Line | Data |
|---|---|---|
| 984263bc MD |
1 | /* |
| 2 | * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin. | |
| 3 | * Copyright (c) 1989, 1993 | |
| 4 | * The Regents of the University of California. All rights reserved. | |
| 5 | * | |
| 6 | * This code is derived from software contributed to Berkeley by | |
| 7 | * James A. Woods. | |
| 8 | * | |
| 9 | * Redistribution and use in source and binary forms, with or without | |
| 10 | * modification, are permitted provided that the following conditions | |
| 11 | * are met: | |
| 12 | * 1. Redistributions of source code must retain the above copyright | |
| 13 | * notice, this list of conditions and the following disclaimer. | |
| 14 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 15 | * notice, this list of conditions and the following disclaimer in the | |
| 16 | * documentation and/or other materials provided with the distribution. | |
| 17 | * 3. All advertising materials mentioning features or use of this software | |
| 18 | * must display the following acknowledgement: | |
| 19 | * This product includes software developed by the University of | |
| 20 | * California, Berkeley and its contributors. | |
| 21 | * 4. Neither the name of the University nor the names of its contributors | |
| 22 | * may be used to endorse or promote products derived from this software | |
| 23 | * without specific prior written permission. | |
| 24 | * | |
| 25 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
| 26 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 27 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 28 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
| 29 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 30 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 31 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 32 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 33 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 34 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 35 | * SUCH DAMAGE. | |
| 1de703da MD |
36 | * |
| 37 | * @(#) Copyright (c) 1995-1996 Wolfram Schneider, Berlin. @(#) Copyright (c) 1989, 1993 The Regents of the University of California. All rights reserved. | |
| 38 | * @(#)locate.c 8.1 (Berkeley) 6/6/93 | |
| 39 | * $FreeBSD: src/usr.bin/locate/locate/locate.c,v 1.12.2.1 2001/03/04 08:47:25 kris Exp $ | |
| 984263bc MD |
40 | */ |
| 41 | ||
| 984263bc MD |
42 | /* |
| 43 | * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8. | |
| 44 | * | |
| 45 | * Locate scans a file list for the full pathname of a file given only part | |
| 46 | * of the name. The list has been processed with "front-compression" | |
| 47 | * and bigram coding. Front compression reduces space by a factor of 4-5, | |
| 48 | * bigram coding by a further 20-25%. | |
| 49 | * | |
| 50 | * The codes are: | |
| 51 | * | |
| 52 | * 0-28 likeliest differential counts + offset to make nonnegative | |
| 53 | * 30 switch code for out-of-range count to follow in next word | |
| 54 | * 31 an 8 bit char followed | |
| 55 | * 128-255 bigram codes (128 most common, as determined by 'updatedb') | |
| 56 | * 32-127 single character (printable) ascii residue (ie, literal) | |
| 57 | * | |
| 58 | * A novel two-tiered string search technique is employed: | |
| 59 | * | |
| 60 | * First, a metacharacter-free subpattern and partial pathname is matched | |
| 61 | * BACKWARDS to avoid full expansion of the pathname list. The time savings | |
| 62 | * is 40-50% over forward matching, which cannot efficiently handle | |
| 63 | * overlapped search patterns and compressed path residue. | |
| 64 | * | |
| 65 | * Then, the actual shell glob-style regular expression (if in this form) is | |
| 66 | * matched against the candidate pathnames using the slower routines provided | |
| 67 | * in the standard 'find'. | |
| 68 | */ | |
| 69 | ||
| 70 | #include <sys/param.h> | |
| 71 | #include <ctype.h> | |
| 72 | #include <err.h> | |
| 73 | #include <fnmatch.h> | |
| 74 | #include <locale.h> | |
| 75 | #include <stdio.h> | |
| 76 | #include <stdlib.h> | |
| 77 | #include <string.h> | |
| 78 | #include <unistd.h> | |
| 79 | ||
| 80 | #ifdef MMAP | |
| 81 | # include <sys/types.h> | |
| 82 | # include <sys/stat.h> | |
| 83 | # include <sys/mman.h> | |
| 84 | # include <fcntl.h> | |
| 85 | #endif | |
| 86 | ||
| 87 | ||
| 88 | #ifdef sun | |
| 89 | #include <netinet/in.h> /* SunOS byteorder(3) htohl(3) */ | |
| 984263bc MD |
90 | #endif |
| 91 | ||
| 92 | #include "locate.h" | |
| 93 | #include "pathnames.h" | |
| 94 | ||
| 95 | #ifdef DEBUG | |
| 96 | # include <sys/time.h> | |
| 97 | # include <sys/types.h> | |
| 98 | # include <sys/resource.h> | |
| 99 | #endif | |
| 100 | ||
| 101 | char *path_fcodes; /* path of the locate database currently being searched */ | |
| 102 | int f_mmap; /* search the database via mmap(2) [-m]; cleared by -s */ | |
| 103 | int f_icase; /* ignore case when matching [-i] */ | |
| 104 | int f_stdin; /* current database name is "-": read the database from stdin */ | |
| 105 | int f_statistic; /* print database statistics instead of searching [-S] */ | |
| 106 | int f_silent; /* suppress output, show only count of matches [-c] */ | |
| 107 | int f_limit; /* limit number of output lines [-l], 0 == infinite */ | |
| 108 | u_int counter; /* number of matches; handed to print_matches() when -c is given */ | |
| 109 | ||
| 110 | ||
| 2d8a3be7 EN |
111 | void usage(void); /* print the usage message on stderr and exit(1) */ |
| 112 | void statistic(FILE *, char *); /* (database stream, database path) */ | |
| 113 | void fastfind(FILE *, char *, char *); /* (database stream, pattern, database path) */ | |
| 114 | void fastfind_icase(FILE *, char *, char *); /* same arguments as fastfind(); called when -i is set */ | |
| 115 | void fastfind_mmap(char *, caddr_t, int, char *); /* (pattern, mapped database, length, database path) */ | |
| 116 | void fastfind_mmap_icase(char *, caddr_t, int, char *); /* same arguments as fastfind_mmap(); called when -i is set */ | |
| 117 | void search_mmap(char *, char **); /* (database path, NULL-terminated pattern list); defined only with MMAP */ | |
| 118 | void search_fopen(char *, char **); /* (database path, NULL-terminated pattern list) */ | |
| 119 | unsigned long cputime(void); /* user CPU time in ms; defined only with DEBUG */ | |
| 984263bc | 120 | |
| 2d8a3be7 EN |
121 | extern char **colon(char **, char*, char*); /* called as colon(dbv, path list, _PATH_FCODES); result replaces dbv */ |
| 122 | extern void print_matches(u_int); /* called with the match counter when -c is given */ | |
| 123 | extern int getwm(caddr_t); /* not called in the code visible here; presumably used by fastfind.c */ | |
| 124 | extern int getwf(FILE *); /* not called in the code visible here; presumably used by fastfind.c */ | |
| 125 | extern u_char *tolower_word(u_char *); /* not called in the code visible here; presumably used by fastfind.c */ | |
| 126 | extern int check_bigram_char(int); /* not called in the code visible here; presumably used by fastfind.c */ | |
| 127 | extern char *patprep(char *); /* not called in the code visible here; presumably used by fastfind.c */ | |
| 984263bc MD |
128 | |
| 129 | int | |
| 89a89091 | 130 | main(int argc, char **argv) |
| 984263bc MD |
131 | { |
| 132 | register int ch; | |
| 133 | char **dbv = NULL; | |
| 134 | #ifdef MMAP | |
| 135 | f_mmap = 1; /* mmap is default */ | |
| 136 | #endif | |
| 137 | (void) setlocale(LC_ALL, ""); | |
| 138 | ||
| 139 | while ((ch = getopt(argc, argv, "Scd:il:ms")) != -1) | |
| 140 | switch(ch) { | |
| 141 | case 'S': /* statistic lines */ | |
| 142 | f_statistic = 1; | |
| 143 | break; | |
| 144 | case 'l': /* limit number of output lines, 0 == infinite */ | |
| 145 | f_limit = atoi(optarg); | |
| 146 | break; | |
| 147 | case 'd': /* database */ | |
| 148 | dbv = colon(dbv, optarg, _PATH_FCODES); | |
| 149 | break; | |
| 150 | case 'i': /* ignore case */ | |
| 151 | f_icase = 1; | |
| 152 | break; | |
| 153 | case 'm': /* mmap */ | |
| 154 | #ifdef MMAP | |
| 155 | f_mmap = 1; | |
| 156 | #else | |
| 157 | warnx("mmap(2) not implemented"); | |
| 158 | #endif | |
| 159 | break; | |
| 160 | case 's': /* stdio lib */ | |
| 161 | f_mmap = 0; | |
| 162 | break; | |
| 163 | case 'c': /* suppress output, show only count of matches */ | |
| 164 | f_silent = 1; | |
| 165 | break; | |
| 166 | default: | |
| 167 | usage(); | |
| 168 | } | |
| 169 | argv += optind; | |
| 170 | argc -= optind; | |
| 171 | ||
| 172 | /* to few arguments */ | |
| 173 | if (argc < 1 && !(f_statistic)) | |
| 174 | usage(); | |
| 175 | ||
| 176 | /* no (valid) database as argument */ | |
| 177 | if (dbv == NULL || *dbv == NULL) { | |
| 178 | /* try to read database from enviroment */ | |
| 179 | if ((path_fcodes = getenv("LOCATE_PATH")) == NULL || | |
| 180 | *path_fcodes == '\0') | |
| 181 | /* use default database */ | |
| 182 | dbv = colon(dbv, _PATH_FCODES, _PATH_FCODES); | |
| 183 | else /* $LOCATE_PATH */ | |
| 184 | dbv = colon(dbv, path_fcodes, _PATH_FCODES); | |
| 185 | } | |
| 186 | ||
| 187 | if (f_icase && UCHAR_MAX < 4096) /* init tolower lookup table */ | |
| 188 | for (ch = 0; ch < UCHAR_MAX + 1; ch++) | |
| 189 | myctype[ch] = tolower(ch); | |
| 190 | ||
| 191 | /* foreach database ... */ | |
| 192 | while((path_fcodes = *dbv) != NULL) { | |
| 193 | dbv++; | |
| 194 | ||
| 195 | if (!strcmp(path_fcodes, "-")) | |
| 196 | f_stdin = 1; | |
| 197 | else | |
| 198 | f_stdin = 0; | |
| 199 | ||
| 200 | #ifndef MMAP | |
| 201 | f_mmap = 0; /* be paranoid */ | |
| 202 | #endif | |
| 203 | if (!f_mmap || f_stdin || f_statistic) | |
| 204 | search_fopen(path_fcodes, argv); | |
| 205 | else | |
| 206 | search_mmap(path_fcodes, argv); | |
| 207 | } | |
| 208 | ||
| 209 | if (f_silent) | |
| 210 | print_matches(counter); | |
| 211 | exit(0); | |
| 212 | } | |
| 213 | ||
| 214 | ||
| 215 | void | |
| 89a89091 | 216 | search_fopen(char *db, char **s) |
| 984263bc MD |
217 | { |
| 218 | FILE *fp; | |
| 219 | #ifdef DEBUG | |
| 220 | long t0; | |
| 221 | #endif | |
| 222 | ||
| 223 | /* can only read stdin once */ | |
| 224 | if (f_stdin) { | |
| 225 | fp = stdin; | |
| 226 | if (*(s+1) != NULL) { | |
| 227 | warnx("read database from stdin, use only `%s' as pattern", *s); | |
| 228 | *(s+1) = NULL; | |
| 229 | } | |
| 230 | } | |
| 231 | else if ((fp = fopen(path_fcodes, "r")) == NULL) | |
| 232 | err(1, "`%s'", path_fcodes); | |
| 233 | ||
| 234 | /* count only chars or lines */ | |
| 235 | if (f_statistic) { | |
| 236 | statistic(fp, path_fcodes); | |
| 237 | (void)fclose(fp); | |
| 238 | return; | |
| 239 | } | |
| 240 | ||
| 241 | /* foreach search string ... */ | |
| 242 | while(*s != NULL) { | |
| 243 | #ifdef DEBUG | |
| 244 | t0 = cputime(); | |
| 245 | #endif | |
| 246 | if (!f_stdin && | |
| 247 | fseek(fp, (long)0, SEEK_SET) == -1) | |
| 248 | err(1, "fseek to begin of ``%s''\n", path_fcodes); | |
| 249 | ||
| 250 | if (f_icase) | |
| 251 | fastfind_icase(fp, *s, path_fcodes); | |
| 252 | else | |
| 253 | fastfind(fp, *s, path_fcodes); | |
| 254 | #ifdef DEBUG | |
| 255 | warnx("fastfind %ld ms", cputime () - t0); | |
| 256 | #endif | |
| 257 | s++; | |
| 258 | } | |
| 259 | (void)fclose(fp); | |
| 260 | } | |
| 261 | ||
| 262 | #ifdef MMAP | |
| 263 | void | |
| 89a89091 | 264 | search_mmap(char *db, char **s) |
| 984263bc MD |
265 | { |
| 266 | struct stat sb; | |
| 267 | int fd; | |
| 268 | caddr_t p; | |
| 269 | off_t len; | |
| 270 | #ifdef DEBUG | |
| 271 | long t0; | |
| 272 | #endif | |
| 273 | if ((fd = open(path_fcodes, O_RDONLY)) == -1 || | |
| 274 | fstat(fd, &sb) == -1) | |
| 275 | err(1, "`%s'", path_fcodes); | |
| 276 | len = sb.st_size; | |
| 277 | ||
| 278 | if ((p = mmap((caddr_t)0, (size_t)len, | |
| 279 | PROT_READ, MAP_SHARED, | |
| 280 | fd, (off_t)0)) == MAP_FAILED) | |
| 281 | err(1, "mmap ``%s''", path_fcodes); | |
| 282 | ||
| 283 | /* foreach search string ... */ | |
| 284 | while (*s != NULL) { | |
| 285 | #ifdef DEBUG | |
| 286 | t0 = cputime(); | |
| 287 | #endif | |
| 288 | if (f_icase) | |
| 289 | fastfind_mmap_icase(*s, p, (int)len, path_fcodes); | |
| 290 | else | |
| 291 | fastfind_mmap(*s, p, (int)len, path_fcodes); | |
| 292 | #ifdef DEBUG | |
| 293 | warnx("fastfind %ld ms", cputime () - t0); | |
| 294 | #endif | |
| 295 | s++; | |
| 296 | } | |
| 297 | ||
| 298 | if (munmap(p, (size_t)len) == -1) | |
| 299 | warn("munmap %s\n", path_fcodes); | |
| 300 | ||
| 301 | (void)close(fd); | |
| 302 | } | |
| 303 | #endif /* MMAP */ | |
| 304 | ||
| 305 | #ifdef DEBUG | |
/*
 * Report the user CPU time from getrusage(2) in milliseconds:
 * whole seconds scaled by 1000 plus the microsecond part divided by 1000.
 * The `who' argument 0 presumably selects the calling process
 * (RUSAGE_SELF on common systems).
 */
unsigned long
cputime(void)
{
	struct rusage res;
	const struct timeval *user;

	getrusage(0, &res);
	user = &res.ru_utime;

	return (user->tv_sec * 1000 + user->tv_usec / 1000);
}
| 314 | #endif /* DEBUG */ | |
| 315 | ||
| 316 | void | |
| 89a89091 | 317 | usage (void) |
| 984263bc MD |
318 | { |
| 319 | (void)fprintf(stderr, | |
| 320 | "usage: locate [-Scims] [-l limit] [-d database] pattern ...\n\n"); | |
| 321 | (void)fprintf(stderr, | |
| 322 | "default database: `%s' or $LOCATE_PATH\n", _PATH_FCODES); | |
| 323 | exit(1); | |
| 324 | } | |
| 325 | ||
| 326 | ||
| 327 | /* load fastfind functions: fastfind.c is included once per FF_MMAP / FF_ICASE combination */ | |
| 328 | ||
| 329 | /* statistic -- presumably also supplied by one of the fastfind.c inclusions; it is not defined in this file */ | |
| 330 | /* mmap variants (only with MMAP): fastfind_mmap (FF_MMAP), fastfind_mmap_icase (FF_MMAP + FF_ICASE) */ | |
| 331 | #ifdef MMAP | |
| 332 | #undef FF_MMAP | |
| 333 | #undef FF_ICASE | |
| 334 | ||
| 335 | #define FF_MMAP | |
| 336 | #include "fastfind.c" | |
| 337 | #define FF_ICASE | |
| 338 | #include "fastfind.c" | |
| 339 | #endif /* MMAP */ | |
| 340 | ||
| 341 | /* stdio (fopen) variants, with FF_MMAP undefined */ | |
| 342 | /* fastfind (no macros), fastfind_icase (FF_ICASE) */ | |
| 343 | #undef FF_MMAP | |
| 344 | #undef FF_ICASE | |
| 345 | #include "fastfind.c" | |
| 346 | #define FF_ICASE | |
| 347 | #include "fastfind.c"