| 1 | /* |
| 2 | * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin. |
| 3 | * Copyright (c) 1989, 1993 |
| 4 | * The Regents of the University of California. All rights reserved. |
| 5 | * |
| 6 | * This code is derived from software contributed to Berkeley by |
| 7 | * James A. Woods. |
| 8 | * |
| 9 | * Redistribution and use in source and binary forms, with or without |
| 10 | * modification, are permitted provided that the following conditions |
| 11 | * are met: |
| 12 | * 1. Redistributions of source code must retain the above copyright |
| 13 | * notice, this list of conditions and the following disclaimer. |
| 14 | * 2. Redistributions in binary form must reproduce the above copyright |
| 15 | * notice, this list of conditions and the following disclaimer in the |
| 16 | * documentation and/or other materials provided with the distribution. |
| 17 | * 3. All advertising materials mentioning features or use of this software |
| 18 | * must display the following acknowledgement: |
| 19 | * This product includes software developed by the University of |
| 20 | * California, Berkeley and its contributors. |
| 21 | * 4. Neither the name of the University nor the names of its contributors |
| 22 | * may be used to endorse or promote products derived from this software |
| 23 | * without specific prior written permission. |
| 24 | * |
| 25 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| 26 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 27 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 28 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| 29 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 30 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 31 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 32 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 33 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 34 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 35 | * SUCH DAMAGE. |
| 36 | * |
| 37 | * @(#) Copyright (c) 1995-1996 Wolfram Schneider, Berlin. @(#) Copyright (c) 1989, 1993 The Regents of the University of California. All rights reserved. |
| 38 | * @(#)locate.c 8.1 (Berkeley) 6/6/93 |
| 39 | * $FreeBSD: src/usr.bin/locate/locate/locate.c,v 1.12.2.1 2001/03/04 08:47:25 kris Exp $ |
| 40 | * $DragonFly: src/usr.bin/locate/locate/locate.c,v 1.3 2003/11/03 19:31:30 eirikn Exp $ |
| 41 | */ |
| 42 | |
| 43 | /* |
| 44 | * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8. |
| 45 | * |
| 46 | * Locate scans a file list for the full pathname of a file given only part |
| 47 | * of the name. The list has been processed with "front-compression" |
| 48 | * and bigram coding. Front compression reduces space by a factor of 4-5, |
| 49 | * bigram coding by a further 20-25%. |
| 50 | * |
| 51 | * The codes are: |
| 52 | * |
| 53 | * 0-28 likeliest differential counts + offset to make nonnegative |
| 54 | * 30 switch code for out-of-range count to follow in next word |
| 55 | * 31 an 8 bit char follows |
| 56 | * 128-255 bigram codes (128 most common, as determined by 'updatedb') |
| 57 | * 32-127 single character (printable) ascii residue (ie, literal) |
| 58 | * |
| 59 | * A novel two-tiered string search technique is employed: |
| 60 | * |
| 61 | * First, a metacharacter-free subpattern and partial pathname is matched |
| 62 | * BACKWARDS to avoid full expansion of the pathname list. The time savings |
| 63 | * is 40-50% over forward matching, which cannot efficiently handle |
| 64 | * overlapped search patterns and compressed path residue. |
| 65 | * |
| 66 | * Then, the actual shell glob-style regular expression (if in this form) is |
| 67 | * matched against the candidate pathnames using the slower routines provided |
| 68 | * in the standard 'find'. |
| 69 | */ |
| 70 | |
#include <sys/param.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fnmatch.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
| 80 | |
| 81 | #ifdef MMAP |
| 82 | # include <sys/types.h> |
| 83 | # include <sys/stat.h> |
| 84 | # include <sys/mman.h> |
| 85 | # include <fcntl.h> |
| 86 | #endif |
| 87 | |
| 88 | |
| 89 | #ifdef sun |
| 90 | #include <netinet/in.h> /* SunOS byteorder(3) htohl(3) */ |
| 91 | #ifndef __P |
| 92 | #define __P(x) x |
| 93 | #endif |
| 94 | #endif |
| 95 | |
| 96 | #include "locate.h" |
| 97 | #include "pathnames.h" |
| 98 | |
| 99 | #ifdef DEBUG |
| 100 | # include <sys/time.h> |
| 101 | # include <sys/types.h> |
| 102 | # include <sys/resource.h> |
| 103 | #endif |
| 104 | |
| 105 | char *path_fcodes; /* locate database */ |
| 106 | int f_mmap; /* use mmap */ |
| 107 | int f_icase; /* ignore case */ |
| 108 | int f_stdin; /* read database from stdin */ |
| 109 | int f_statistic; /* print statistic */ |
| 110 | int f_silent; /* suppress output, show only count of matches */ |
| 111 | int f_limit; /* limit number of output lines, 0 == infinite */ |
| 112 | u_int counter; /* counter for matches [-c] */ |
| 113 | |
| 114 | |
| 115 | void usage(void); |
| 116 | void statistic(FILE *, char *); |
| 117 | void fastfind(FILE *, char *, char *); |
| 118 | void fastfind_icase(FILE *, char *, char *); |
| 119 | void fastfind_mmap(char *, caddr_t, int, char *); |
| 120 | void fastfind_mmap_icase(char *, caddr_t, int, char *); |
| 121 | void search_mmap(char *, char **); |
| 122 | void search_fopen(char *, char **); |
| 123 | unsigned long cputime(void); |
| 124 | |
| 125 | extern char **colon(char **, char*, char*); |
| 126 | extern void print_matches(u_int); |
| 127 | extern int getwm(caddr_t); |
| 128 | extern int getwf(FILE *); |
| 129 | extern u_char *tolower_word(u_char *); |
| 130 | extern int check_bigram_char(int); |
| 131 | extern char *patprep(char *); |
| 132 | |
| 133 | int |
| 134 | main(argc, argv) |
| 135 | int argc; |
| 136 | char **argv; |
| 137 | { |
| 138 | register int ch; |
| 139 | char **dbv = NULL; |
| 140 | #ifdef MMAP |
| 141 | f_mmap = 1; /* mmap is default */ |
| 142 | #endif |
| 143 | (void) setlocale(LC_ALL, ""); |
| 144 | |
| 145 | while ((ch = getopt(argc, argv, "Scd:il:ms")) != -1) |
| 146 | switch(ch) { |
| 147 | case 'S': /* statistic lines */ |
| 148 | f_statistic = 1; |
| 149 | break; |
| 150 | case 'l': /* limit number of output lines, 0 == infinite */ |
| 151 | f_limit = atoi(optarg); |
| 152 | break; |
| 153 | case 'd': /* database */ |
| 154 | dbv = colon(dbv, optarg, _PATH_FCODES); |
| 155 | break; |
| 156 | case 'i': /* ignore case */ |
| 157 | f_icase = 1; |
| 158 | break; |
| 159 | case 'm': /* mmap */ |
| 160 | #ifdef MMAP |
| 161 | f_mmap = 1; |
| 162 | #else |
| 163 | warnx("mmap(2) not implemented"); |
| 164 | #endif |
| 165 | break; |
| 166 | case 's': /* stdio lib */ |
| 167 | f_mmap = 0; |
| 168 | break; |
| 169 | case 'c': /* suppress output, show only count of matches */ |
| 170 | f_silent = 1; |
| 171 | break; |
| 172 | default: |
| 173 | usage(); |
| 174 | } |
| 175 | argv += optind; |
| 176 | argc -= optind; |
| 177 | |
| 178 | /* to few arguments */ |
| 179 | if (argc < 1 && !(f_statistic)) |
| 180 | usage(); |
| 181 | |
| 182 | /* no (valid) database as argument */ |
| 183 | if (dbv == NULL || *dbv == NULL) { |
| 184 | /* try to read database from enviroment */ |
| 185 | if ((path_fcodes = getenv("LOCATE_PATH")) == NULL || |
| 186 | *path_fcodes == '\0') |
| 187 | /* use default database */ |
| 188 | dbv = colon(dbv, _PATH_FCODES, _PATH_FCODES); |
| 189 | else /* $LOCATE_PATH */ |
| 190 | dbv = colon(dbv, path_fcodes, _PATH_FCODES); |
| 191 | } |
| 192 | |
| 193 | if (f_icase && UCHAR_MAX < 4096) /* init tolower lookup table */ |
| 194 | for (ch = 0; ch < UCHAR_MAX + 1; ch++) |
| 195 | myctype[ch] = tolower(ch); |
| 196 | |
| 197 | /* foreach database ... */ |
| 198 | while((path_fcodes = *dbv) != NULL) { |
| 199 | dbv++; |
| 200 | |
| 201 | if (!strcmp(path_fcodes, "-")) |
| 202 | f_stdin = 1; |
| 203 | else |
| 204 | f_stdin = 0; |
| 205 | |
| 206 | #ifndef MMAP |
| 207 | f_mmap = 0; /* be paranoid */ |
| 208 | #endif |
| 209 | if (!f_mmap || f_stdin || f_statistic) |
| 210 | search_fopen(path_fcodes, argv); |
| 211 | else |
| 212 | search_mmap(path_fcodes, argv); |
| 213 | } |
| 214 | |
| 215 | if (f_silent) |
| 216 | print_matches(counter); |
| 217 | exit(0); |
| 218 | } |
| 219 | |
| 220 | |
| 221 | void |
| 222 | search_fopen(db, s) |
| 223 | char *db; /* database */ |
| 224 | char **s; /* search strings */ |
| 225 | { |
| 226 | FILE *fp; |
| 227 | #ifdef DEBUG |
| 228 | long t0; |
| 229 | #endif |
| 230 | |
| 231 | /* can only read stdin once */ |
| 232 | if (f_stdin) { |
| 233 | fp = stdin; |
| 234 | if (*(s+1) != NULL) { |
| 235 | warnx("read database from stdin, use only `%s' as pattern", *s); |
| 236 | *(s+1) = NULL; |
| 237 | } |
| 238 | } |
| 239 | else if ((fp = fopen(path_fcodes, "r")) == NULL) |
| 240 | err(1, "`%s'", path_fcodes); |
| 241 | |
| 242 | /* count only chars or lines */ |
| 243 | if (f_statistic) { |
| 244 | statistic(fp, path_fcodes); |
| 245 | (void)fclose(fp); |
| 246 | return; |
| 247 | } |
| 248 | |
| 249 | /* foreach search string ... */ |
| 250 | while(*s != NULL) { |
| 251 | #ifdef DEBUG |
| 252 | t0 = cputime(); |
| 253 | #endif |
| 254 | if (!f_stdin && |
| 255 | fseek(fp, (long)0, SEEK_SET) == -1) |
| 256 | err(1, "fseek to begin of ``%s''\n", path_fcodes); |
| 257 | |
| 258 | if (f_icase) |
| 259 | fastfind_icase(fp, *s, path_fcodes); |
| 260 | else |
| 261 | fastfind(fp, *s, path_fcodes); |
| 262 | #ifdef DEBUG |
| 263 | warnx("fastfind %ld ms", cputime () - t0); |
| 264 | #endif |
| 265 | s++; |
| 266 | } |
| 267 | (void)fclose(fp); |
| 268 | } |
| 269 | |
#ifdef MMAP
/*
 * Search one database through a read-only memory mapping.
 *
 *	db	name of the database file
 *	s	NULL terminated vector of search strings
 *
 * The whole file is mapped once and every search string is handed to
 * fastfind_mmap() (or fastfind_mmap_icase() when f_icase is set).
 * Failure to open, stat or map the database is fatal (exit status 1);
 * a failing munmap(2) only produces a warning.
 */
void
search_mmap(char *db, char **s)
{
	struct stat sb;
	int fd;
	caddr_t p;
	off_t len;
#ifdef DEBUG
	unsigned long t0;
#endif

	if ((fd = open(db, O_RDONLY)) == -1 || fstat(fd, &sb) == -1)
		err(1, "`%s'", db);
	len = sb.st_size;

	/*
	 * The fastfind routines take the length as an int; refuse a
	 * database whose size would be truncated by that conversion.
	 */
	if (len > INT_MAX)
		errx(1, "`%s': database too large", db);

	if ((p = mmap(NULL, (size_t)len, PROT_READ, MAP_SHARED,
	    fd, (off_t)0)) == MAP_FAILED)
		err(1, "mmap ``%s''", db);

	/* foreach search string ... */
	for (; *s != NULL; s++) {
#ifdef DEBUG
		t0 = cputime();
#endif
		if (f_icase)
			fastfind_mmap_icase(*s, p, (int)len, db);
		else
			fastfind_mmap(*s, p, (int)len, db);
#ifdef DEBUG
		warnx("fastfind %lu ms", cputime() - t0);
#endif
	}

	/* no trailing newline: warn(3) appends the error text itself */
	if (munmap(p, (size_t)len) == -1)
		warn("munmap %s", db);

	(void)close(fd);
}
#endif /* MMAP */
| 314 | |
#ifdef DEBUG
/*
 * Return the user time reported by getrusage(2), converted to
 * milliseconds.  Only built for DEBUG, where it is used to time the
 * fastfind calls.
 */
unsigned long
cputime(void)
{
	struct rusage usage_now;
	const struct timeval *utime;

	/* NOTE(review): who == 0 is presumably RUSAGE_SELF here - confirm */
	getrusage(0, &usage_now);
	utime = &usage_now.ru_utime;

	/* seconds and microseconds both scaled to milliseconds */
	return (utime->tv_sec * 1000 + utime->tv_usec / 1000);
}
#endif /* DEBUG */
| 325 | |
| 326 | void |
| 327 | usage () |
| 328 | { |
| 329 | (void)fprintf(stderr, |
| 330 | "usage: locate [-Scims] [-l limit] [-d database] pattern ...\n\n"); |
| 331 | (void)fprintf(stderr, |
| 332 | "default database: `%s' or $LOCATE_PATH\n", _PATH_FCODES); |
| 333 | exit(1); |
| 334 | } |
| 335 | |
| 336 | |
| 337 | /* load fastfind functions */ |
| 338 | |
| 339 | /* statistic */ |
| 340 | /* fastfind_mmap, fastfind_mmap_icase */ |
| 341 | #ifdef MMAP |
| 342 | #undef FF_MMAP |
| 343 | #undef FF_ICASE |
| 344 | |
| 345 | #define FF_MMAP |
| 346 | #include "fastfind.c" |
| 347 | #define FF_ICASE |
| 348 | #include "fastfind.c" |
| 349 | #endif /* MMAP */ |
| 350 | |
| 351 | /* fopen */ |
| 352 | /* fastfind, fastfind_icase */ |
| 353 | #undef FF_MMAP |
| 354 | #undef FF_ICASE |
| 355 | #include "fastfind.c" |
| 356 | #define FF_ICASE |
| 357 | #include "fastfind.c" |