| 1 | /*- |
| 2 | * Copyright (c) 1989, 1993 |
| 3 | * The Regents of the University of California. All rights reserved. |
| 4 | * |
| 5 | * This code is derived from software contributed to Berkeley by |
| 6 | * Ken Arnold. |
| 7 | * |
| 8 | * Redistribution and use in source and binary forms, with or without |
| 9 | * modification, are permitted provided that the following conditions |
| 10 | * are met: |
| 11 | * 1. Redistributions of source code must retain the above copyright |
| 12 | * notice, this list of conditions and the following disclaimer. |
| 13 | * 2. Redistributions in binary form must reproduce the above copyright |
| 14 | * notice, this list of conditions and the following disclaimer in the |
| 15 | * documentation and/or other materials provided with the distribution. |
| 16 | * 3. All advertising materials mentioning features or use of this software |
| 17 | * must display the following acknowledgement: |
| 18 | * This product includes software developed by the University of |
| 19 | * California, Berkeley and its contributors. |
| 20 | * 4. Neither the name of the University nor the names of its contributors |
| 21 | * may be used to endorse or promote products derived from this software |
| 22 | * without specific prior written permission. |
| 23 | * |
| 24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| 25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| 28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 34 | * SUCH DAMAGE. |
| 35 | * |
| 36 | * $FreeBSD: src/games/fortune/strfile/strfile.c,v 1.15.2.2 2001/03/05 11:52:37 kris Exp $ |
| 37 | * $DragonFly: src/games/fortune/strfile/strfile.c,v 1.6 2008/07/10 18:29:51 swildner Exp $ |
| 38 | * |
| 39 | * @(#) Copyright (c) 1989, 1993 The Regents of the University of California. All rights reserved. |
| 40 | * @(#)strfile.c 8.1 (Berkeley) 5/31/93 |
| 41 | * $FreeBSD: src/games/fortune/strfile/strfile.c,v 1.15.2.2 2001/03/05 11:52:37 kris Exp $ |
| 42 | */ |
| 43 | |
| 44 | # include <sys/param.h> |
| 45 | # include <netinet/in.h> |
| 46 | # include <stdbool.h> |
| 47 | # include <stdio.h> |
| 48 | # include <stdlib.h> |
| 49 | # include <ctype.h> |
| 50 | # include <string.h> |
| 51 | # include <time.h> |
| 52 | # include <locale.h> |
| 53 | # include <unistd.h> |
| 54 | # include "strfile.h" |
| 55 | |
/*
 *	This program takes a file composed of strings separated by lines
 * consisting of a single delimiting character (default character is
 * '%') and creates another file which consists of a table describing
 * the file (structure from "strfile.h") followed by a table of seek
 * pointers to the start of the strings.  The strings themselves are not
 * copied: the seek pointers are offsets into the source file.  If no
 * data file name is given, the source file name with ".dat" appended is
 * used.  Usage:
 *
 *	% strfile [-Ciorsx] [-c char] sourcefile [datafile]
 *
 *	C - Allow comments marked by a double delimiter at line's beginning
 *	    (sets the STR_COMMENTS flag in the table)
 *	c - Change delimiting character from '%' to the given character
 *	s - Silent.  Give no summary of data processed at the end of
 *	    the run.
 *	o - order the strings in alphabetic order
 *	i - if ordering, ignore case
 *	r - randomize the order of the strings
 *	x - set rotated bit
 *
 *		Ken Arnold	Sept. 7, 1978 --
 *
 *	Added ordering options.
 */
| 79 | |
/* True when seek pointers must be kept in memory for later reordering. */
# define	STORING_PTRS	(Oflag || Rflag)
/* Number of elements by which ALLOC grows an array. */
# define	CHUNKSIZE	512

/*
 * ALLOC:
 *	Make sure the array `ptr' can hold the element about to be stored.
 *	A NULL `ptr' is given an initial CHUNKSIZE elements; afterwards the
 *	array is grown by CHUNKSIZE elements each time (sz + 1) reaches a
 *	multiple of CHUNKSIZE.  Because growth is triggered only on those
 *	exact values, callers must invoke the macro once for every
 *	consecutive value of `sz'.  On allocation failure a message is
 *	printed and the program exits.
 *
 *	The body is wrapped in do { } while (0) so the macro expands to a
 *	single statement and can be used safely in an unbraced if/else.
 */
# define	ALLOC(ptr,sz)	do { \
		if ((ptr) == NULL) \
			(ptr) = malloc(CHUNKSIZE * sizeof *(ptr)); \
		else if (((sz) + 1) % CHUNKSIZE == 0) \
			(ptr) = realloc((void *) (ptr), \
			    ((size_t) ((sz) + CHUNKSIZE)) * sizeof *(ptr)); \
		if ((ptr) == NULL) { \
			fprintf(stderr, "out of space\n"); \
			exit(1); \
		} \
	} while (0)
| 93 | |
| 94 | typedef struct { |
| 95 | char first; |
| 96 | long pos; |
| 97 | } STR; |
| 98 | |
| 99 | char *Infile = NULL, /* input file name */ |
| 100 | Outfile[MAXPATHLEN] = "", /* output file name */ |
| 101 | Delimch = '%'; /* delimiting character */ |
| 102 | |
| 103 | int Cflag = false; /* embedded comments */ |
| 104 | int Sflag = false; /* silent run flag */ |
| 105 | int Oflag = false; /* ordering flag */ |
| 106 | int Iflag = false; /* ignore case flag */ |
| 107 | int Rflag = false; /* randomize order flag */ |
| 108 | int Xflag = false; /* set rotated bit */ |
| 109 | long Num_pts = 0; /* number of pointers/strings */ |
| 110 | |
| 111 | long *Seekpts; |
| 112 | |
| 113 | FILE *Sort_1, *Sort_2; /* pointers for sorting */ |
| 114 | |
| 115 | STRFILE Tbl; /* statistics table */ |
| 116 | |
| 117 | STR *Firstch; /* first chars of each string */ |
| 118 | |
| 119 | void add_offset (FILE *, long); |
| 120 | int cmp_str (const void *, const void *); |
| 121 | static int collate_range_cmp (int, int); |
| 122 | void do_order (void); |
| 123 | void getargs (int, char **); |
| 124 | void randomize (void); |
| 125 | void usage (void); |
| 126 | |
| 127 | /* |
| 128 | * main: |
| 129 | * Drive the sucker. There are two main modes -- either we store |
| 130 | * the seek pointers, if the table is to be sorted or randomized, |
| 131 | * or we write the pointer directly to the file, if we are to stay |
| 132 | * in file order. If the former, we allocate and re-allocate in |
| 133 | * CHUNKSIZE blocks; if the latter, we just write each pointer, |
| 134 | * and then seek back to the beginning to write in the table. |
| 135 | */ |
| 136 | int |
| 137 | main(int ac, char **av) |
| 138 | { |
| 139 | char *sp, dc; |
| 140 | FILE *inf, *outf; |
| 141 | long last_off, length, pos, *p; |
| 142 | int first, cnt; |
| 143 | char *nsp; |
| 144 | STR *fp; |
| 145 | static char string[257]; |
| 146 | |
| 147 | (void) setlocale(LC_ALL, ""); |
| 148 | |
| 149 | getargs(ac, av); /* evalute arguments */ |
| 150 | dc = Delimch; |
| 151 | if ((inf = fopen(Infile, "r")) == NULL) { |
| 152 | perror(Infile); |
| 153 | exit(1); |
| 154 | } |
| 155 | |
| 156 | if ((outf = fopen(Outfile, "w")) == NULL) { |
| 157 | perror(Outfile); |
| 158 | exit(1); |
| 159 | } |
| 160 | if (!STORING_PTRS) |
| 161 | (void) fseek(outf, (long) sizeof Tbl, 0); |
| 162 | |
| 163 | /* |
| 164 | * Write the strings onto the file |
| 165 | */ |
| 166 | |
| 167 | Tbl.str_longlen = 0; |
| 168 | Tbl.str_shortlen = ~((unsigned long) 0); |
| 169 | Tbl.str_delim = dc; |
| 170 | Tbl.str_version = VERSION; |
| 171 | first = Oflag; |
| 172 | add_offset(outf, ftell(inf)); |
| 173 | last_off = 0; |
| 174 | do { |
| 175 | sp = fgets(string, 256, inf); |
| 176 | if (sp == NULL || (sp[0] == dc && sp[1] == '\n')) { |
| 177 | pos = ftell(inf); |
| 178 | length = pos - last_off - (sp ? strlen(sp) : 0); |
| 179 | last_off = pos; |
| 180 | if (!length) |
| 181 | continue; |
| 182 | add_offset(outf, pos); |
| 183 | if (Tbl.str_longlen < (unsigned long)length) |
| 184 | Tbl.str_longlen = length; |
| 185 | if (Tbl.str_shortlen > (unsigned long)length) |
| 186 | Tbl.str_shortlen = length; |
| 187 | first = Oflag; |
| 188 | } |
| 189 | else if (first) { |
| 190 | for (nsp = sp; !isalnum((unsigned char)*nsp); nsp++) |
| 191 | continue; |
| 192 | ALLOC(Firstch, Num_pts); |
| 193 | fp = &Firstch[Num_pts - 1]; |
| 194 | if (Iflag && isupper((unsigned char)*nsp)) |
| 195 | fp->first = tolower((unsigned char)*nsp); |
| 196 | else |
| 197 | fp->first = *nsp; |
| 198 | fp->pos = Seekpts[Num_pts - 1]; |
| 199 | first = false; |
| 200 | } |
| 201 | } while (sp != NULL); |
| 202 | |
| 203 | /* |
| 204 | * write the tables in |
| 205 | */ |
| 206 | |
| 207 | (void) fclose(inf); |
| 208 | Tbl.str_numstr = Num_pts - 1; |
| 209 | |
| 210 | if (Cflag) |
| 211 | Tbl.str_flags |= STR_COMMENTS; |
| 212 | |
| 213 | if (Oflag) |
| 214 | do_order(); |
| 215 | else if (Rflag) |
| 216 | randomize(); |
| 217 | |
| 218 | if (Xflag) |
| 219 | Tbl.str_flags |= STR_ROTATED; |
| 220 | |
| 221 | if (!Sflag) { |
| 222 | printf("\"%s\" created\n", Outfile); |
| 223 | if (Num_pts == 2) |
| 224 | puts("There was 1 string"); |
| 225 | else |
| 226 | printf("There were %ld strings\n", Num_pts - 1); |
| 227 | printf("Longest string: %lu byte%s\n", Tbl.str_longlen, |
| 228 | Tbl.str_longlen == 1 ? "" : "s"); |
| 229 | printf("Shortest string: %lu byte%s\n", Tbl.str_shortlen, |
| 230 | Tbl.str_shortlen == 1 ? "" : "s"); |
| 231 | } |
| 232 | |
| 233 | rewind(outf); |
| 234 | Tbl.str_version = htonl(Tbl.str_version); |
| 235 | Tbl.str_numstr = htonl(Tbl.str_numstr); |
| 236 | Tbl.str_longlen = htonl(Tbl.str_longlen); |
| 237 | Tbl.str_shortlen = htonl(Tbl.str_shortlen); |
| 238 | Tbl.str_flags = htonl(Tbl.str_flags); |
| 239 | (void) fwrite((char *) &Tbl, sizeof Tbl, 1, outf); |
| 240 | if (STORING_PTRS) { |
| 241 | for (p = Seekpts, cnt = Num_pts; cnt--; ++p) |
| 242 | *p = htonl(*p); |
| 243 | (void) fwrite((char *) Seekpts, sizeof *Seekpts, (int) Num_pts, outf); |
| 244 | } |
| 245 | (void) fclose(outf); |
| 246 | exit(0); |
| 247 | } |
| 248 | |
| 249 | /* |
| 250 | * This routine evaluates arguments from the command line |
| 251 | */ |
| 252 | void |
| 253 | getargs(int argc, char **argv) |
| 254 | { |
| 255 | int ch; |
| 256 | |
| 257 | while ((ch = getopt(argc, argv, "Cc:iorsx")) != -1) |
| 258 | switch(ch) { |
| 259 | case 'C': /* embedded comments */ |
| 260 | Cflag++; |
| 261 | break; |
| 262 | case 'c': /* new delimiting char */ |
| 263 | Delimch = *optarg; |
| 264 | if (!isascii(Delimch)) { |
| 265 | printf("bad delimiting character: '\\%o\n'", |
| 266 | (unsigned char)Delimch); |
| 267 | } |
| 268 | break; |
| 269 | case 'i': /* ignore case in ordering */ |
| 270 | Iflag++; |
| 271 | break; |
| 272 | case 'o': /* order strings */ |
| 273 | Oflag++; |
| 274 | break; |
| 275 | case 'r': /* randomize pointers */ |
| 276 | Rflag++; |
| 277 | break; |
| 278 | case 's': /* silent */ |
| 279 | Sflag++; |
| 280 | break; |
| 281 | case 'x': /* set the rotated bit */ |
| 282 | Xflag++; |
| 283 | break; |
| 284 | case '?': |
| 285 | default: |
| 286 | usage(); |
| 287 | } |
| 288 | argv += optind; |
| 289 | |
| 290 | if (*argv) { |
| 291 | Infile = *argv; |
| 292 | if (*++argv) |
| 293 | (void) strcpy(Outfile, *argv); |
| 294 | } |
| 295 | if (!Infile) { |
| 296 | puts("No input file name"); |
| 297 | usage(); |
| 298 | } |
| 299 | if (*Outfile == '\0') { |
| 300 | (void) strcpy(Outfile, Infile); |
| 301 | (void) strcat(Outfile, ".dat"); |
| 302 | } |
| 303 | } |
| 304 | |
/*
 * usage:
 *	Print the command synopsis on standard error and exit with
 *	status 1.
 */
void
usage(void)
{
	static const char synopsis[] =
	    "strfile [-Ciorsx] [-c char] sourcefile [datafile]\n";

	(void) fputs(synopsis, stderr);
	exit(1);
}
| 312 | |
| 313 | /* |
| 314 | * add_offset: |
| 315 | * Add an offset to the list, or write it out, as appropriate. |
| 316 | */ |
| 317 | void |
| 318 | add_offset(FILE *fp, long off) |
| 319 | { |
| 320 | long net; |
| 321 | |
| 322 | if (!STORING_PTRS) { |
| 323 | net = htonl(off); |
| 324 | fwrite(&net, 1, sizeof net, fp); |
| 325 | } else { |
| 326 | ALLOC(Seekpts, Num_pts + 1); |
| 327 | Seekpts[Num_pts] = off; |
| 328 | } |
| 329 | Num_pts++; |
| 330 | } |
| 331 | |
| 332 | /* |
| 333 | * do_order: |
| 334 | * Order the strings alphabetically (possibly ignoring case). |
| 335 | */ |
| 336 | void |
| 337 | do_order(void) |
| 338 | { |
| 339 | int i; |
| 340 | long *lp; |
| 341 | STR *fp; |
| 342 | |
| 343 | Sort_1 = fopen(Infile, "r"); |
| 344 | Sort_2 = fopen(Infile, "r"); |
| 345 | qsort((char *) Firstch, (int) Tbl.str_numstr, sizeof *Firstch, cmp_str); |
| 346 | i = Tbl.str_numstr; |
| 347 | lp = Seekpts; |
| 348 | fp = Firstch; |
| 349 | while (i--) |
| 350 | *lp++ = fp++->pos; |
| 351 | (void) fclose(Sort_1); |
| 352 | (void) fclose(Sort_2); |
| 353 | Tbl.str_flags |= STR_ORDERED; |
| 354 | } |
| 355 | |
/*
 * collate_range_cmp:
 *	Compare two characters according to the current locale's collation
 *	order.  Both arguments are reduced to the unsigned char range
 *	first.  Returns 0 when they are the same character, otherwise the
 *	strcoll() result for the two one-character strings, falling back to
 *	the difference of the character codes when strcoll() reports a tie.
 */
static int
collate_range_cmp(int c1, int c2)
{
	char	lhs[2] = { '\0', '\0' };
	char	rhs[2] = { '\0', '\0' };
	int	order;

	c1 &= UCHAR_MAX;
	c2 &= UCHAR_MAX;
	if (c1 == c2)
		return 0;

	lhs[0] = c1;
	rhs[0] = c2;
	order = strcoll(lhs, rhs);
	return order != 0 ? order : c1 - c2;
}
| 372 | |
| 373 | /* |
| 374 | * cmp_str: |
| 375 | * Compare two strings in the file |
| 376 | */ |
| 377 | int |
| 378 | cmp_str(const void *s1, const void *s2) |
| 379 | { |
| 380 | const STR *p1, *p2; |
| 381 | int c1, c2; |
| 382 | int n1, n2; |
| 383 | int r; |
| 384 | |
| 385 | # define SET_N(nf,ch) (nf = (ch == '\n')) |
| 386 | # define IS_END(ch,nf) (ch == EOF || (ch == (unsigned char) Delimch && nf)) |
| 387 | |
| 388 | p1 = (const STR *) s1; |
| 389 | p2 = (const STR *) s2; |
| 390 | |
| 391 | c1 = (unsigned char) p1->first; |
| 392 | c2 = (unsigned char) p2->first; |
| 393 | if ((r = collate_range_cmp(c1, c2)) != 0) |
| 394 | return r; |
| 395 | |
| 396 | (void) fseek(Sort_1, p1->pos, 0); |
| 397 | (void) fseek(Sort_2, p2->pos, 0); |
| 398 | |
| 399 | n1 = false; |
| 400 | n2 = false; |
| 401 | while (!isalnum(c1 = getc(Sort_1)) && c1 != '\0' && c1 != EOF) |
| 402 | SET_N(n1, c1); |
| 403 | while (!isalnum(c2 = getc(Sort_2)) && c2 != '\0' && c2 != EOF) |
| 404 | SET_N(n2, c2); |
| 405 | |
| 406 | while (!IS_END(c1, n1) && !IS_END(c2, n2)) { |
| 407 | if (Iflag) { |
| 408 | if (isupper(c1)) |
| 409 | c1 = tolower(c1); |
| 410 | if (isupper(c2)) |
| 411 | c2 = tolower(c2); |
| 412 | } |
| 413 | if ((r = collate_range_cmp(c1, c2)) != 0) |
| 414 | return r; |
| 415 | SET_N(n1, c1); |
| 416 | SET_N(n2, c2); |
| 417 | c1 = getc(Sort_1); |
| 418 | c2 = getc(Sort_2); |
| 419 | } |
| 420 | if (IS_END(c1, n1)) |
| 421 | c1 = 0; |
| 422 | if (IS_END(c2, n2)) |
| 423 | c2 = 0; |
| 424 | return collate_range_cmp(c1, c2); |
| 425 | } |
| 426 | |
| 427 | /* |
| 428 | * randomize: |
| 429 | * Randomize the order of the string table. We must be careful |
| 430 | * not to randomize across delimiter boundaries. All |
| 431 | * randomization is done within each block. |
| 432 | */ |
| 433 | void |
| 434 | randomize(void) |
| 435 | { |
| 436 | int cnt, i; |
| 437 | long tmp; |
| 438 | long *sp; |
| 439 | |
| 440 | srandomdev(); |
| 441 | |
| 442 | Tbl.str_flags |= STR_RANDOM; |
| 443 | cnt = Tbl.str_numstr; |
| 444 | |
| 445 | /* |
| 446 | * move things around randomly |
| 447 | */ |
| 448 | |
| 449 | for (sp = Seekpts; cnt > 0; cnt--, sp++) { |
| 450 | i = random() % cnt; |
| 451 | tmp = sp[0]; |
| 452 | sp[0] = sp[i]; |
| 453 | sp[i] = tmp; |
| 454 | } |
| 455 | } |