| Commit | Line | Data |
|---|---|---|
| 984263bc MD |
1 | /*- |
| 2 | * Copyright (c) 1989, 1993 | |
| 3 | * The Regents of the University of California. All rights reserved. | |
| 4 | * | |
| 5 | * This code is derived from software contributed to Berkeley by | |
| 6 | * Ken Arnold. | |
| 7 | * | |
| 8 | * Redistribution and use in source and binary forms, with or without | |
| 9 | * modification, are permitted provided that the following conditions | |
| 10 | * are met: | |
| 11 | * 1. Redistributions of source code must retain the above copyright | |
| 12 | * notice, this list of conditions and the following disclaimer. | |
| 13 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 14 | * notice, this list of conditions and the following disclaimer in the | |
| 15 | * documentation and/or other materials provided with the distribution. | |
| 16 | * 3. All advertising materials mentioning features or use of this software | |
| 17 | * must display the following acknowledgement: | |
| 18 | * This product includes software developed by the University of | |
| 19 | * California, Berkeley and its contributors. | |
| 20 | * 4. Neither the name of the University nor the names of its contributors | |
| 21 | * may be used to endorse or promote products derived from this software | |
| 22 | * without specific prior written permission. | |
| 23 | * | |
| 24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
| 25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
| 28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 34 | * SUCH DAMAGE. | |
| 35 | * | |
| 36 | * $FreeBSD: src/games/fortune/strfile/strfile.c,v 1.15.2.2 2001/03/05 11:52:37 kris Exp $ | |
| 9b0ec895 | 37 | * $DragonFly: src/games/fortune/strfile/strfile.c,v 1.6 2008/07/10 18:29:51 swildner Exp $ |
| 1de703da MD |
38 | * |
| 39 | * @(#) Copyright (c) 1989, 1993 The Regents of the University of California. All rights reserved. | |
| 40 | * @(#)strfile.c 8.1 (Berkeley) 5/31/93 | |
| 41 | * $FreeBSD: src/games/fortune/strfile/strfile.c,v 1.15.2.2 2001/03/05 11:52:37 kris Exp $ | |
| 984263bc MD |
42 | */ |
| 43 | ||
| 984263bc | 44 | # include <sys/param.h> |
| 235099c3 | 45 | # include <netinet/in.h> |
| 2cd69caa | 46 | # include <stdbool.h> |
| 984263bc MD |
47 | # include <stdio.h> |
| 48 | # include <stdlib.h> | |
| 49 | # include <ctype.h> | |
| 50 | # include <string.h> | |
| 51 | # include <time.h> | |
| 52 | # include <locale.h> | |
| 53 | # include <unistd.h> | |
| 54 | # include "strfile.h" | |
| 55 | ||
| 56 | /* | |
| 57 | * This program takes a file composed of strings separated by | |
| 58 | * lines consisting solely of the delimiting character (default | |
| 59 | * character is '%') and creates another file which consists of a table | |
| 60 | * describing the file (structure from "strfile.h"), a table of seek | |
| 61 | * pointers to the start of the strings, and the strings, each terminated | |
| 62 | * by a null byte. Usage: | |
| 63 | * | |
| 64 | * % strfile [-Ciorsx] [ -c char ] sourcefile [ datafile ] | |
| 65 | * | |
| 66 | * C - Allow comments marked by a double delimiter at line's beginning | |
| 67 | * c char - Change delimiting character from '%' to 'char' | |
| 68 | * s - Silent. Give no summary of data processed at the end of | |
| 69 | * the run. | |
| 70 | * o - order the strings in alphabetic order | |
| 71 | * i - if ordering, ignore case | |
| 72 | * r - randomize the order of the strings | |
| 73 | * x - set rotated bit | |
| 74 | * | |
| 75 | * Ken Arnold Sept. 7, 1978 -- | |
| 76 | * | |
| 77 | * Added ordering options. | |
| 78 | */ | |
| 79 | ||
/* True when seek pointers are kept in memory (ordering or randomizing). */
# define	STORING_PTRS	(Oflag || Rflag)

/* Number of elements by which the pointer tables grow. */
# define	CHUNKSIZE	512

/*
 * ALLOC:
 *	Make sure the growable array `ptr` can hold element index `sz`.
 *	A NULL `ptr` is given an initial CHUNKSIZE elements.  Otherwise
 *	the array is grown to `sz` + CHUNKSIZE elements whenever `sz` + 1
 *	is a multiple of CHUNKSIZE, so the macro must be invoked for every
 *	consecutive value of `sz` or a growth step will be missed.
 *	Prints "out of space" and exits with status 1 on allocation
 *	failure.
 *
 *	Wrapped in do { } while (0) so that it behaves as one statement
 *	(safe in an unbraced if/else); `ptr` must be a modifiable pointer
 *	lvalue and is evaluated more than once.
 */
# define	ALLOC(ptr, sz)	do { \
		if ((ptr) == NULL) \
			(ptr) = malloc(CHUNKSIZE * sizeof *(ptr)); \
		else if (((sz) + 1) % CHUNKSIZE == 0) \
			(ptr) = realloc((ptr), \
			    ((size_t) (sz) + CHUNKSIZE) * sizeof *(ptr)); \
		if ((ptr) == NULL) { \
			fprintf(stderr, "out of space\n"); \
			exit(1); \
		} \
	} while (0)
| 93 | ||
| 984263bc MD |
94 | typedef struct { |
| 95 | char first; | |
| 96 | long pos; | |
| 97 | } STR; | |
| 98 | ||
| 99 | char *Infile = NULL, /* input file name */ | |
| 100 | Outfile[MAXPATHLEN] = "", /* output file name */ | |
| 101 | Delimch = '%'; /* delimiting character */ | |
| 102 | ||
| 2cd69caa PA |
103 | int Cflag = false; /* embedded comments */ |
| 104 | int Sflag = false; /* silent run flag */ | |
| 105 | int Oflag = false; /* ordering flag */ | |
| 106 | int Iflag = false; /* ignore case flag */ | |
| 107 | int Rflag = false; /* randomize order flag */ | |
| 108 | int Xflag = false; /* set rotated bit */ | |
| 984263bc MD |
109 | long Num_pts = 0; /* number of pointers/strings */ |
| 110 | ||
| 111 | long *Seekpts; | |
| 112 | ||
| 113 | FILE *Sort_1, *Sort_2; /* pointers for sorting */ | |
| 114 | ||
| 115 | STRFILE Tbl; /* statistics table */ | |
| 116 | ||
| 117 | STR *Firstch; /* first chars of each string */ | |
| 118 | ||
| 851dc90d EN |
119 | void add_offset (FILE *, long); |
| 120 | int cmp_str (const void *, const void *); | |
| 121 | static int collate_range_cmp (int, int); | |
| 122 | void do_order (void); | |
| 123 | void getargs (int, char **); | |
| 124 | void randomize (void); | |
| 125 | void usage (void); | |
| 984263bc MD |
126 | |
| 127 | /* | |
| 128 | * main: | |
| 129 | * Drive the sucker. There are two main modes -- either we store | |
| 130 | * the seek pointers, if the table is to be sorted or randomized, | |
| 131 | * or we write the pointer directly to the file, if we are to stay | |
| 132 | * in file order. If the former, we allocate and re-allocate in | |
| 133 | * CHUNKSIZE blocks; if the latter, we just write each pointer, | |
| 134 | * and then seek back to the beginning to write in the table. | |
| 135 | */ | |
| b1e9d17a LF |
136 | int |
| 137 | main(int ac, char **av) | |
| 984263bc MD |
138 | { |
| 139 | char *sp, dc; | |
| 140 | FILE *inf, *outf; | |
| 141 | long last_off, length, pos, *p; | |
| 142 | int first, cnt; | |
| 143 | char *nsp; | |
| 144 | STR *fp; | |
| 145 | static char string[257]; | |
| 146 | ||
| 147 | (void) setlocale(LC_ALL, ""); | |
| 148 | ||
| 149 | getargs(ac, av); /* evalute arguments */ | |
| 150 | dc = Delimch; | |
| 151 | if ((inf = fopen(Infile, "r")) == NULL) { | |
| 152 | perror(Infile); | |
| 153 | exit(1); | |
| 154 | } | |
| 155 | ||
| 156 | if ((outf = fopen(Outfile, "w")) == NULL) { | |
| 157 | perror(Outfile); | |
| 158 | exit(1); | |
| 159 | } | |
| 160 | if (!STORING_PTRS) | |
| 161 | (void) fseek(outf, (long) sizeof Tbl, 0); | |
| 162 | ||
| 163 | /* | |
| 164 | * Write the strings onto the file | |
| 165 | */ | |
| 166 | ||
| 167 | Tbl.str_longlen = 0; | |
| 168 | Tbl.str_shortlen = ~((unsigned long) 0); | |
| 169 | Tbl.str_delim = dc; | |
| 170 | Tbl.str_version = VERSION; | |
| 171 | first = Oflag; | |
| 172 | add_offset(outf, ftell(inf)); | |
| 173 | last_off = 0; | |
| 174 | do { | |
| 175 | sp = fgets(string, 256, inf); | |
| 176 | if (sp == NULL || (sp[0] == dc && sp[1] == '\n')) { | |
| 177 | pos = ftell(inf); | |
| 178 | length = pos - last_off - (sp ? strlen(sp) : 0); | |
| 179 | last_off = pos; | |
| 180 | if (!length) | |
| 181 | continue; | |
| 182 | add_offset(outf, pos); | |
| 2cd69caa | 183 | if (Tbl.str_longlen < (unsigned long)length) |
| 984263bc | 184 | Tbl.str_longlen = length; |
| 2cd69caa | 185 | if (Tbl.str_shortlen > (unsigned long)length) |
| 984263bc MD |
186 | Tbl.str_shortlen = length; |
| 187 | first = Oflag; | |
| 188 | } | |
| 189 | else if (first) { | |
| 190 | for (nsp = sp; !isalnum((unsigned char)*nsp); nsp++) | |
| 191 | continue; | |
| 192 | ALLOC(Firstch, Num_pts); | |
| 193 | fp = &Firstch[Num_pts - 1]; | |
| 194 | if (Iflag && isupper((unsigned char)*nsp)) | |
| 195 | fp->first = tolower((unsigned char)*nsp); | |
| 196 | else | |
| 197 | fp->first = *nsp; | |
| 198 | fp->pos = Seekpts[Num_pts - 1]; | |
| 2cd69caa | 199 | first = false; |
| 984263bc MD |
200 | } |
| 201 | } while (sp != NULL); | |
| 202 | ||
| 203 | /* | |
| 204 | * write the tables in | |
| 205 | */ | |
| 206 | ||
| 207 | (void) fclose(inf); | |
| 208 | Tbl.str_numstr = Num_pts - 1; | |
| 209 | ||
| 210 | if (Cflag) | |
| 211 | Tbl.str_flags |= STR_COMMENTS; | |
| 212 | ||
| 213 | if (Oflag) | |
| 214 | do_order(); | |
| 215 | else if (Rflag) | |
| 216 | randomize(); | |
| 217 | ||
| 218 | if (Xflag) | |
| 219 | Tbl.str_flags |= STR_ROTATED; | |
| 220 | ||
| 221 | if (!Sflag) { | |
| 222 | printf("\"%s\" created\n", Outfile); | |
| 223 | if (Num_pts == 2) | |
| 224 | puts("There was 1 string"); | |
| 225 | else | |
| 226 | printf("There were %ld strings\n", Num_pts - 1); | |
| 227 | printf("Longest string: %lu byte%s\n", Tbl.str_longlen, | |
| 228 | Tbl.str_longlen == 1 ? "" : "s"); | |
| 229 | printf("Shortest string: %lu byte%s\n", Tbl.str_shortlen, | |
| 230 | Tbl.str_shortlen == 1 ? "" : "s"); | |
| 231 | } | |
| 232 | ||
| 233 | rewind(outf); | |
| 234 | Tbl.str_version = htonl(Tbl.str_version); | |
| 235 | Tbl.str_numstr = htonl(Tbl.str_numstr); | |
| 236 | Tbl.str_longlen = htonl(Tbl.str_longlen); | |
| 237 | Tbl.str_shortlen = htonl(Tbl.str_shortlen); | |
| 238 | Tbl.str_flags = htonl(Tbl.str_flags); | |
| 239 | (void) fwrite((char *) &Tbl, sizeof Tbl, 1, outf); | |
| 240 | if (STORING_PTRS) { | |
| 241 | for (p = Seekpts, cnt = Num_pts; cnt--; ++p) | |
| 242 | *p = htonl(*p); | |
| 243 | (void) fwrite((char *) Seekpts, sizeof *Seekpts, (int) Num_pts, outf); | |
| 244 | } | |
| 245 | (void) fclose(outf); | |
| 246 | exit(0); | |
| 247 | } | |
| 248 | ||
| 249 | /* | |
| 250 | * This routine evaluates arguments from the command line | |
| 251 | */ | |
| b1e9d17a LF |
252 | void |
| 253 | getargs(int argc, char **argv) | |
| 984263bc MD |
254 | { |
| 255 | int ch; | |
| 256 | ||
| 9b0ec895 | 257 | while ((ch = getopt(argc, argv, "Cc:iorsx")) != -1) |
| 984263bc MD |
258 | switch(ch) { |
| 259 | case 'C': /* embedded comments */ | |
| 260 | Cflag++; | |
| 261 | break; | |
| 262 | case 'c': /* new delimiting char */ | |
| 263 | Delimch = *optarg; | |
| 264 | if (!isascii(Delimch)) { | |
| 265 | printf("bad delimiting character: '\\%o\n'", | |
| 266 | (unsigned char)Delimch); | |
| 267 | } | |
| 268 | break; | |
| 269 | case 'i': /* ignore case in ordering */ | |
| 270 | Iflag++; | |
| 271 | break; | |
| 272 | case 'o': /* order strings */ | |
| 273 | Oflag++; | |
| 274 | break; | |
| 275 | case 'r': /* randomize pointers */ | |
| 276 | Rflag++; | |
| 277 | break; | |
| 278 | case 's': /* silent */ | |
| 279 | Sflag++; | |
| 280 | break; | |
| 281 | case 'x': /* set the rotated bit */ | |
| 282 | Xflag++; | |
| 283 | break; | |
| 284 | case '?': | |
| 285 | default: | |
| 286 | usage(); | |
| 287 | } | |
| 288 | argv += optind; | |
| 289 | ||
| 290 | if (*argv) { | |
| 291 | Infile = *argv; | |
| 292 | if (*++argv) | |
| 293 | (void) strcpy(Outfile, *argv); | |
| 294 | } | |
| 295 | if (!Infile) { | |
| 296 | puts("No input file name"); | |
| 297 | usage(); | |
| 298 | } | |
| 299 | if (*Outfile == '\0') { | |
| 300 | (void) strcpy(Outfile, Infile); | |
| 301 | (void) strcat(Outfile, ".dat"); | |
| 302 | } | |
| 303 | } | |
| 304 | ||
/*
 * usage:
 *	Print the command synopsis on stderr and exit with status 1.
 */
void
usage(void)
{
	static const char synopsis[] =
	    "strfile [-Ciorsx] [-c char] sourcefile [datafile]\n";

	(void) fputs(synopsis, stderr);
	exit(1);
}
| 312 | ||
| 313 | /* | |
| 314 | * add_offset: | |
| 315 | * Add an offset to the list, or write it out, as appropriate. | |
| 316 | */ | |
| b1e9d17a LF |
317 | void |
| 318 | add_offset(FILE *fp, long off) | |
| 984263bc MD |
319 | { |
| 320 | long net; | |
| 321 | ||
| 322 | if (!STORING_PTRS) { | |
| 323 | net = htonl(off); | |
| 324 | fwrite(&net, 1, sizeof net, fp); | |
| 325 | } else { | |
| 326 | ALLOC(Seekpts, Num_pts + 1); | |
| 327 | Seekpts[Num_pts] = off; | |
| 328 | } | |
| 329 | Num_pts++; | |
| 330 | } | |
| 331 | ||
| 332 | /* | |
| 333 | * do_order: | |
| 334 | * Order the strings alphabetically (possibly ignoring case). | |
| 335 | */ | |
| b1e9d17a LF |
336 | void |
| 337 | do_order(void) | |
| 984263bc MD |
338 | { |
| 339 | int i; | |
| 340 | long *lp; | |
| 341 | STR *fp; | |
| 342 | ||
| 343 | Sort_1 = fopen(Infile, "r"); | |
| 344 | Sort_2 = fopen(Infile, "r"); | |
| 345 | qsort((char *) Firstch, (int) Tbl.str_numstr, sizeof *Firstch, cmp_str); | |
| 346 | i = Tbl.str_numstr; | |
| 347 | lp = Seekpts; | |
| 348 | fp = Firstch; | |
| 349 | while (i--) | |
| 350 | *lp++ = fp++->pos; | |
| 351 | (void) fclose(Sort_1); | |
| 352 | (void) fclose(Sort_2); | |
| 353 | Tbl.str_flags |= STR_ORDERED; | |
| 354 | } | |
| 355 | ||
/*
 * collate_range_cmp:
 *	Compare two characters (reduced to the unsigned char range) as
 *	one-character strings with strcoll().  Characters that strcoll()
 *	reports as equal are ordered by their numeric value, so the
 *	result is 0 only when both characters are the same.
 */
static int
collate_range_cmp (int c1, int c2)
{
	char lhs[2] = { '\0', '\0' };
	char rhs[2] = { '\0', '\0' };
	int order;

	c1 &= UCHAR_MAX;
	c2 &= UCHAR_MAX;
	if (c1 == c2)
		return (0);

	lhs[0] = c1;
	rhs[0] = c2;
	order = strcoll(lhs, rhs);
	return (order != 0 ? order : c1 - c2);
}
| 372 | ||
| 373 | /* | |
| 374 | * cmp_str: | |
| 375 | * Compare two strings in the file | |
| 376 | */ | |
| b1e9d17a LF |
377 | int |
| 378 | cmp_str(const void *s1, const void *s2) | |
| 984263bc MD |
379 | { |
| 380 | const STR *p1, *p2; | |
| 381 | int c1, c2; | |
| 382 | int n1, n2; | |
| 383 | int r; | |
| 384 | ||
| 385 | # define SET_N(nf,ch) (nf = (ch == '\n')) | |
| 386 | # define IS_END(ch,nf) (ch == EOF || (ch == (unsigned char) Delimch && nf)) | |
| 387 | ||
| 388 | p1 = (const STR *) s1; | |
| 389 | p2 = (const STR *) s2; | |
| 390 | ||
| 391 | c1 = (unsigned char) p1->first; | |
| 392 | c2 = (unsigned char) p2->first; | |
| 393 | if ((r = collate_range_cmp(c1, c2)) != 0) | |
| 394 | return r; | |
| 395 | ||
| 396 | (void) fseek(Sort_1, p1->pos, 0); | |
| 397 | (void) fseek(Sort_2, p2->pos, 0); | |
| 398 | ||
| 2cd69caa PA |
399 | n1 = false; |
| 400 | n2 = false; | |
| 984263bc MD |
401 | while (!isalnum(c1 = getc(Sort_1)) && c1 != '\0' && c1 != EOF) |
| 402 | SET_N(n1, c1); | |
| 403 | while (!isalnum(c2 = getc(Sort_2)) && c2 != '\0' && c2 != EOF) | |
| 404 | SET_N(n2, c2); | |
| 405 | ||
| 406 | while (!IS_END(c1, n1) && !IS_END(c2, n2)) { | |
| 407 | if (Iflag) { | |
| 408 | if (isupper(c1)) | |
| 409 | c1 = tolower(c1); | |
| 410 | if (isupper(c2)) | |
| 411 | c2 = tolower(c2); | |
| 412 | } | |
| 413 | if ((r = collate_range_cmp(c1, c2)) != 0) | |
| 414 | return r; | |
| 415 | SET_N(n1, c1); | |
| 416 | SET_N(n2, c2); | |
| 417 | c1 = getc(Sort_1); | |
| 418 | c2 = getc(Sort_2); | |
| 419 | } | |
| 420 | if (IS_END(c1, n1)) | |
| 421 | c1 = 0; | |
| 422 | if (IS_END(c2, n2)) | |
| 423 | c2 = 0; | |
| 424 | return collate_range_cmp(c1, c2); | |
| 425 | } | |
| 426 | ||
| 427 | /* | |
| 428 | * randomize: | |
| 429 | * Randomize the order of the string table. We must be careful | |
| 430 | * not to randomize across delimiter boundaries. All | |
| 431 | * randomization is done within each block. | |
| 432 | */ | |
| b1e9d17a LF |
433 | void |
| 434 | randomize(void) | |
| 984263bc MD |
435 | { |
| 436 | int cnt, i; | |
| 437 | long tmp; | |
| 438 | long *sp; | |
| 439 | ||
| 440 | srandomdev(); | |
| 441 | ||
| 442 | Tbl.str_flags |= STR_RANDOM; | |
| 443 | cnt = Tbl.str_numstr; | |
| 444 | ||
| 445 | /* | |
| 446 | * move things around randomly | |
| 447 | */ | |
| 448 | ||
| 449 | for (sp = Seekpts; cnt > 0; cnt--, sp++) { | |
| 450 | i = random() % cnt; | |
| 451 | tmp = sp[0]; | |
| 452 | sp[0] = sp[i]; | |
| 453 | sp[i] = tmp; | |
| 454 | } | |
| 455 | } |