| 1 | /* |
| 2 | * Copyright (c) Ian F. Darwin 1986-1995. |
| 3 | * Software written by Ian F. Darwin and others; |
| 4 | * maintained 1995-present by Christos Zoulas and others. |
| 5 | * |
| 6 | * Redistribution and use in source and binary forms, with or without |
| 7 | * modification, are permitted provided that the following conditions |
| 8 | * are met: |
| 9 | * 1. Redistributions of source code must retain the above copyright |
| 10 | * notice immediately at the beginning of the file, without modification, |
| 11 | * this list of conditions, and the following disclaimer. |
| 12 | * 2. Redistributions in binary form must reproduce the above copyright |
| 13 | * notice, this list of conditions and the following disclaimer in the |
| 14 | * documentation and/or other materials provided with the distribution. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
| 17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR |
| 20 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 21 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 22 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 23 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 24 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 25 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 26 | * SUCH DAMAGE. |
| 27 | */ |
| 28 | /* |
| 29 | * apprentice - make one pass through /etc/magic, learning its secrets. |
| 30 | */ |
| 31 | |
| 32 | #include "file.h" |
| 33 | |
| 34 | #ifndef lint |
| 35 | FILE_RCSID("@(#)$File: apprentice.c,v 1.173 2011/12/08 12:38:24 rrt Exp $") |
| 36 | #endif /* lint */ |
| 37 | |
| 38 | #include "magic.h" |
| 39 | #include <stdlib.h> |
| 40 | #ifdef HAVE_UNISTD_H |
| 41 | #include <unistd.h> |
| 42 | #endif |
| 43 | #include <string.h> |
| 44 | #include <assert.h> |
| 45 | #include <ctype.h> |
| 46 | #include <fcntl.h> |
| 47 | #ifdef QUICK |
| 48 | #include <sys/mman.h> |
| 49 | #endif |
| 50 | #include <dirent.h> |
| 51 | |
/*
 * EATAB: advance the pointer `l' past a run of ASCII whitespace.
 * Expands to a braced block and relies on a variable literally named `l'
 * being in scope where the macro is used.
 */
#define EATAB {while (isascii((unsigned char) *l) && \
isspace((unsigned char) *l)) ++l;}
/*
 * LOWCASE: yields the lower-case form of `l' when it is an upper-case
 * character and `l' itself otherwise.  The argument is expanded more than
 * once, so it must not have side effects.
 */
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
tolower((unsigned char) (l)) : (l))
/*
 * Work around a bug in headers on Digital Unix.
 * At least confirmed for: OSF1 V4.0 878
 */
#if defined(__osf__) && defined(__DECC)
#ifdef MAP_FAILED
#undef MAP_FAILED
#endif
#endif

/* Fallback definition for when no header supplied MAP_FAILED. */
#ifndef MAP_FAILED
#define MAP_FAILED (void *) -1
#endif

/* Fallback definition for when no header supplied MAP_FILE. */
#ifndef MAP_FILE
#define MAP_FILE 0
#endif
| 73 | |
/*
 * One entry collected while loading magic files.
 */
struct magic_entry {
	/* array of struct magic; element 0 is the one inspected for sorting */
	struct magic *mp;
	/* number of elements of mp copied out by apprentice_load() */
	uint32_t cont_count;
	/* NOTE(review): presumably the allocated capacity of mp - confirm
	 * against parse(), which is not visible here */
	uint32_t max_count;
};
| 79 | |
/*
 * Per-type lookup tables, indexed by the type code of a type_tbl row and
 * filled in by init_file_tables(): file_formats[] receives the row's
 * format class and file_names[] its type name.  The two *_n* constants
 * publish the element counts of the arrays.
 */
int file_formats[FILE_NAMES_SIZE];
const size_t file_nformats = FILE_NAMES_SIZE;
const char *file_names[FILE_NAMES_SIZE];
const size_t file_nnames = FILE_NAMES_SIZE;
| 84 | |
/*
 * Forward declarations of file-local helpers.
 */
private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
private int hextoint(int);
private const char *getstr(struct magic_set *, struct magic *, const char *,
int);
private int parse(struct magic_set *, struct magic_entry **, uint32_t *,
const char *, size_t, int);
private void eatsize(const char **);
private int apprentice_1(struct magic_set *, const char *, int, struct mlist *);
private size_t apprentice_magic_strength(const struct magic *);
private int apprentice_sort(const void *, const void *);
private void apprentice_list(struct mlist *, int );
private int apprentice_load(struct magic_set *, struct magic **, uint32_t *,
const char *, int);
private void byteswap(struct magic *, uint32_t);
private void bs1(struct magic *);
private uint16_t swap2(uint16_t);
private uint32_t swap4(uint32_t);
private uint64_t swap8(uint64_t);
private char *mkdbname(struct magic_set *, const char *, int);
private int apprentice_map(struct magic_set *, struct magic **, uint32_t *,
const char *);
private int apprentice_compile(struct magic_set *, struct magic **, uint32_t *,
const char *);
private int check_format_type(const char *, int);
private int check_format(struct magic_set *, struct magic *);
private int get_op(char);
private int parse_mime(struct magic_set *, struct magic_entry *, const char *);
private int parse_strength(struct magic_set *, struct magic_entry *, const char *);
private int parse_apple(struct magic_set *, struct magic_entry *, const char *);


/*
 * Element count used by apprentice_load() when it allocates the entry
 * table (reset to MAXMAGIS there on every load).
 */
private size_t maxmagic = 0;
/* apprentice_1() refuses to proceed unless this equals FILE_MAGICSIZE. */
private size_t magicsize = sizeof(struct magic);

/* Column header printed to stderr by apprentice_load() for FILE_CHECK. */
private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
| 120 | |
/*
 * Table of the "!:" directives recognised by load_1().  For each row,
 * `name' is compared over `len' bytes with the text that follows "!:" and,
 * on a match, `fun' is called with the most recent entry and the remainder
 * of the line.  The table is terminated by an all-NULL row.
 */
private struct {
	const char *name;
	size_t len;
	int (*fun)(struct magic_set *, struct magic_entry *, const char *);
} bang[] = {
/* Builds one row from a bare keyword: its spelling, length and parser. */
#define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
	DECLARE_FIELD(mime),
	DECLARE_FIELD(apple),
	DECLARE_FIELD(strength),
#undef DECLARE_FIELD
	{ NULL, 0, NULL }
};
| 133 | |
#ifdef COMPILE_ONLY

int main(int, char *[]);

/*
 * Stand-alone driver for the compile-only build: expects exactly one
 * argument, the magic file to compile.  Returns 0 when the compile
 * succeeds and 1 on a usage error or any failure.
 */
int
main(int argc, char *argv[])
{
	struct magic_set *ms;
	char *slash;
	char *progname;
	int ret = 0;

	/* report errors under the basename of argv[0] */
	slash = strrchr(argv[0], '/');
	progname = (slash != NULL) ? slash + 1 : argv[0];

	if (argc != 2) {
		(void)fprintf(stderr, "Usage: %s file\n", progname);
		return 1;
	}

	ms = magic_open(MAGIC_CHECK);
	if (ms == NULL) {
		(void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
		return 1;
	}

	if (magic_compile(ms, argv[1]) == -1) {
		ret = 1;
		(void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms));
	}
	magic_close(ms);
	return ret;
}
#endif /* COMPILE_ONLY */
| 166 | |
| 167 | static const struct type_tbl_s { |
| 168 | const char name[16]; |
| 169 | const size_t len; |
| 170 | const int type; |
| 171 | const int format; |
| 172 | } type_tbl[] = { |
| 173 | # define XX(s) s, (sizeof(s) - 1) |
| 174 | # define XX_NULL "", 0 |
| 175 | { XX("byte"), FILE_BYTE, FILE_FMT_NUM }, |
| 176 | { XX("short"), FILE_SHORT, FILE_FMT_NUM }, |
| 177 | { XX("default"), FILE_DEFAULT, FILE_FMT_STR }, |
| 178 | { XX("long"), FILE_LONG, FILE_FMT_NUM }, |
| 179 | { XX("string"), FILE_STRING, FILE_FMT_STR }, |
| 180 | { XX("date"), FILE_DATE, FILE_FMT_STR }, |
| 181 | { XX("beshort"), FILE_BESHORT, FILE_FMT_NUM }, |
| 182 | { XX("belong"), FILE_BELONG, FILE_FMT_NUM }, |
| 183 | { XX("bedate"), FILE_BEDATE, FILE_FMT_STR }, |
| 184 | { XX("leshort"), FILE_LESHORT, FILE_FMT_NUM }, |
| 185 | { XX("lelong"), FILE_LELONG, FILE_FMT_NUM }, |
| 186 | { XX("ledate"), FILE_LEDATE, FILE_FMT_STR }, |
| 187 | { XX("pstring"), FILE_PSTRING, FILE_FMT_STR }, |
| 188 | { XX("ldate"), FILE_LDATE, FILE_FMT_STR }, |
| 189 | { XX("beldate"), FILE_BELDATE, FILE_FMT_STR }, |
| 190 | { XX("leldate"), FILE_LELDATE, FILE_FMT_STR }, |
| 191 | { XX("regex"), FILE_REGEX, FILE_FMT_STR }, |
| 192 | { XX("bestring16"), FILE_BESTRING16, FILE_FMT_STR }, |
| 193 | { XX("lestring16"), FILE_LESTRING16, FILE_FMT_STR }, |
| 194 | { XX("search"), FILE_SEARCH, FILE_FMT_STR }, |
| 195 | { XX("medate"), FILE_MEDATE, FILE_FMT_STR }, |
| 196 | { XX("meldate"), FILE_MELDATE, FILE_FMT_STR }, |
| 197 | { XX("melong"), FILE_MELONG, FILE_FMT_NUM }, |
| 198 | { XX("quad"), FILE_QUAD, FILE_FMT_QUAD }, |
| 199 | { XX("lequad"), FILE_LEQUAD, FILE_FMT_QUAD }, |
| 200 | { XX("bequad"), FILE_BEQUAD, FILE_FMT_QUAD }, |
| 201 | { XX("qdate"), FILE_QDATE, FILE_FMT_STR }, |
| 202 | { XX("leqdate"), FILE_LEQDATE, FILE_FMT_STR }, |
| 203 | { XX("beqdate"), FILE_BEQDATE, FILE_FMT_STR }, |
| 204 | { XX("qldate"), FILE_QLDATE, FILE_FMT_STR }, |
| 205 | { XX("leqldate"), FILE_LEQLDATE, FILE_FMT_STR }, |
| 206 | { XX("beqldate"), FILE_BEQLDATE, FILE_FMT_STR }, |
| 207 | { XX("float"), FILE_FLOAT, FILE_FMT_FLOAT }, |
| 208 | { XX("befloat"), FILE_BEFLOAT, FILE_FMT_FLOAT }, |
| 209 | { XX("lefloat"), FILE_LEFLOAT, FILE_FMT_FLOAT }, |
| 210 | { XX("double"), FILE_DOUBLE, FILE_FMT_DOUBLE }, |
| 211 | { XX("bedouble"), FILE_BEDOUBLE, FILE_FMT_DOUBLE }, |
| 212 | { XX("ledouble"), FILE_LEDOUBLE, FILE_FMT_DOUBLE }, |
| 213 | { XX("leid3"), FILE_LEID3, FILE_FMT_NUM }, |
| 214 | { XX("beid3"), FILE_BEID3, FILE_FMT_NUM }, |
| 215 | { XX("indirect"), FILE_INDIRECT, FILE_FMT_NONE }, |
| 216 | { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, |
| 217 | # undef XX |
| 218 | # undef XX_NULL |
| 219 | }; |
| 220 | |
| 221 | private int |
| 222 | get_type(const char *l, const char **t) |
| 223 | { |
| 224 | const struct type_tbl_s *p; |
| 225 | |
| 226 | for (p = type_tbl; p->len; p++) { |
| 227 | if (strncmp(l, p->name, p->len) == 0) { |
| 228 | if (t) |
| 229 | *t = l + p->len; |
| 230 | break; |
| 231 | } |
| 232 | } |
| 233 | return p->type; |
| 234 | } |
| 235 | |
| 236 | private void |
| 237 | init_file_tables(void) |
| 238 | { |
| 239 | static int done = 0; |
| 240 | const struct type_tbl_s *p; |
| 241 | |
| 242 | if (done) |
| 243 | return; |
| 244 | done++; |
| 245 | |
| 246 | for (p = type_tbl; p->len; p++) { |
| 247 | assert(p->type < FILE_NAMES_SIZE); |
| 248 | file_names[p->type] = p->name; |
| 249 | file_formats[p->type] = p->format; |
| 250 | } |
| 251 | } |
| 252 | |
| 253 | /* |
| 254 | * Handle one file or directory. |
| 255 | */ |
| 256 | private int |
| 257 | apprentice_1(struct magic_set *ms, const char *fn, int action, |
| 258 | struct mlist *mlist) |
| 259 | { |
| 260 | struct magic *magic = NULL; |
| 261 | uint32_t nmagic = 0; |
| 262 | struct mlist *ml; |
| 263 | int rv = -1; |
| 264 | int mapped; |
| 265 | |
| 266 | if (magicsize != FILE_MAGICSIZE) { |
| 267 | file_error(ms, 0, "magic element size %lu != %lu", |
| 268 | (unsigned long)sizeof(*magic), |
| 269 | (unsigned long)FILE_MAGICSIZE); |
| 270 | return -1; |
| 271 | } |
| 272 | |
| 273 | if (action == FILE_COMPILE) { |
| 274 | rv = apprentice_load(ms, &magic, &nmagic, fn, action); |
| 275 | if (rv != 0) |
| 276 | return -1; |
| 277 | rv = apprentice_compile(ms, &magic, &nmagic, fn); |
| 278 | free(magic); |
| 279 | return rv; |
| 280 | } |
| 281 | |
| 282 | #ifndef COMPILE_ONLY |
| 283 | if ((rv = apprentice_map(ms, &magic, &nmagic, fn)) == -1) { |
| 284 | if (ms->flags & MAGIC_CHECK) |
| 285 | file_magwarn(ms, "using regular magic file `%s'", fn); |
| 286 | rv = apprentice_load(ms, &magic, &nmagic, fn, action); |
| 287 | if (rv != 0) |
| 288 | return -1; |
| 289 | } |
| 290 | |
| 291 | mapped = rv; |
| 292 | |
| 293 | if (magic == NULL) { |
| 294 | file_delmagic(magic, mapped, nmagic); |
| 295 | return -1; |
| 296 | } |
| 297 | |
| 298 | if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL) { |
| 299 | file_delmagic(magic, mapped, nmagic); |
| 300 | file_oomem(ms, sizeof(*ml)); |
| 301 | return -1; |
| 302 | } |
| 303 | |
| 304 | ml->magic = magic; |
| 305 | ml->nmagic = nmagic; |
| 306 | ml->mapped = mapped; |
| 307 | |
| 308 | mlist->prev->next = ml; |
| 309 | ml->prev = mlist->prev; |
| 310 | ml->next = mlist; |
| 311 | mlist->prev = ml; |
| 312 | |
| 313 | if (action == FILE_LIST) { |
| 314 | printf("Binary patterns:\n"); |
| 315 | apprentice_list(mlist, BINTEST); |
| 316 | printf("Text patterns:\n"); |
| 317 | apprentice_list(mlist, TEXTTEST); |
| 318 | } |
| 319 | |
| 320 | return 0; |
| 321 | #endif /* COMPILE_ONLY */ |
| 322 | } |
| 323 | |
| 324 | protected void |
| 325 | file_delmagic(struct magic *p, int type, size_t entries) |
| 326 | { |
| 327 | if (p == NULL) |
| 328 | return; |
| 329 | switch (type) { |
| 330 | case 2: |
| 331 | #ifdef QUICK |
| 332 | p--; |
| 333 | (void)munmap((void *)p, sizeof(*p) * (entries + 1)); |
| 334 | break; |
| 335 | #else |
| 336 | (void)&entries; |
| 337 | abort(); |
| 338 | /*NOTREACHED*/ |
| 339 | #endif |
| 340 | case 1: |
| 341 | p--; |
| 342 | /*FALLTHROUGH*/ |
| 343 | case 0: |
| 344 | free(p); |
| 345 | break; |
| 346 | default: |
| 347 | abort(); |
| 348 | } |
| 349 | } |
| 350 | |
| 351 | /* const char *fn: list of magic files and directories */ |
| 352 | protected struct mlist * |
| 353 | file_apprentice(struct magic_set *ms, const char *fn, int action) |
| 354 | { |
| 355 | char *p, *mfn; |
| 356 | int file_err, errs = -1; |
| 357 | struct mlist *mlist; |
| 358 | |
| 359 | if ((fn = magic_getpath(fn, action)) == NULL) |
| 360 | return NULL; |
| 361 | |
| 362 | init_file_tables(); |
| 363 | |
| 364 | if ((mfn = strdup(fn)) == NULL) { |
| 365 | file_oomem(ms, strlen(fn)); |
| 366 | return NULL; |
| 367 | } |
| 368 | fn = mfn; |
| 369 | |
| 370 | if ((mlist = CAST(struct mlist *, malloc(sizeof(*mlist)))) == NULL) { |
| 371 | free(mfn); |
| 372 | file_oomem(ms, sizeof(*mlist)); |
| 373 | return NULL; |
| 374 | } |
| 375 | mlist->next = mlist->prev = mlist; |
| 376 | |
| 377 | while (fn) { |
| 378 | p = strchr(fn, PATHSEP); |
| 379 | if (p) |
| 380 | *p++ = '\0'; |
| 381 | if (*fn == '\0') |
| 382 | break; |
| 383 | file_err = apprentice_1(ms, fn, action, mlist); |
| 384 | errs = MAX(errs, file_err); |
| 385 | fn = p; |
| 386 | } |
| 387 | if (errs == -1) { |
| 388 | free(mfn); |
| 389 | free(mlist); |
| 390 | mlist = NULL; |
| 391 | file_error(ms, 0, "could not find any magic files!"); |
| 392 | return NULL; |
| 393 | } |
| 394 | free(mfn); |
| 395 | return mlist; |
| 396 | } |
| 397 | |
| 398 | /* |
| 399 | * Get weight of this magic entry, for sorting purposes. |
| 400 | */ |
| 401 | private size_t |
| 402 | apprentice_magic_strength(const struct magic *m) |
| 403 | { |
| 404 | #define MULT 10 |
| 405 | size_t val = 2 * MULT; /* baseline strength */ |
| 406 | |
| 407 | switch (m->type) { |
| 408 | case FILE_DEFAULT: /* make sure this sorts last */ |
| 409 | if (m->factor_op != FILE_FACTOR_OP_NONE) |
| 410 | abort(); |
| 411 | return 0; |
| 412 | |
| 413 | case FILE_BYTE: |
| 414 | val += 1 * MULT; |
| 415 | break; |
| 416 | |
| 417 | case FILE_SHORT: |
| 418 | case FILE_LESHORT: |
| 419 | case FILE_BESHORT: |
| 420 | val += 2 * MULT; |
| 421 | break; |
| 422 | |
| 423 | case FILE_LONG: |
| 424 | case FILE_LELONG: |
| 425 | case FILE_BELONG: |
| 426 | case FILE_MELONG: |
| 427 | val += 4 * MULT; |
| 428 | break; |
| 429 | |
| 430 | case FILE_PSTRING: |
| 431 | case FILE_STRING: |
| 432 | val += m->vallen * MULT; |
| 433 | break; |
| 434 | |
| 435 | case FILE_BESTRING16: |
| 436 | case FILE_LESTRING16: |
| 437 | val += m->vallen * MULT / 2; |
| 438 | break; |
| 439 | |
| 440 | case FILE_SEARCH: |
| 441 | case FILE_REGEX: |
| 442 | val += m->vallen * MAX(MULT / m->vallen, 1); |
| 443 | break; |
| 444 | |
| 445 | case FILE_DATE: |
| 446 | case FILE_LEDATE: |
| 447 | case FILE_BEDATE: |
| 448 | case FILE_MEDATE: |
| 449 | case FILE_LDATE: |
| 450 | case FILE_LELDATE: |
| 451 | case FILE_BELDATE: |
| 452 | case FILE_MELDATE: |
| 453 | case FILE_FLOAT: |
| 454 | case FILE_BEFLOAT: |
| 455 | case FILE_LEFLOAT: |
| 456 | val += 4 * MULT; |
| 457 | break; |
| 458 | |
| 459 | case FILE_QUAD: |
| 460 | case FILE_BEQUAD: |
| 461 | case FILE_LEQUAD: |
| 462 | case FILE_QDATE: |
| 463 | case FILE_LEQDATE: |
| 464 | case FILE_BEQDATE: |
| 465 | case FILE_QLDATE: |
| 466 | case FILE_LEQLDATE: |
| 467 | case FILE_BEQLDATE: |
| 468 | case FILE_DOUBLE: |
| 469 | case FILE_BEDOUBLE: |
| 470 | case FILE_LEDOUBLE: |
| 471 | val += 8 * MULT; |
| 472 | break; |
| 473 | |
| 474 | default: |
| 475 | val = 0; |
| 476 | (void)fprintf(stderr, "Bad type %d\n", m->type); |
| 477 | abort(); |
| 478 | } |
| 479 | |
| 480 | switch (m->reln) { |
| 481 | case 'x': /* matches anything penalize */ |
| 482 | case '!': /* matches almost anything penalize */ |
| 483 | val = 0; |
| 484 | break; |
| 485 | |
| 486 | case '=': /* Exact match, prefer */ |
| 487 | val += MULT; |
| 488 | break; |
| 489 | |
| 490 | case '>': |
| 491 | case '<': /* comparison match reduce strength */ |
| 492 | val -= 2 * MULT; |
| 493 | break; |
| 494 | |
| 495 | case '^': |
| 496 | case '&': /* masking bits, we could count them too */ |
| 497 | val -= MULT; |
| 498 | break; |
| 499 | |
| 500 | default: |
| 501 | (void)fprintf(stderr, "Bad relation %c\n", m->reln); |
| 502 | abort(); |
| 503 | } |
| 504 | |
| 505 | if (val == 0) /* ensure we only return 0 for FILE_DEFAULT */ |
| 506 | val = 1; |
| 507 | |
| 508 | switch (m->factor_op) { |
| 509 | case FILE_FACTOR_OP_NONE: |
| 510 | break; |
| 511 | case FILE_FACTOR_OP_PLUS: |
| 512 | val += m->factor; |
| 513 | break; |
| 514 | case FILE_FACTOR_OP_MINUS: |
| 515 | val -= m->factor; |
| 516 | break; |
| 517 | case FILE_FACTOR_OP_TIMES: |
| 518 | val *= m->factor; |
| 519 | break; |
| 520 | case FILE_FACTOR_OP_DIV: |
| 521 | val /= m->factor; |
| 522 | break; |
| 523 | default: |
| 524 | abort(); |
| 525 | } |
| 526 | |
| 527 | /* |
| 528 | * Magic entries with no description get a bonus because they depend |
| 529 | * on subsequent magic entries to print something. |
| 530 | */ |
| 531 | if (m->desc[0] == '\0') |
| 532 | val++; |
| 533 | return val; |
| 534 | } |
| 535 | |
| 536 | /* |
| 537 | * Sort callback for sorting entries by "strength" (basically length) |
| 538 | */ |
| 539 | private int |
| 540 | apprentice_sort(const void *a, const void *b) |
| 541 | { |
| 542 | const struct magic_entry *ma = CAST(const struct magic_entry *, a); |
| 543 | const struct magic_entry *mb = CAST(const struct magic_entry *, b); |
| 544 | size_t sa = apprentice_magic_strength(ma->mp); |
| 545 | size_t sb = apprentice_magic_strength(mb->mp); |
| 546 | if (sa == sb) |
| 547 | return 0; |
| 548 | else if (sa > sb) |
| 549 | return -1; |
| 550 | else |
| 551 | return 1; |
| 552 | } |
| 553 | |
| 554 | /* |
| 555 | * Shows sorted patterns list in the order which is used for the matching |
| 556 | */ |
| 557 | private void |
| 558 | apprentice_list(struct mlist *mlist, int mode) |
| 559 | { |
| 560 | uint32_t magindex = 0; |
| 561 | struct mlist *ml; |
| 562 | for (ml = mlist->next; ml != mlist; ml = ml->next) { |
| 563 | for (magindex = 0; magindex < ml->nmagic; magindex++) { |
| 564 | struct magic *m = &ml->magic[magindex]; |
| 565 | if ((m->flag & mode) != mode) { |
| 566 | /* Skip sub-tests */ |
| 567 | while (magindex + 1 < ml->nmagic && |
| 568 | ml->magic[magindex + 1].cont_level != 0) |
| 569 | ++magindex; |
| 570 | continue; /* Skip to next top-level test*/ |
| 571 | } |
| 572 | |
| 573 | /* |
| 574 | * Try to iterate over the tree until we find item with |
| 575 | * description/mimetype. |
| 576 | */ |
| 577 | while (magindex + 1 < ml->nmagic && |
| 578 | ml->magic[magindex + 1].cont_level != 0 && |
| 579 | *ml->magic[magindex].desc == '\0' && |
| 580 | *ml->magic[magindex].mimetype == '\0') |
| 581 | magindex++; |
| 582 | |
| 583 | printf("Strength = %3" SIZE_T_FORMAT "u : %s [%s]\n", |
| 584 | apprentice_magic_strength(m), |
| 585 | ml->magic[magindex].desc, |
| 586 | ml->magic[magindex].mimetype); |
| 587 | } |
| 588 | } |
| 589 | } |
| 590 | |
| 591 | private void |
| 592 | set_test_type(struct magic *mstart, struct magic *m) |
| 593 | { |
| 594 | switch (m->type) { |
| 595 | case FILE_BYTE: |
| 596 | case FILE_SHORT: |
| 597 | case FILE_LONG: |
| 598 | case FILE_DATE: |
| 599 | case FILE_BESHORT: |
| 600 | case FILE_BELONG: |
| 601 | case FILE_BEDATE: |
| 602 | case FILE_LESHORT: |
| 603 | case FILE_LELONG: |
| 604 | case FILE_LEDATE: |
| 605 | case FILE_LDATE: |
| 606 | case FILE_BELDATE: |
| 607 | case FILE_LELDATE: |
| 608 | case FILE_MEDATE: |
| 609 | case FILE_MELDATE: |
| 610 | case FILE_MELONG: |
| 611 | case FILE_QUAD: |
| 612 | case FILE_LEQUAD: |
| 613 | case FILE_BEQUAD: |
| 614 | case FILE_QDATE: |
| 615 | case FILE_LEQDATE: |
| 616 | case FILE_BEQDATE: |
| 617 | case FILE_QLDATE: |
| 618 | case FILE_LEQLDATE: |
| 619 | case FILE_BEQLDATE: |
| 620 | case FILE_FLOAT: |
| 621 | case FILE_BEFLOAT: |
| 622 | case FILE_LEFLOAT: |
| 623 | case FILE_DOUBLE: |
| 624 | case FILE_BEDOUBLE: |
| 625 | case FILE_LEDOUBLE: |
| 626 | mstart->flag |= BINTEST; |
| 627 | break; |
| 628 | case FILE_STRING: |
| 629 | case FILE_PSTRING: |
| 630 | case FILE_BESTRING16: |
| 631 | case FILE_LESTRING16: |
| 632 | /* Allow text overrides */ |
| 633 | if (mstart->str_flags & STRING_TEXTTEST) |
| 634 | mstart->flag |= TEXTTEST; |
| 635 | else |
| 636 | mstart->flag |= BINTEST; |
| 637 | break; |
| 638 | case FILE_REGEX: |
| 639 | case FILE_SEARCH: |
| 640 | /* Check for override */ |
| 641 | if (mstart->str_flags & STRING_BINTEST) |
| 642 | mstart->flag |= BINTEST; |
| 643 | if (mstart->str_flags & STRING_TEXTTEST) |
| 644 | mstart->flag |= TEXTTEST; |
| 645 | |
| 646 | if (mstart->flag & (TEXTTEST|BINTEST)) |
| 647 | break; |
| 648 | |
| 649 | /* binary test if pattern is not text */ |
| 650 | if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL, |
| 651 | NULL) <= 0) |
| 652 | mstart->flag |= BINTEST; |
| 653 | else |
| 654 | mstart->flag |= TEXTTEST; |
| 655 | break; |
| 656 | case FILE_DEFAULT: |
| 657 | /* can't deduce anything; we shouldn't see this at the |
| 658 | top level anyway */ |
| 659 | break; |
| 660 | case FILE_INVALID: |
| 661 | default: |
| 662 | /* invalid search type, but no need to complain here */ |
| 663 | break; |
| 664 | } |
| 665 | } |
| 666 | |
| 667 | /* |
| 668 | * Load and parse one file. |
| 669 | */ |
| 670 | private void |
| 671 | load_1(struct magic_set *ms, int action, const char *fn, int *errs, |
| 672 | struct magic_entry **marray, uint32_t *marraycount) |
| 673 | { |
| 674 | size_t lineno = 0, llen = 0; |
| 675 | char *line = NULL; |
| 676 | ssize_t len; |
| 677 | |
| 678 | FILE *f = fopen(ms->file = fn, "r"); |
| 679 | if (f == NULL) { |
| 680 | if (errno != ENOENT) |
| 681 | file_error(ms, errno, "cannot read magic file `%s'", |
| 682 | fn); |
| 683 | (*errs)++; |
| 684 | return; |
| 685 | } |
| 686 | |
| 687 | /* read and parse this file */ |
| 688 | for (ms->line = 1; (len = getline(&line, &llen, f)) != -1; |
| 689 | ms->line++) { |
| 690 | if (len == 0) /* null line, garbage, etc */ |
| 691 | continue; |
| 692 | if (line[len - 1] == '\n') { |
| 693 | lineno++; |
| 694 | line[len - 1] = '\0'; /* delete newline */ |
| 695 | } |
| 696 | switch (line[0]) { |
| 697 | case '\0': /* empty, do not parse */ |
| 698 | case '#': /* comment, do not parse */ |
| 699 | continue; |
| 700 | case '!': |
| 701 | if (line[1] == ':') { |
| 702 | size_t i; |
| 703 | |
| 704 | for (i = 0; bang[i].name != NULL; i++) { |
| 705 | if ((size_t)(len - 2) > bang[i].len && |
| 706 | memcmp(bang[i].name, line + 2, |
| 707 | bang[i].len) == 0) |
| 708 | break; |
| 709 | } |
| 710 | if (bang[i].name == NULL) { |
| 711 | file_error(ms, 0, |
| 712 | "Unknown !: entry `%s'", line); |
| 713 | (*errs)++; |
| 714 | continue; |
| 715 | } |
| 716 | if (*marraycount == 0) { |
| 717 | file_error(ms, 0, |
| 718 | "No current entry for :!%s type", |
| 719 | bang[i].name); |
| 720 | (*errs)++; |
| 721 | continue; |
| 722 | } |
| 723 | if ((*bang[i].fun)(ms, |
| 724 | &(*marray)[*marraycount - 1], |
| 725 | line + bang[i].len + 2) != 0) { |
| 726 | (*errs)++; |
| 727 | continue; |
| 728 | } |
| 729 | continue; |
| 730 | } |
| 731 | /*FALLTHROUGH*/ |
| 732 | default: |
| 733 | if (parse(ms, marray, marraycount, line, lineno, |
| 734 | action) != 0) |
| 735 | (*errs)++; |
| 736 | break; |
| 737 | } |
| 738 | } |
| 739 | free(line); |
| 740 | (void)fclose(f); |
| 741 | } |
| 742 | |
| 743 | /* |
| 744 | * parse a file or directory of files |
| 745 | * const char *fn: name of magic file or directory |
| 746 | */ |
/*
 * qsort() comparison callback for an array of C-string pointers: each
 * argument points at one array element, and the strings they refer to
 * are compared with strcmp().
 */
private int
cmpstrp(const void *p1, const void *p2)
{
	const char *lhs = *(char *const *)p1;
	const char *rhs = *(char *const *)p2;

	return strcmp(lhs, rhs);
}
| 752 | |
| 753 | private int |
| 754 | apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, |
| 755 | const char *fn, int action) |
| 756 | { |
| 757 | int errs = 0; |
| 758 | struct magic_entry *marray; |
| 759 | uint32_t marraycount, i, mentrycount = 0, starttest; |
| 760 | size_t slen, files = 0, maxfiles = 0; |
| 761 | char **filearr = NULL, *mfn; |
| 762 | struct stat st; |
| 763 | DIR *dir; |
| 764 | struct dirent *d; |
| 765 | |
| 766 | ms->flags |= MAGIC_CHECK; /* Enable checks for parsed files */ |
| 767 | |
| 768 | maxmagic = MAXMAGIS; |
| 769 | if ((marray = CAST(struct magic_entry *, calloc(maxmagic, |
| 770 | sizeof(*marray)))) == NULL) { |
| 771 | file_oomem(ms, maxmagic * sizeof(*marray)); |
| 772 | return -1; |
| 773 | } |
| 774 | marraycount = 0; |
| 775 | |
| 776 | /* print silly verbose header for USG compat. */ |
| 777 | if (action == FILE_CHECK) |
| 778 | (void)fprintf(stderr, "%s\n", usg_hdr); |
| 779 | |
| 780 | /* load directory or file */ |
| 781 | if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) { |
| 782 | dir = opendir(fn); |
| 783 | if (!dir) { |
| 784 | errs++; |
| 785 | goto out; |
| 786 | } |
| 787 | while ((d = readdir(dir)) != NULL) { |
| 788 | if (asprintf(&mfn, "%s/%s", fn, d->d_name) < 0) { |
| 789 | file_oomem(ms, |
| 790 | strlen(fn) + strlen(d->d_name) + 2); |
| 791 | errs++; |
| 792 | closedir(dir); |
| 793 | goto out; |
| 794 | } |
| 795 | if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) { |
| 796 | free(mfn); |
| 797 | continue; |
| 798 | } |
| 799 | if (files >= maxfiles) { |
| 800 | size_t mlen; |
| 801 | maxfiles = (maxfiles + 1) * 2; |
| 802 | mlen = maxfiles * sizeof(*filearr); |
| 803 | if ((filearr = CAST(char **, |
| 804 | realloc(filearr, mlen))) == NULL) { |
| 805 | file_oomem(ms, mlen); |
| 806 | free(mfn); |
| 807 | closedir(dir); |
| 808 | errs++; |
| 809 | goto out; |
| 810 | } |
| 811 | } |
| 812 | filearr[files++] = mfn; |
| 813 | } |
| 814 | closedir(dir); |
| 815 | qsort(filearr, files, sizeof(*filearr), cmpstrp); |
| 816 | for (i = 0; i < files; i++) { |
| 817 | load_1(ms, action, filearr[i], &errs, &marray, |
| 818 | &marraycount); |
| 819 | free(filearr[i]); |
| 820 | } |
| 821 | free(filearr); |
| 822 | } else |
| 823 | load_1(ms, action, fn, &errs, &marray, &marraycount); |
| 824 | if (errs) |
| 825 | goto out; |
| 826 | |
| 827 | /* Set types of tests */ |
| 828 | for (i = 0; i < marraycount; ) { |
| 829 | if (marray[i].mp->cont_level != 0) { |
| 830 | i++; |
| 831 | continue; |
| 832 | } |
| 833 | |
| 834 | starttest = i; |
| 835 | do { |
| 836 | static const char text[] = "text"; |
| 837 | static const char binary[] = "binary"; |
| 838 | static const size_t len = sizeof(text); |
| 839 | set_test_type(marray[starttest].mp, marray[i].mp); |
| 840 | if ((ms->flags & MAGIC_DEBUG) == 0) |
| 841 | continue; |
| 842 | (void)fprintf(stderr, "%s%s%s: %s\n", |
| 843 | marray[i].mp->mimetype, |
| 844 | marray[i].mp->mimetype[0] == '\0' ? "" : "; ", |
| 845 | marray[i].mp->desc[0] ? marray[i].mp->desc : |
| 846 | "(no description)", |
| 847 | marray[i].mp->flag & BINTEST ? binary : text); |
| 848 | if (marray[i].mp->flag & BINTEST) { |
| 849 | char *p = strstr(marray[i].mp->desc, text); |
| 850 | if (p && (p == marray[i].mp->desc || |
| 851 | isspace((unsigned char)p[-1])) && |
| 852 | (p + len - marray[i].mp->desc == |
| 853 | MAXstring || (p[len] == '\0' || |
| 854 | isspace((unsigned char)p[len])))) |
| 855 | (void)fprintf(stderr, "*** Possible " |
| 856 | "binary test for text type\n"); |
| 857 | } |
| 858 | } while (++i < marraycount && marray[i].mp->cont_level != 0); |
| 859 | } |
| 860 | |
| 861 | qsort(marray, marraycount, sizeof(*marray), apprentice_sort); |
| 862 | |
| 863 | /* |
| 864 | * Make sure that any level 0 "default" line is last (if one exists). |
| 865 | */ |
| 866 | for (i = 0; i < marraycount; i++) { |
| 867 | if (marray[i].mp->cont_level == 0 && |
| 868 | marray[i].mp->type == FILE_DEFAULT) { |
| 869 | while (++i < marraycount) |
| 870 | if (marray[i].mp->cont_level == 0) |
| 871 | break; |
| 872 | if (i != marraycount) { |
| 873 | /* XXX - Ugh! */ |
| 874 | ms->line = marray[i].mp->lineno; |
| 875 | file_magwarn(ms, |
| 876 | "level 0 \"default\" did not sort last"); |
| 877 | } |
| 878 | break; |
| 879 | } |
| 880 | } |
| 881 | |
| 882 | for (i = 0; i < marraycount; i++) |
| 883 | mentrycount += marray[i].cont_count; |
| 884 | |
| 885 | slen = sizeof(**magicp) * mentrycount; |
| 886 | if ((*magicp = CAST(struct magic *, malloc(slen))) == NULL) { |
| 887 | file_oomem(ms, slen); |
| 888 | errs++; |
| 889 | goto out; |
| 890 | } |
| 891 | |
| 892 | mentrycount = 0; |
| 893 | for (i = 0; i < marraycount; i++) { |
| 894 | (void)memcpy(*magicp + mentrycount, marray[i].mp, |
| 895 | marray[i].cont_count * sizeof(**magicp)); |
| 896 | mentrycount += marray[i].cont_count; |
| 897 | } |
| 898 | out: |
| 899 | for (i = 0; i < marraycount; i++) |
| 900 | free(marray[i].mp); |
| 901 | free(marray); |
| 902 | if (errs) { |
| 903 | *magicp = NULL; |
| 904 | *nmagicp = 0; |
| 905 | return errs; |
| 906 | } else { |
| 907 | *nmagicp = mentrycount; |
| 908 | return 0; |
| 909 | } |
| 910 | |
| 911 | } |
| 912 | |
| 913 | /* |
| 914 | * extend the sign bit if the comparison is to be signed |
| 915 | */ |
| 916 | protected uint64_t |
| 917 | file_signextend(struct magic_set *ms, struct magic *m, uint64_t v) |
| 918 | { |
| 919 | if (!(m->flag & UNSIGNED)) { |
| 920 | switch(m->type) { |
| 921 | /* |
| 922 | * Do not remove the casts below. They are |
| 923 | * vital. When later compared with the data, |
| 924 | * the sign extension must have happened. |
| 925 | */ |
| 926 | case FILE_BYTE: |
| 927 | v = (char) v; |
| 928 | break; |
| 929 | case FILE_SHORT: |
| 930 | case FILE_BESHORT: |
| 931 | case FILE_LESHORT: |
| 932 | v = (short) v; |
| 933 | break; |
| 934 | case FILE_DATE: |
| 935 | case FILE_BEDATE: |
| 936 | case FILE_LEDATE: |
| 937 | case FILE_MEDATE: |
| 938 | case FILE_LDATE: |
| 939 | case FILE_BELDATE: |
| 940 | case FILE_LELDATE: |
| 941 | case FILE_MELDATE: |
| 942 | case FILE_LONG: |
| 943 | case FILE_BELONG: |
| 944 | case FILE_LELONG: |
| 945 | case FILE_MELONG: |
| 946 | case FILE_FLOAT: |
| 947 | case FILE_BEFLOAT: |
| 948 | case FILE_LEFLOAT: |
| 949 | v = (int32_t) v; |
| 950 | break; |
| 951 | case FILE_QUAD: |
| 952 | case FILE_BEQUAD: |
| 953 | case FILE_LEQUAD: |
| 954 | case FILE_QDATE: |
| 955 | case FILE_QLDATE: |
| 956 | case FILE_BEQDATE: |
| 957 | case FILE_BEQLDATE: |
| 958 | case FILE_LEQDATE: |
| 959 | case FILE_LEQLDATE: |
| 960 | case FILE_DOUBLE: |
| 961 | case FILE_BEDOUBLE: |
| 962 | case FILE_LEDOUBLE: |
| 963 | v = (int64_t) v; |
| 964 | break; |
| 965 | case FILE_STRING: |
| 966 | case FILE_PSTRING: |
| 967 | case FILE_BESTRING16: |
| 968 | case FILE_LESTRING16: |
| 969 | case FILE_REGEX: |
| 970 | case FILE_SEARCH: |
| 971 | case FILE_DEFAULT: |
| 972 | case FILE_INDIRECT: |
| 973 | break; |
| 974 | default: |
| 975 | if (ms->flags & MAGIC_CHECK) |
| 976 | file_magwarn(ms, "cannot happen: m->type=%d\n", |
| 977 | m->type); |
| 978 | return ~0U; |
| 979 | } |
| 980 | } |
| 981 | return v; |
| 982 | } |
| 983 | |
| 984 | private int |
| 985 | string_modifier_check(struct magic_set *ms, struct magic *m) |
| 986 | { |
| 987 | if ((ms->flags & MAGIC_CHECK) == 0) |
| 988 | return 0; |
| 989 | |
| 990 | if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) { |
| 991 | file_magwarn(ms, |
| 992 | "'/BHhLl' modifiers are only allowed for pascal strings\n"); |
| 993 | return -1; |
| 994 | } |
| 995 | switch (m->type) { |
| 996 | case FILE_BESTRING16: |
| 997 | case FILE_LESTRING16: |
| 998 | if (m->str_flags != 0) { |
| 999 | file_magwarn(ms, |
| 1000 | "no modifiers allowed for 16-bit strings\n"); |
| 1001 | return -1; |
| 1002 | } |
| 1003 | break; |
| 1004 | case FILE_STRING: |
| 1005 | case FILE_PSTRING: |
| 1006 | if ((m->str_flags & REGEX_OFFSET_START) != 0) { |
| 1007 | file_magwarn(ms, |
| 1008 | "'/%c' only allowed on regex and search\n", |
| 1009 | CHAR_REGEX_OFFSET_START); |
| 1010 | return -1; |
| 1011 | } |
| 1012 | break; |
| 1013 | case FILE_SEARCH: |
| 1014 | if (m->str_range == 0) { |
| 1015 | file_magwarn(ms, |
| 1016 | "missing range; defaulting to %d\n", |
| 1017 | STRING_DEFAULT_RANGE); |
| 1018 | m->str_range = STRING_DEFAULT_RANGE; |
| 1019 | return -1; |
| 1020 | } |
| 1021 | break; |
| 1022 | case FILE_REGEX: |
| 1023 | if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) { |
| 1024 | file_magwarn(ms, "'/%c' not allowed on regex\n", |
| 1025 | CHAR_COMPACT_WHITESPACE); |
| 1026 | return -1; |
| 1027 | } |
| 1028 | if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) { |
| 1029 | file_magwarn(ms, "'/%c' not allowed on regex\n", |
| 1030 | CHAR_COMPACT_OPTIONAL_WHITESPACE); |
| 1031 | return -1; |
| 1032 | } |
| 1033 | break; |
| 1034 | default: |
| 1035 | file_magwarn(ms, "coding error: m->type=%d\n", |
| 1036 | m->type); |
| 1037 | return -1; |
| 1038 | } |
| 1039 | return 0; |
| 1040 | } |
| 1041 | |
| 1042 | private int |
| 1043 | get_op(char c) |
| 1044 | { |
| 1045 | switch (c) { |
| 1046 | case '&': |
| 1047 | return FILE_OPAND; |
| 1048 | case '|': |
| 1049 | return FILE_OPOR; |
| 1050 | case '^': |
| 1051 | return FILE_OPXOR; |
| 1052 | case '+': |
| 1053 | return FILE_OPADD; |
| 1054 | case '-': |
| 1055 | return FILE_OPMINUS; |
| 1056 | case '*': |
| 1057 | return FILE_OPMULTIPLY; |
| 1058 | case '/': |
| 1059 | return FILE_OPDIVIDE; |
| 1060 | case '%': |
| 1061 | return FILE_OPMODULO; |
| 1062 | default: |
| 1063 | return -1; |
| 1064 | } |
| 1065 | } |
| 1066 | |
| 1067 | #ifdef ENABLE_CONDITIONALS |
| 1068 | private int |
| 1069 | get_cond(const char *l, const char **t) |
| 1070 | { |
| 1071 | static const struct cond_tbl_s { |
| 1072 | char name[8]; |
| 1073 | size_t len; |
| 1074 | int cond; |
| 1075 | } cond_tbl[] = { |
| 1076 | { "if", 2, COND_IF }, |
| 1077 | { "elif", 4, COND_ELIF }, |
| 1078 | { "else", 4, COND_ELSE }, |
| 1079 | { "", 0, COND_NONE }, |
| 1080 | }; |
| 1081 | const struct cond_tbl_s *p; |
| 1082 | |
| 1083 | for (p = cond_tbl; p->len; p++) { |
| 1084 | if (strncmp(l, p->name, p->len) == 0 && |
| 1085 | isspace((unsigned char)l[p->len])) { |
| 1086 | if (t) |
| 1087 | *t = l + p->len; |
| 1088 | break; |
| 1089 | } |
| 1090 | } |
| 1091 | return p->cond; |
| 1092 | } |
| 1093 | |
| 1094 | private int |
| 1095 | check_cond(struct magic_set *ms, int cond, uint32_t cont_level) |
| 1096 | { |
| 1097 | int last_cond; |
| 1098 | last_cond = ms->c.li[cont_level].last_cond; |
| 1099 | |
| 1100 | switch (cond) { |
| 1101 | case COND_IF: |
| 1102 | if (last_cond != COND_NONE && last_cond != COND_ELIF) { |
| 1103 | if (ms->flags & MAGIC_CHECK) |
| 1104 | file_magwarn(ms, "syntax error: `if'"); |
| 1105 | return -1; |
| 1106 | } |
| 1107 | last_cond = COND_IF; |
| 1108 | break; |
| 1109 | |
| 1110 | case COND_ELIF: |
| 1111 | if (last_cond != COND_IF && last_cond != COND_ELIF) { |
| 1112 | if (ms->flags & MAGIC_CHECK) |
| 1113 | file_magwarn(ms, "syntax error: `elif'"); |
| 1114 | return -1; |
| 1115 | } |
| 1116 | last_cond = COND_ELIF; |
| 1117 | break; |
| 1118 | |
| 1119 | case COND_ELSE: |
| 1120 | if (last_cond != COND_IF && last_cond != COND_ELIF) { |
| 1121 | if (ms->flags & MAGIC_CHECK) |
| 1122 | file_magwarn(ms, "syntax error: `else'"); |
| 1123 | return -1; |
| 1124 | } |
| 1125 | last_cond = COND_NONE; |
| 1126 | break; |
| 1127 | |
| 1128 | case COND_NONE: |
| 1129 | last_cond = COND_NONE; |
| 1130 | break; |
| 1131 | } |
| 1132 | |
| 1133 | ms->c.li[cont_level].last_cond = last_cond; |
| 1134 | return 0; |
| 1135 | } |
| 1136 | #endif /* ENABLE_CONDITIONALS */ |
| 1137 | |
| 1138 | /* |
| 1139 | * parse one line from magic file, put into magic[index++] if valid |
| 1140 | */ |
| 1141 | private int |
| 1142 | parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, |
| 1143 | const char *line, size_t lineno, int action) |
| 1144 | { |
| 1145 | #ifdef ENABLE_CONDITIONALS |
| 1146 | static uint32_t last_cont_level = 0; |
| 1147 | #endif |
| 1148 | size_t i; |
| 1149 | struct magic_entry *me; |
| 1150 | struct magic *m; |
| 1151 | const char *l = line; |
| 1152 | char *t; |
| 1153 | int op; |
| 1154 | uint32_t cont_level; |
| 1155 | |
| 1156 | cont_level = 0; |
| 1157 | |
| 1158 | while (*l == '>') { |
| 1159 | ++l; /* step over */ |
| 1160 | cont_level++; |
| 1161 | } |
| 1162 | #ifdef ENABLE_CONDITIONALS |
| 1163 | if (cont_level == 0 || cont_level > last_cont_level) |
| 1164 | if (file_check_mem(ms, cont_level) == -1) |
| 1165 | return -1; |
| 1166 | last_cont_level = cont_level; |
| 1167 | #endif |
| 1168 | |
| 1169 | #define ALLOC_CHUNK (size_t)10 |
| 1170 | #define ALLOC_INCR (size_t)200 |
| 1171 | |
| 1172 | if (cont_level != 0) { |
| 1173 | if (*nmentryp == 0) { |
| 1174 | file_error(ms, 0, "No current entry for continuation"); |
| 1175 | return -1; |
| 1176 | } |
| 1177 | me = &(*mentryp)[*nmentryp - 1]; |
| 1178 | if (me->cont_count == me->max_count) { |
| 1179 | struct magic *nm; |
| 1180 | size_t cnt = me->max_count + ALLOC_CHUNK; |
| 1181 | if ((nm = CAST(struct magic *, realloc(me->mp, |
| 1182 | sizeof(*nm) * cnt))) == NULL) { |
| 1183 | file_oomem(ms, sizeof(*nm) * cnt); |
| 1184 | return -1; |
| 1185 | } |
| 1186 | me->mp = m = nm; |
| 1187 | me->max_count = CAST(uint32_t, cnt); |
| 1188 | } |
| 1189 | m = &me->mp[me->cont_count++]; |
| 1190 | (void)memset(m, 0, sizeof(*m)); |
| 1191 | m->cont_level = cont_level; |
| 1192 | } else { |
| 1193 | if (*nmentryp == maxmagic) { |
| 1194 | struct magic_entry *mp; |
| 1195 | |
| 1196 | maxmagic += ALLOC_INCR; |
| 1197 | if ((mp = CAST(struct magic_entry *, |
| 1198 | realloc(*mentryp, sizeof(*mp) * maxmagic))) == |
| 1199 | NULL) { |
| 1200 | file_oomem(ms, sizeof(*mp) * maxmagic); |
| 1201 | return -1; |
| 1202 | } |
| 1203 | (void)memset(&mp[*nmentryp], 0, sizeof(*mp) * |
| 1204 | ALLOC_INCR); |
| 1205 | *mentryp = mp; |
| 1206 | } |
| 1207 | me = &(*mentryp)[*nmentryp]; |
| 1208 | if (me->mp == NULL) { |
| 1209 | size_t len = sizeof(*m) * ALLOC_CHUNK; |
| 1210 | if ((m = CAST(struct magic *, malloc(len))) == NULL) { |
| 1211 | file_oomem(ms, len); |
| 1212 | return -1; |
| 1213 | } |
| 1214 | me->mp = m; |
| 1215 | me->max_count = ALLOC_CHUNK; |
| 1216 | } else |
| 1217 | m = me->mp; |
| 1218 | (void)memset(m, 0, sizeof(*m)); |
| 1219 | m->factor_op = FILE_FACTOR_OP_NONE; |
| 1220 | m->cont_level = 0; |
| 1221 | me->cont_count = 1; |
| 1222 | } |
| 1223 | m->lineno = CAST(uint32_t, lineno); |
| 1224 | |
| 1225 | if (*l == '&') { /* m->cont_level == 0 checked below. */ |
| 1226 | ++l; /* step over */ |
| 1227 | m->flag |= OFFADD; |
| 1228 | } |
| 1229 | if (*l == '(') { |
| 1230 | ++l; /* step over */ |
| 1231 | m->flag |= INDIR; |
| 1232 | if (m->flag & OFFADD) |
| 1233 | m->flag = (m->flag & ~OFFADD) | INDIROFFADD; |
| 1234 | |
| 1235 | if (*l == '&') { /* m->cont_level == 0 checked below */ |
| 1236 | ++l; /* step over */ |
| 1237 | m->flag |= OFFADD; |
| 1238 | } |
| 1239 | } |
| 1240 | /* Indirect offsets are not valid at level 0. */ |
| 1241 | if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) |
| 1242 | if (ms->flags & MAGIC_CHECK) |
| 1243 | file_magwarn(ms, "relative offset at level 0"); |
| 1244 | |
| 1245 | /* get offset, then skip over it */ |
| 1246 | m->offset = (uint32_t)strtoul(l, &t, 0); |
| 1247 | if (l == t) |
| 1248 | if (ms->flags & MAGIC_CHECK) |
| 1249 | file_magwarn(ms, "offset `%s' invalid", l); |
| 1250 | l = t; |
| 1251 | |
| 1252 | if (m->flag & INDIR) { |
| 1253 | m->in_type = FILE_LONG; |
| 1254 | m->in_offset = 0; |
| 1255 | /* |
| 1256 | * read [.lbs][+-]nnnnn) |
| 1257 | */ |
| 1258 | if (*l == '.') { |
| 1259 | l++; |
| 1260 | switch (*l) { |
| 1261 | case 'l': |
| 1262 | m->in_type = FILE_LELONG; |
| 1263 | break; |
| 1264 | case 'L': |
| 1265 | m->in_type = FILE_BELONG; |
| 1266 | break; |
| 1267 | case 'm': |
| 1268 | m->in_type = FILE_MELONG; |
| 1269 | break; |
| 1270 | case 'h': |
| 1271 | case 's': |
| 1272 | m->in_type = FILE_LESHORT; |
| 1273 | break; |
| 1274 | case 'H': |
| 1275 | case 'S': |
| 1276 | m->in_type = FILE_BESHORT; |
| 1277 | break; |
| 1278 | case 'c': |
| 1279 | case 'b': |
| 1280 | case 'C': |
| 1281 | case 'B': |
| 1282 | m->in_type = FILE_BYTE; |
| 1283 | break; |
| 1284 | case 'e': |
| 1285 | case 'f': |
| 1286 | case 'g': |
| 1287 | m->in_type = FILE_LEDOUBLE; |
| 1288 | break; |
| 1289 | case 'E': |
| 1290 | case 'F': |
| 1291 | case 'G': |
| 1292 | m->in_type = FILE_BEDOUBLE; |
| 1293 | break; |
| 1294 | case 'i': |
| 1295 | m->in_type = FILE_LEID3; |
| 1296 | break; |
| 1297 | case 'I': |
| 1298 | m->in_type = FILE_BEID3; |
| 1299 | break; |
| 1300 | default: |
| 1301 | if (ms->flags & MAGIC_CHECK) |
| 1302 | file_magwarn(ms, |
| 1303 | "indirect offset type `%c' invalid", |
| 1304 | *l); |
| 1305 | break; |
| 1306 | } |
| 1307 | l++; |
| 1308 | } |
| 1309 | |
| 1310 | m->in_op = 0; |
| 1311 | if (*l == '~') { |
| 1312 | m->in_op |= FILE_OPINVERSE; |
| 1313 | l++; |
| 1314 | } |
| 1315 | if ((op = get_op(*l)) != -1) { |
| 1316 | m->in_op |= op; |
| 1317 | l++; |
| 1318 | } |
| 1319 | if (*l == '(') { |
| 1320 | m->in_op |= FILE_OPINDIRECT; |
| 1321 | l++; |
| 1322 | } |
| 1323 | if (isdigit((unsigned char)*l) || *l == '-') { |
| 1324 | m->in_offset = (int32_t)strtol(l, &t, 0); |
| 1325 | if (l == t) |
| 1326 | if (ms->flags & MAGIC_CHECK) |
| 1327 | file_magwarn(ms, |
| 1328 | "in_offset `%s' invalid", l); |
| 1329 | l = t; |
| 1330 | } |
| 1331 | if (*l++ != ')' || |
| 1332 | ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) |
| 1333 | if (ms->flags & MAGIC_CHECK) |
| 1334 | file_magwarn(ms, |
| 1335 | "missing ')' in indirect offset"); |
| 1336 | } |
| 1337 | EATAB; |
| 1338 | |
| 1339 | #ifdef ENABLE_CONDITIONALS |
| 1340 | m->cond = get_cond(l, &l); |
| 1341 | if (check_cond(ms, m->cond, cont_level) == -1) |
| 1342 | return -1; |
| 1343 | |
| 1344 | EATAB; |
| 1345 | #endif |
| 1346 | |
| 1347 | if (*l == 'u') { |
| 1348 | ++l; |
| 1349 | m->flag |= UNSIGNED; |
| 1350 | } |
| 1351 | |
| 1352 | m->type = get_type(l, &l); |
| 1353 | if (m->type == FILE_INVALID) { |
| 1354 | if (ms->flags & MAGIC_CHECK) |
| 1355 | file_magwarn(ms, "type `%s' invalid", l); |
| 1356 | return -1; |
| 1357 | } |
| 1358 | |
| 1359 | /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ |
| 1360 | /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */ |
| 1361 | |
| 1362 | m->mask_op = 0; |
| 1363 | if (*l == '~') { |
| 1364 | if (!IS_STRING(m->type)) |
| 1365 | m->mask_op |= FILE_OPINVERSE; |
| 1366 | else if (ms->flags & MAGIC_CHECK) |
| 1367 | file_magwarn(ms, "'~' invalid for string types"); |
| 1368 | ++l; |
| 1369 | } |
| 1370 | m->str_range = 0; |
| 1371 | m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0; |
| 1372 | if ((op = get_op(*l)) != -1) { |
| 1373 | if (!IS_STRING(m->type)) { |
| 1374 | uint64_t val; |
| 1375 | ++l; |
| 1376 | m->mask_op |= op; |
| 1377 | val = (uint64_t)strtoull(l, &t, 0); |
| 1378 | l = t; |
| 1379 | m->num_mask = file_signextend(ms, m, val); |
| 1380 | eatsize(&l); |
| 1381 | } |
| 1382 | else if (op == FILE_OPDIVIDE) { |
| 1383 | int have_range = 0; |
| 1384 | while (!isspace((unsigned char)*++l)) { |
| 1385 | switch (*l) { |
| 1386 | case '0': case '1': case '2': |
| 1387 | case '3': case '4': case '5': |
| 1388 | case '6': case '7': case '8': |
| 1389 | case '9': |
| 1390 | if (have_range && |
| 1391 | (ms->flags & MAGIC_CHECK)) |
| 1392 | file_magwarn(ms, |
| 1393 | "multiple ranges"); |
| 1394 | have_range = 1; |
| 1395 | m->str_range = CAST(uint32_t, |
| 1396 | strtoul(l, &t, 0)); |
| 1397 | if (m->str_range == 0) |
| 1398 | file_magwarn(ms, |
| 1399 | "zero range"); |
| 1400 | l = t - 1; |
| 1401 | break; |
| 1402 | case CHAR_COMPACT_WHITESPACE: |
| 1403 | m->str_flags |= |
| 1404 | STRING_COMPACT_WHITESPACE; |
| 1405 | break; |
| 1406 | case CHAR_COMPACT_OPTIONAL_WHITESPACE: |
| 1407 | m->str_flags |= |
| 1408 | STRING_COMPACT_OPTIONAL_WHITESPACE; |
| 1409 | break; |
| 1410 | case CHAR_IGNORE_LOWERCASE: |
| 1411 | m->str_flags |= STRING_IGNORE_LOWERCASE; |
| 1412 | break; |
| 1413 | case CHAR_IGNORE_UPPERCASE: |
| 1414 | m->str_flags |= STRING_IGNORE_UPPERCASE; |
| 1415 | break; |
| 1416 | case CHAR_REGEX_OFFSET_START: |
| 1417 | m->str_flags |= REGEX_OFFSET_START; |
| 1418 | break; |
| 1419 | case CHAR_BINTEST: |
| 1420 | m->str_flags |= STRING_BINTEST; |
| 1421 | break; |
| 1422 | case CHAR_TEXTTEST: |
| 1423 | m->str_flags |= STRING_TEXTTEST; |
| 1424 | break; |
| 1425 | case CHAR_PSTRING_1_LE: |
| 1426 | if (m->type != FILE_PSTRING) |
| 1427 | goto bad; |
| 1428 | m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_1_LE; |
| 1429 | break; |
| 1430 | case CHAR_PSTRING_2_BE: |
| 1431 | if (m->type != FILE_PSTRING) |
| 1432 | goto bad; |
| 1433 | m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_BE; |
| 1434 | break; |
| 1435 | case CHAR_PSTRING_2_LE: |
| 1436 | if (m->type != FILE_PSTRING) |
| 1437 | goto bad; |
| 1438 | m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_LE; |
| 1439 | break; |
| 1440 | case CHAR_PSTRING_4_BE: |
| 1441 | if (m->type != FILE_PSTRING) |
| 1442 | goto bad; |
| 1443 | m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE; |
| 1444 | break; |
| 1445 | case CHAR_PSTRING_4_LE: |
| 1446 | if (m->type != FILE_PSTRING) |
| 1447 | goto bad; |
| 1448 | m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE; |
| 1449 | break; |
| 1450 | case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF: |
| 1451 | if (m->type != FILE_PSTRING) |
| 1452 | goto bad; |
| 1453 | m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF; |
| 1454 | break; |
| 1455 | default: |
| 1456 | bad: |
| 1457 | if (ms->flags & MAGIC_CHECK) |
| 1458 | file_magwarn(ms, |
| 1459 | "string extension `%c' " |
| 1460 | "invalid", *l); |
| 1461 | return -1; |
| 1462 | } |
| 1463 | /* allow multiple '/' for readability */ |
| 1464 | if (l[1] == '/' && |
| 1465 | !isspace((unsigned char)l[2])) |
| 1466 | l++; |
| 1467 | } |
| 1468 | if (string_modifier_check(ms, m) == -1) |
| 1469 | return -1; |
| 1470 | } |
| 1471 | else { |
| 1472 | if (ms->flags & MAGIC_CHECK) |
| 1473 | file_magwarn(ms, "invalid string op: %c", *t); |
| 1474 | return -1; |
| 1475 | } |
| 1476 | } |
| 1477 | /* |
| 1478 | * We used to set mask to all 1's here, instead let's just not do |
| 1479 | * anything if mask = 0 (unless you have a better idea) |
| 1480 | */ |
| 1481 | EATAB; |
| 1482 | |
| 1483 | switch (*l) { |
| 1484 | case '>': |
| 1485 | case '<': |
| 1486 | m->reln = *l; |
| 1487 | ++l; |
| 1488 | if (*l == '=') { |
| 1489 | if (ms->flags & MAGIC_CHECK) { |
| 1490 | file_magwarn(ms, "%c= not supported", |
| 1491 | m->reln); |
| 1492 | return -1; |
| 1493 | } |
| 1494 | ++l; |
| 1495 | } |
| 1496 | break; |
| 1497 | /* Old-style anding: "0 byte &0x80 dynamically linked" */ |
| 1498 | case '&': |
| 1499 | case '^': |
| 1500 | case '=': |
| 1501 | m->reln = *l; |
| 1502 | ++l; |
| 1503 | if (*l == '=') { |
| 1504 | /* HP compat: ignore &= etc. */ |
| 1505 | ++l; |
| 1506 | } |
| 1507 | break; |
| 1508 | case '!': |
| 1509 | m->reln = *l; |
| 1510 | ++l; |
| 1511 | break; |
| 1512 | default: |
| 1513 | m->reln = '='; /* the default relation */ |
| 1514 | if (*l == 'x' && ((isascii((unsigned char)l[1]) && |
| 1515 | isspace((unsigned char)l[1])) || !l[1])) { |
| 1516 | m->reln = *l; |
| 1517 | ++l; |
| 1518 | } |
| 1519 | break; |
| 1520 | } |
| 1521 | /* |
| 1522 | * Grab the value part, except for an 'x' reln. |
| 1523 | */ |
| 1524 | if (m->reln != 'x' && getvalue(ms, m, &l, action)) |
| 1525 | return -1; |
| 1526 | |
| 1527 | /* |
| 1528 | * TODO finish this macro and start using it! |
| 1529 | * #define offsetcheck {if (offset > HOWMANY-1) |
| 1530 | * magwarn("offset too big"); } |
| 1531 | */ |
| 1532 | |
| 1533 | /* |
| 1534 | * Now get last part - the description |
| 1535 | */ |
| 1536 | EATAB; |
| 1537 | if (l[0] == '\b') { |
| 1538 | ++l; |
| 1539 | m->flag |= NOSPACE; |
| 1540 | } else if ((l[0] == '\\') && (l[1] == 'b')) { |
| 1541 | ++l; |
| 1542 | ++l; |
| 1543 | m->flag |= NOSPACE; |
| 1544 | } |
| 1545 | for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); ) |
| 1546 | continue; |
| 1547 | if (i == sizeof(m->desc)) { |
| 1548 | m->desc[sizeof(m->desc) - 1] = '\0'; |
| 1549 | if (ms->flags & MAGIC_CHECK) |
| 1550 | file_magwarn(ms, "description `%s' truncated", m->desc); |
| 1551 | } |
| 1552 | |
| 1553 | /* |
| 1554 | * We only do this check while compiling, or if any of the magic |
| 1555 | * files were not compiled. |
| 1556 | */ |
| 1557 | if (ms->flags & MAGIC_CHECK) { |
| 1558 | if (check_format(ms, m) == -1) |
| 1559 | return -1; |
| 1560 | } |
| 1561 | #ifndef COMPILE_ONLY |
| 1562 | if (action == FILE_CHECK) { |
| 1563 | file_mdump(m); |
| 1564 | } |
| 1565 | #endif |
| 1566 | m->mimetype[0] = '\0'; /* initialise MIME type to none */ |
| 1567 | if (m->cont_level == 0) |
| 1568 | ++(*nmentryp); /* make room for next */ |
| 1569 | return 0; |
| 1570 | } |
| 1571 | |
| 1572 | /* |
| 1573 | * parse a STRENGTH annotation line from magic file, put into magic[index - 1] |
| 1574 | * if valid |
| 1575 | */ |
| 1576 | private int |
| 1577 | parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line) |
| 1578 | { |
| 1579 | const char *l = line; |
| 1580 | char *el; |
| 1581 | unsigned long factor; |
| 1582 | struct magic *m = &me->mp[0]; |
| 1583 | |
| 1584 | if (m->factor_op != FILE_FACTOR_OP_NONE) { |
| 1585 | file_magwarn(ms, |
| 1586 | "Current entry already has a strength type: %c %d", |
| 1587 | m->factor_op, m->factor); |
| 1588 | return -1; |
| 1589 | } |
| 1590 | EATAB; |
| 1591 | switch (*l) { |
| 1592 | case FILE_FACTOR_OP_NONE: |
| 1593 | case FILE_FACTOR_OP_PLUS: |
| 1594 | case FILE_FACTOR_OP_MINUS: |
| 1595 | case FILE_FACTOR_OP_TIMES: |
| 1596 | case FILE_FACTOR_OP_DIV: |
| 1597 | m->factor_op = *l++; |
| 1598 | break; |
| 1599 | default: |
| 1600 | file_magwarn(ms, "Unknown factor op `%c'", *l); |
| 1601 | return -1; |
| 1602 | } |
| 1603 | EATAB; |
| 1604 | factor = strtoul(l, &el, 0); |
| 1605 | if (factor > 255) { |
| 1606 | file_magwarn(ms, "Too large factor `%lu'", factor); |
| 1607 | goto out; |
| 1608 | } |
| 1609 | if (*el && !isspace((unsigned char)*el)) { |
| 1610 | file_magwarn(ms, "Bad factor `%s'", l); |
| 1611 | goto out; |
| 1612 | } |
| 1613 | m->factor = (uint8_t)factor; |
| 1614 | if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) { |
| 1615 | file_magwarn(ms, "Cannot have factor op `%c' and factor %u", |
| 1616 | m->factor_op, m->factor); |
| 1617 | goto out; |
| 1618 | } |
| 1619 | return 0; |
| 1620 | out: |
| 1621 | m->factor_op = FILE_FACTOR_OP_NONE; |
| 1622 | m->factor = 0; |
| 1623 | return -1; |
| 1624 | } |
| 1625 | |
| 1626 | /* |
| 1627 | * Parse an Apple CREATOR/TYPE annotation from magic file and put it into |
| 1628 | * magic[index - 1] |
| 1629 | */ |
| 1630 | private int |
| 1631 | parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line) |
| 1632 | { |
| 1633 | size_t i; |
| 1634 | const char *l = line; |
| 1635 | struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1]; |
| 1636 | |
| 1637 | if (m->apple[0] != '\0') { |
| 1638 | file_magwarn(ms, "Current entry already has a APPLE type " |
| 1639 | "`%.8s', new type `%s'", m->mimetype, l); |
| 1640 | return -1; |
| 1641 | } |
| 1642 | |
| 1643 | EATAB; |
| 1644 | for (i = 0; *l && ((isascii((unsigned char)*l) && |
| 1645 | isalnum((unsigned char)*l)) || strchr("-+/.", *l)) && |
| 1646 | i < sizeof(m->apple); m->apple[i++] = *l++) |
| 1647 | continue; |
| 1648 | if (i == sizeof(m->apple) && *l) { |
| 1649 | /* We don't need to NUL terminate here, printing handles it */ |
| 1650 | if (ms->flags & MAGIC_CHECK) |
| 1651 | file_magwarn(ms, "APPLE type `%s' truncated %" |
| 1652 | SIZE_T_FORMAT "u", line, i); |
| 1653 | } |
| 1654 | |
| 1655 | if (i > 0) |
| 1656 | return 0; |
| 1657 | else |
| 1658 | return -1; |
| 1659 | } |
| 1660 | |
| 1661 | /* |
| 1662 | * parse a MIME annotation line from magic file, put into magic[index - 1] |
| 1663 | * if valid |
| 1664 | */ |
| 1665 | private int |
| 1666 | parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line) |
| 1667 | { |
| 1668 | size_t i; |
| 1669 | const char *l = line; |
| 1670 | struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1]; |
| 1671 | |
| 1672 | if (m->mimetype[0] != '\0') { |
| 1673 | file_magwarn(ms, "Current entry already has a MIME type `%s'," |
| 1674 | " new type `%s'", m->mimetype, l); |
| 1675 | return -1; |
| 1676 | } |
| 1677 | |
| 1678 | EATAB; |
| 1679 | for (i = 0; *l && ((isascii((unsigned char)*l) && |
| 1680 | isalnum((unsigned char)*l)) || strchr("-+/.", *l)) && |
| 1681 | i < sizeof(m->mimetype); m->mimetype[i++] = *l++) |
| 1682 | continue; |
| 1683 | if (i == sizeof(m->mimetype)) { |
| 1684 | m->mimetype[sizeof(m->mimetype) - 1] = '\0'; |
| 1685 | if (ms->flags & MAGIC_CHECK) |
| 1686 | file_magwarn(ms, "MIME type `%s' truncated %" |
| 1687 | SIZE_T_FORMAT "u", m->mimetype, i); |
| 1688 | } else |
| 1689 | m->mimetype[i] = '\0'; |
| 1690 | |
| 1691 | if (i > 0) |
| 1692 | return 0; |
| 1693 | else |
| 1694 | return -1; |
| 1695 | } |
| 1696 | |
| 1697 | private int |
| 1698 | check_format_type(const char *ptr, int type) |
| 1699 | { |
| 1700 | int quad = 0; |
| 1701 | if (*ptr == '\0') { |
| 1702 | /* Missing format string; bad */ |
| 1703 | return -1; |
| 1704 | } |
| 1705 | |
| 1706 | switch (type) { |
| 1707 | case FILE_FMT_QUAD: |
| 1708 | quad = 1; |
| 1709 | /*FALLTHROUGH*/ |
| 1710 | case FILE_FMT_NUM: |
| 1711 | if (*ptr == '-') |
| 1712 | ptr++; |
| 1713 | if (*ptr == '.') |
| 1714 | ptr++; |
| 1715 | while (isdigit((unsigned char)*ptr)) ptr++; |
| 1716 | if (*ptr == '.') |
| 1717 | ptr++; |
| 1718 | while (isdigit((unsigned char)*ptr)) ptr++; |
| 1719 | if (quad) { |
| 1720 | if (*ptr++ != 'l') |
| 1721 | return -1; |
| 1722 | if (*ptr++ != 'l') |
| 1723 | return -1; |
| 1724 | } |
| 1725 | |
| 1726 | switch (*ptr++) { |
| 1727 | case 'l': |
| 1728 | switch (*ptr++) { |
| 1729 | case 'i': |
| 1730 | case 'd': |
| 1731 | case 'u': |
| 1732 | case 'x': |
| 1733 | case 'X': |
| 1734 | return 0; |
| 1735 | default: |
| 1736 | return -1; |
| 1737 | } |
| 1738 | |
| 1739 | case 'h': |
| 1740 | switch (*ptr++) { |
| 1741 | case 'h': |
| 1742 | switch (*ptr++) { |
| 1743 | case 'i': |
| 1744 | case 'd': |
| 1745 | case 'u': |
| 1746 | case 'x': |
| 1747 | case 'X': |
| 1748 | return 0; |
| 1749 | default: |
| 1750 | return -1; |
| 1751 | } |
| 1752 | case 'd': |
| 1753 | return 0; |
| 1754 | default: |
| 1755 | return -1; |
| 1756 | } |
| 1757 | |
| 1758 | case 'i': |
| 1759 | case 'c': |
| 1760 | case 'd': |
| 1761 | case 'u': |
| 1762 | case 'x': |
| 1763 | case 'X': |
| 1764 | return 0; |
| 1765 | |
| 1766 | default: |
| 1767 | return -1; |
| 1768 | } |
| 1769 | |
| 1770 | case FILE_FMT_FLOAT: |
| 1771 | case FILE_FMT_DOUBLE: |
| 1772 | if (*ptr == '-') |
| 1773 | ptr++; |
| 1774 | if (*ptr == '.') |
| 1775 | ptr++; |
| 1776 | while (isdigit((unsigned char)*ptr)) ptr++; |
| 1777 | if (*ptr == '.') |
| 1778 | ptr++; |
| 1779 | while (isdigit((unsigned char)*ptr)) ptr++; |
| 1780 | |
| 1781 | switch (*ptr++) { |
| 1782 | case 'e': |
| 1783 | case 'E': |
| 1784 | case 'f': |
| 1785 | case 'F': |
| 1786 | case 'g': |
| 1787 | case 'G': |
| 1788 | return 0; |
| 1789 | |
| 1790 | default: |
| 1791 | return -1; |
| 1792 | } |
| 1793 | |
| 1794 | |
| 1795 | case FILE_FMT_STR: |
| 1796 | if (*ptr == '-') |
| 1797 | ptr++; |
| 1798 | while (isdigit((unsigned char )*ptr)) |
| 1799 | ptr++; |
| 1800 | if (*ptr == '.') { |
| 1801 | ptr++; |
| 1802 | while (isdigit((unsigned char )*ptr)) |
| 1803 | ptr++; |
| 1804 | } |
| 1805 | |
| 1806 | switch (*ptr++) { |
| 1807 | case 's': |
| 1808 | return 0; |
| 1809 | default: |
| 1810 | return -1; |
| 1811 | } |
| 1812 | |
| 1813 | default: |
| 1814 | /* internal error */ |
| 1815 | abort(); |
| 1816 | } |
| 1817 | /*NOTREACHED*/ |
| 1818 | return -1; |
| 1819 | } |
| 1820 | |
| 1821 | /* |
| 1822 | * Check that the optional printf format in description matches |
| 1823 | * the type of the magic. |
| 1824 | */ |
| 1825 | private int |
| 1826 | check_format(struct magic_set *ms, struct magic *m) |
| 1827 | { |
| 1828 | char *ptr; |
| 1829 | |
| 1830 | for (ptr = m->desc; *ptr; ptr++) |
| 1831 | if (*ptr == '%') |
| 1832 | break; |
| 1833 | if (*ptr == '\0') { |
| 1834 | /* No format string; ok */ |
| 1835 | return 1; |
| 1836 | } |
| 1837 | |
| 1838 | assert(file_nformats == file_nnames); |
| 1839 | |
| 1840 | if (m->type >= file_nformats) { |
| 1841 | file_magwarn(ms, "Internal error inconsistency between " |
| 1842 | "m->type and format strings"); |
| 1843 | return -1; |
| 1844 | } |
| 1845 | if (file_formats[m->type] == FILE_FMT_NONE) { |
| 1846 | file_magwarn(ms, "No format string for `%s' with description " |
| 1847 | "`%s'", m->desc, file_names[m->type]); |
| 1848 | return -1; |
| 1849 | } |
| 1850 | |
| 1851 | ptr++; |
| 1852 | if (check_format_type(ptr, file_formats[m->type]) == -1) { |
| 1853 | /* |
| 1854 | * TODO: this error message is unhelpful if the format |
| 1855 | * string is not one character long |
| 1856 | */ |
| 1857 | file_magwarn(ms, "Printf format `%c' is not valid for type " |
| 1858 | "`%s' in description `%s'", *ptr ? *ptr : '?', |
| 1859 | file_names[m->type], m->desc); |
| 1860 | return -1; |
| 1861 | } |
| 1862 | |
| 1863 | for (; *ptr; ptr++) { |
| 1864 | if (*ptr == '%') { |
| 1865 | file_magwarn(ms, |
| 1866 | "Too many format strings (should have at most one) " |
| 1867 | "for `%s' with description `%s'", |
| 1868 | file_names[m->type], m->desc); |
| 1869 | return -1; |
| 1870 | } |
| 1871 | } |
| 1872 | return 0; |
| 1873 | } |
| 1874 | |
| 1875 | /* |
| 1876 | * Read a numeric value from a pointer, into the value union of a magic |
| 1877 | * pointer, according to the magic type. Update the string pointer to point |
| 1878 | * just after the number read. Return 0 for success, non-zero for failure. |
| 1879 | */ |
| 1880 | private int |
| 1881 | getvalue(struct magic_set *ms, struct magic *m, const char **p, int action) |
| 1882 | { |
| 1883 | switch (m->type) { |
| 1884 | case FILE_BESTRING16: |
| 1885 | case FILE_LESTRING16: |
| 1886 | case FILE_STRING: |
| 1887 | case FILE_PSTRING: |
| 1888 | case FILE_REGEX: |
| 1889 | case FILE_SEARCH: |
| 1890 | *p = getstr(ms, m, *p, action == FILE_COMPILE); |
| 1891 | if (*p == NULL) { |
| 1892 | if (ms->flags & MAGIC_CHECK) |
| 1893 | file_magwarn(ms, "cannot get string from `%s'", |
| 1894 | m->value.s); |
| 1895 | return -1; |
| 1896 | } |
| 1897 | return 0; |
| 1898 | case FILE_FLOAT: |
| 1899 | case FILE_BEFLOAT: |
| 1900 | case FILE_LEFLOAT: |
| 1901 | if (m->reln != 'x') { |
| 1902 | char *ep; |
| 1903 | #ifdef HAVE_STRTOF |
| 1904 | m->value.f = strtof(*p, &ep); |
| 1905 | #else |
| 1906 | m->value.f = (float)strtod(*p, &ep); |
| 1907 | #endif |
| 1908 | *p = ep; |
| 1909 | } |
| 1910 | return 0; |
| 1911 | case FILE_DOUBLE: |
| 1912 | case FILE_BEDOUBLE: |
| 1913 | case FILE_LEDOUBLE: |
| 1914 | if (m->reln != 'x') { |
| 1915 | char *ep; |
| 1916 | m->value.d = strtod(*p, &ep); |
| 1917 | *p = ep; |
| 1918 | } |
| 1919 | return 0; |
| 1920 | default: |
| 1921 | if (m->reln != 'x') { |
| 1922 | char *ep; |
| 1923 | m->value.q = file_signextend(ms, m, |
| 1924 | (uint64_t)strtoull(*p, &ep, 0)); |
| 1925 | *p = ep; |
| 1926 | eatsize(p); |
| 1927 | } |
| 1928 | return 0; |
| 1929 | } |
| 1930 | } |
| 1931 | |
| 1932 | /* |
| 1933 | * Convert a string containing C character escapes. Stop at an unescaped |
| 1934 | * space or tab. |
| 1935 | * Copy the converted version to "m->value.s", and the length in m->vallen. |
| 1936 | * Return updated scan pointer as function result. Warn if set. |
| 1937 | */ |
| 1938 | private const char * |
| 1939 | getstr(struct magic_set *ms, struct magic *m, const char *s, int warn) |
| 1940 | { |
| 1941 | const char *origs = s; |
| 1942 | char *p = m->value.s; |
| 1943 | size_t plen = sizeof(m->value.s); |
| 1944 | char *origp = p; |
| 1945 | char *pmax = p + plen - 1; |
| 1946 | int c; |
| 1947 | int val; |
| 1948 | |
| 1949 | while ((c = *s++) != '\0') { |
| 1950 | if (isspace((unsigned char) c)) |
| 1951 | break; |
| 1952 | if (p >= pmax) { |
| 1953 | file_error(ms, 0, "string too long: `%s'", origs); |
| 1954 | return NULL; |
| 1955 | } |
| 1956 | if (c == '\\') { |
| 1957 | switch(c = *s++) { |
| 1958 | |
| 1959 | case '\0': |
| 1960 | if (warn) |
| 1961 | file_magwarn(ms, "incomplete escape"); |
| 1962 | goto out; |
| 1963 | |
| 1964 | case '\t': |
| 1965 | if (warn) { |
| 1966 | file_magwarn(ms, |
| 1967 | "escaped tab found, use \\t instead"); |
| 1968 | warn = 0; /* already did */ |
| 1969 | } |
| 1970 | /*FALLTHROUGH*/ |
| 1971 | default: |
| 1972 | if (warn) { |
| 1973 | if (isprint((unsigned char)c)) { |
| 1974 | /* Allow escaping of |
| 1975 | * ``relations'' */ |
| 1976 | if (strchr("<>&^=!", c) == NULL |
| 1977 | && (m->type != FILE_REGEX || |
| 1978 | strchr("[]().*?^$|{}", c) |
| 1979 | == NULL)) { |
| 1980 | file_magwarn(ms, "no " |
| 1981 | "need to escape " |
| 1982 | "`%c'", c); |
| 1983 | } |
| 1984 | } else { |
| 1985 | file_magwarn(ms, |
| 1986 | "unknown escape sequence: " |
| 1987 | "\\%03o", c); |
| 1988 | } |
| 1989 | } |
| 1990 | /*FALLTHROUGH*/ |
| 1991 | /* space, perhaps force people to use \040? */ |
| 1992 | case ' ': |
| 1993 | #if 0 |
| 1994 | /* |
| 1995 | * Other things people escape, but shouldn't need to, |
| 1996 | * so we disallow them |
| 1997 | */ |
| 1998 | case '\'': |
| 1999 | case '"': |
| 2000 | case '?': |
| 2001 | #endif |
| 2002 | /* Relations */ |
| 2003 | case '>': |
| 2004 | case '<': |
| 2005 | case '&': |
| 2006 | case '^': |
| 2007 | case '=': |
| 2008 | case '!': |
| 2009 | /* and baskslash itself */ |
| 2010 | case '\\': |
| 2011 | *p++ = (char) c; |
| 2012 | break; |
| 2013 | |
| 2014 | case 'a': |
| 2015 | *p++ = '\a'; |
| 2016 | break; |
| 2017 | |
| 2018 | case 'b': |
| 2019 | *p++ = '\b'; |
| 2020 | break; |
| 2021 | |
| 2022 | case 'f': |
| 2023 | *p++ = '\f'; |
| 2024 | break; |
| 2025 | |
| 2026 | case 'n': |
| 2027 | *p++ = '\n'; |
| 2028 | break; |
| 2029 | |
| 2030 | case 'r': |
| 2031 | *p++ = '\r'; |
| 2032 | break; |
| 2033 | |
| 2034 | case 't': |
| 2035 | *p++ = '\t'; |
| 2036 | break; |
| 2037 | |
| 2038 | case 'v': |
| 2039 | *p++ = '\v'; |
| 2040 | break; |
| 2041 | |
| 2042 | /* \ and up to 3 octal digits */ |
| 2043 | case '0': |
| 2044 | case '1': |
| 2045 | case '2': |
| 2046 | case '3': |
| 2047 | case '4': |
| 2048 | case '5': |
| 2049 | case '6': |
| 2050 | case '7': |
| 2051 | val = c - '0'; |
| 2052 | c = *s++; /* try for 2 */ |
| 2053 | if (c >= '0' && c <= '7') { |
| 2054 | val = (val << 3) | (c - '0'); |
| 2055 | c = *s++; /* try for 3 */ |
| 2056 | if (c >= '0' && c <= '7') |
| 2057 | val = (val << 3) | (c-'0'); |
| 2058 | else |
| 2059 | --s; |
| 2060 | } |
| 2061 | else |
| 2062 | --s; |
| 2063 | *p++ = (char)val; |
| 2064 | break; |
| 2065 | |
| 2066 | /* \x and up to 2 hex digits */ |
| 2067 | case 'x': |
| 2068 | val = 'x'; /* Default if no digits */ |
| 2069 | c = hextoint(*s++); /* Get next char */ |
| 2070 | if (c >= 0) { |
| 2071 | val = c; |
| 2072 | c = hextoint(*s++); |
| 2073 | if (c >= 0) |
| 2074 | val = (val << 4) + c; |
| 2075 | else |
| 2076 | --s; |
| 2077 | } else |
| 2078 | --s; |
| 2079 | *p++ = (char)val; |
| 2080 | break; |
| 2081 | } |
| 2082 | } else |
| 2083 | *p++ = (char)c; |
| 2084 | } |
| 2085 | out: |
| 2086 | *p = '\0'; |
| 2087 | m->vallen = CAST(unsigned char, (p - origp)); |
| 2088 | if (m->type == FILE_PSTRING) |
| 2089 | m->vallen += (unsigned char)file_pstring_length_size(m); |
| 2090 | return s; |
| 2091 | } |
| 2092 | |
| 2093 | |
/*
 * hextoint(): numeric value of one hexadecimal digit character.
 *
 * Yields 0-9 for '0'-'9' and 10-15 for 'a'-'f' / 'A'-'F'.  Anything else,
 * including any byte outside the 7-bit ASCII range, yields -1.
 */
private int
hextoint(int c)
{
	const unsigned char byte = (unsigned char)c;

	/* Only 7-bit ASCII can be a hex digit. */
	if (byte > 0x7f)
		return -1;
	if (isdigit(byte))
		return c - '0';

	switch (c) {
	case 'a': case 'b': case 'c':
	case 'd': case 'e': case 'f':
		return c - 'a' + 10;
	case 'A': case 'B': case 'C':
	case 'D': case 'E': case 'F':
		return c - 'A' + 10;
	default:
		return -1;
	}
}
| 2108 | |
| 2109 | |
/*
 * file_showstr(): write a string to fp, showing unprintable bytes as
 * C character escapes.
 *
 * If len is ~0U the input is consumed up to (not including) its NUL
 * terminator; otherwise len bytes are consumed, embedded NULs included.
 * Bytes in the range 040..0176 are written as they are.  Every other byte
 * is written as a backslash followed by one of a b f n r t v, or by three
 * octal digits when it has no letter escape.
 */
protected void
file_showstr(FILE *fp, const char *s, size_t len)
{
	static const struct {
		char	raw;	/* byte value in the input */
		char	letter;	/* letter written after the backslash */
	} escapes[] = {
		{ '\a', 'a' },
		{ '\b', 'b' },
		{ '\f', 'f' },
		{ '\n', 'n' },
		{ '\r', 'r' },
		{ '\t', 't' },
		{ '\v', 'v' },
	};
	const size_t nescapes = sizeof(escapes) / sizeof(escapes[0]);

	for (;;) {
		char ch;
		size_t i;

		/* Fetch the next byte, or stop at the end of the input. */
		if (len != ~0U) {
			if (len-- == 0)
				return;
			ch = *s++;
		} else if ((ch = *s++) == '\0')
			return;

		if (ch >= 040 && ch <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(ch, fp);
			continue;
		}

		(void) fputc('\\', fp);
		for (i = 0; i < nescapes; i++)
			if (escapes[i].raw == ch)
				break;
		if (i < nescapes)
			(void) fputc(escapes[i].letter, fp);
		else
			(void) fprintf(fp, "%.3o", ch & 0377);
	}
}
| 2169 | |
/*
 * eatsize(): step *p over the size suffix of a number [eg. the UL of 10UL].
 *
 * Accepts, case-insensitively, an optional 'u' followed by at most one of
 * 'l' (long), 's' or 'h' (short), 'b' or 'c' (char/byte).  *p is left
 * pointing at the first character that is not part of the suffix.
 */
private void
eatsize(const char **p)
{
	const char *cur = *p;
	int suffix;

	/* Optional unsigned marker. */
	if (LOWCASE(*cur) == 'u')
		cur++;

	/* Optional width letter. */
	suffix = LOWCASE(*cur);
	if (suffix == 'l' || suffix == 's' || suffix == 'h' ||
	    suffix == 'b' || suffix == 'c')
		cur++;

	*p = cur;
}
| 2195 | |
| 2196 | /* |
| 2197 | * handle a compiled file. |
| 2198 | */ |
| 2199 | private int |
| 2200 | apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, |
| 2201 | const char *fn) |
| 2202 | { |
| 2203 | int fd; |
| 2204 | struct stat st; |
| 2205 | uint32_t *ptr; |
| 2206 | uint32_t version; |
| 2207 | int needsbyteswap; |
| 2208 | char *dbname = NULL; |
| 2209 | void *mm = NULL; |
| 2210 | |
| 2211 | dbname = mkdbname(ms, fn, 0); |
| 2212 | if (dbname == NULL) |
| 2213 | goto error2; |
| 2214 | |
| 2215 | if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1) |
| 2216 | goto error2; |
| 2217 | |
| 2218 | if (fstat(fd, &st) == -1) { |
| 2219 | file_error(ms, errno, "cannot stat `%s'", dbname); |
| 2220 | goto error1; |
| 2221 | } |
| 2222 | if (st.st_size < 8) { |
| 2223 | file_error(ms, 0, "file `%s' is too small", dbname); |
| 2224 | goto error1; |
| 2225 | } |
| 2226 | |
| 2227 | #ifdef QUICK |
| 2228 | if ((mm = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE, |
| 2229 | MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) { |
| 2230 | file_error(ms, errno, "cannot map `%s'", dbname); |
| 2231 | goto error1; |
| 2232 | } |
| 2233 | #define RET 2 |
| 2234 | #else |
| 2235 | if ((mm = CAST(void *, malloc((size_t)st.st_size))) == NULL) { |
| 2236 | file_oomem(ms, (size_t)st.st_size); |
| 2237 | goto error1; |
| 2238 | } |
| 2239 | if (read(fd, mm, (size_t)st.st_size) != (ssize_t)st.st_size) { |
| 2240 | file_badread(ms); |
| 2241 | goto error1; |
| 2242 | } |
| 2243 | #define RET 1 |
| 2244 | #endif |
| 2245 | *magicp = CAST(struct magic *, mm); |
| 2246 | (void)close(fd); |
| 2247 | fd = -1; |
| 2248 | ptr = (uint32_t *)(void *)*magicp; |
| 2249 | if (*ptr != MAGICNO) { |
| 2250 | if (swap4(*ptr) != MAGICNO) { |
| 2251 | file_error(ms, 0, "bad magic in `%s'", dbname); |
| 2252 | goto error1; |
| 2253 | } |
| 2254 | needsbyteswap = 1; |
| 2255 | } else |
| 2256 | needsbyteswap = 0; |
| 2257 | if (needsbyteswap) |
| 2258 | version = swap4(ptr[1]); |
| 2259 | else |
| 2260 | version = ptr[1]; |
| 2261 | if (version != VERSIONNO) { |
| 2262 | file_error(ms, 0, "File %s supports only version %d magic " |
| 2263 | "files. `%s' is version %d", VERSION, |
| 2264 | VERSIONNO, dbname, version); |
| 2265 | goto error1; |
| 2266 | } |
| 2267 | *nmagicp = (uint32_t)(st.st_size / sizeof(struct magic)); |
| 2268 | if (*nmagicp > 0) |
| 2269 | (*nmagicp)--; |
| 2270 | (*magicp)++; |
| 2271 | if (needsbyteswap) |
| 2272 | byteswap(*magicp, *nmagicp); |
| 2273 | free(dbname); |
| 2274 | return RET; |
| 2275 | |
| 2276 | error1: |
| 2277 | if (fd != -1) |
| 2278 | (void)close(fd); |
| 2279 | if (mm) { |
| 2280 | #ifdef QUICK |
| 2281 | (void)munmap((void *)mm, (size_t)st.st_size); |
| 2282 | #else |
| 2283 | free(mm); |
| 2284 | #endif |
| 2285 | } else { |
| 2286 | *magicp = NULL; |
| 2287 | *nmagicp = 0; |
| 2288 | } |
| 2289 | error2: |
| 2290 | free(dbname); |
| 2291 | return -1; |
| 2292 | } |
| 2293 | |
| 2294 | private const uint32_t ar[] = { |
| 2295 | MAGICNO, VERSIONNO |
| 2296 | }; |
| 2297 | /* |
| 2298 | * handle an mmaped file. |
| 2299 | */ |
| 2300 | private int |
| 2301 | apprentice_compile(struct magic_set *ms, struct magic **magicp, |
| 2302 | uint32_t *nmagicp, const char *fn) |
| 2303 | { |
| 2304 | int fd = -1; |
| 2305 | char *dbname; |
| 2306 | int rv = -1; |
| 2307 | |
| 2308 | dbname = mkdbname(ms, fn, 1); |
| 2309 | |
| 2310 | if (dbname == NULL) |
| 2311 | goto out; |
| 2312 | |
| 2313 | if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1) { |
| 2314 | file_error(ms, errno, "cannot open `%s'", dbname); |
| 2315 | goto out; |
| 2316 | } |
| 2317 | |
| 2318 | if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) { |
| 2319 | file_error(ms, errno, "error writing `%s'", dbname); |
| 2320 | goto out; |
| 2321 | } |
| 2322 | |
| 2323 | if (lseek(fd, (off_t)sizeof(struct magic), SEEK_SET) |
| 2324 | != sizeof(struct magic)) { |
| 2325 | file_error(ms, errno, "error seeking `%s'", dbname); |
| 2326 | goto out; |
| 2327 | } |
| 2328 | |
| 2329 | if (write(fd, *magicp, (sizeof(struct magic) * *nmagicp)) |
| 2330 | != (ssize_t)(sizeof(struct magic) * *nmagicp)) { |
| 2331 | file_error(ms, errno, "error writing `%s'", dbname); |
| 2332 | goto out; |
| 2333 | } |
| 2334 | |
| 2335 | if (fd != -1) |
| 2336 | (void)close(fd); |
| 2337 | rv = 0; |
| 2338 | out: |
| 2339 | free(dbname); |
| 2340 | return rv; |
| 2341 | } |
| 2342 | |
| 2343 | private const char ext[] = ".mgc"; |
| 2344 | /* |
| 2345 | * make a dbname |
| 2346 | */ |
| 2347 | private char * |
| 2348 | mkdbname(struct magic_set *ms, const char *fn, int strip) |
| 2349 | { |
| 2350 | const char *p, *q; |
| 2351 | char *buf; |
| 2352 | |
| 2353 | if (strip) { |
| 2354 | if ((p = strrchr(fn, '/')) != NULL) |
| 2355 | fn = ++p; |
| 2356 | } |
| 2357 | |
| 2358 | for (q = fn; *q; q++) |
| 2359 | continue; |
| 2360 | /* Look for .mgc */ |
| 2361 | for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--) |
| 2362 | if (*p != *q) |
| 2363 | break; |
| 2364 | |
| 2365 | /* Did not find .mgc, restore q */ |
| 2366 | if (p >= ext) |
| 2367 | while (*q) |
| 2368 | q++; |
| 2369 | |
| 2370 | q++; |
| 2371 | /* Compatibility with old code that looked in .mime */ |
| 2372 | if (ms->flags & MAGIC_MIME) { |
| 2373 | asprintf(&buf, "%.*s.mime%s", (int)(q - fn), fn, ext); |
| 2374 | if (access(buf, R_OK) != -1) { |
| 2375 | ms->flags &= MAGIC_MIME_TYPE; |
| 2376 | return buf; |
| 2377 | } |
| 2378 | free(buf); |
| 2379 | } |
| 2380 | asprintf(&buf, "%.*s%s", (int)(q - fn), fn, ext); |
| 2381 | |
| 2382 | /* Compatibility with old code that looked in .mime */ |
| 2383 | if (strstr(p, ".mime") != NULL) |
| 2384 | ms->flags &= MAGIC_MIME_TYPE; |
| 2385 | return buf; |
| 2386 | } |
| 2387 | |
| 2388 | /* |
| 2389 | * Byteswap an mmap'ed file if needed |
| 2390 | */ |
| 2391 | private void |
| 2392 | byteswap(struct magic *magic, uint32_t nmagic) |
| 2393 | { |
| 2394 | uint32_t i; |
| 2395 | for (i = 0; i < nmagic; i++) |
| 2396 | bs1(&magic[i]); |
| 2397 | } |
| 2398 | |
/*
 * swap2(): return sv with its two bytes exchanged.
 */
private uint16_t
swap2(uint16_t sv)
{
	const unsigned int v = sv;

	return (uint16_t)((v << 8) | (v >> 8));
}
| 2412 | |
/*
 * swap4(): return sv with the order of its four bytes reversed.
 */
private uint32_t
swap4(uint32_t sv)
{
	return ((sv & 0x000000ffU) << 24) |
	    ((sv & 0x0000ff00U) << 8) |
	    ((sv & 0x00ff0000U) >> 8) |
	    ((sv & 0xff000000U) >> 24);
}
| 2428 | |
/*
 * swap8(): return sv with the order of all eight of its bytes reversed.
 */
private uint64_t
swap8(uint64_t sv)
{
	uint64_t reversed = 0;
	int i;

	/* Peel bytes off the low end and push them in at the low end. */
	for (i = 0; i < 8; i++) {
		reversed = (reversed << 8) | (sv & 0xff);
		sv >>= 8;
	}
	return reversed;
}
| 2459 | |
| 2460 | /* |
| 2461 | * byteswap a single magic entry |
| 2462 | */ |
| 2463 | private void |
| 2464 | bs1(struct magic *m) |
| 2465 | { |
| 2466 | m->cont_level = swap2(m->cont_level); |
| 2467 | m->offset = swap4((uint32_t)m->offset); |
| 2468 | m->in_offset = swap4((uint32_t)m->in_offset); |
| 2469 | m->lineno = swap4((uint32_t)m->lineno); |
| 2470 | if (IS_STRING(m->type)) { |
| 2471 | m->str_range = swap4(m->str_range); |
| 2472 | m->str_flags = swap4(m->str_flags); |
| 2473 | } |
| 2474 | else { |
| 2475 | m->value.q = swap8(m->value.q); |
| 2476 | m->num_mask = swap8(m->num_mask); |
| 2477 | } |
| 2478 | } |
| 2479 | |
| 2480 | protected size_t |
| 2481 | file_pstring_length_size(const struct magic *m) |
| 2482 | { |
| 2483 | switch (m->str_flags & PSTRING_LEN) { |
| 2484 | case PSTRING_1_LE: |
| 2485 | return 1; |
| 2486 | case PSTRING_2_LE: |
| 2487 | case PSTRING_2_BE: |
| 2488 | return 2; |
| 2489 | case PSTRING_4_LE: |
| 2490 | case PSTRING_4_BE: |
| 2491 | return 4; |
| 2492 | default: |
| 2493 | abort(); /* Impossible */ |
| 2494 | return 1; |
| 2495 | } |
| 2496 | } |
| 2497 | protected size_t |
| 2498 | file_pstring_get_length(const struct magic *m, const char *s) |
| 2499 | { |
| 2500 | size_t len = 0; |
| 2501 | |
| 2502 | switch (m->str_flags & PSTRING_LEN) { |
| 2503 | case PSTRING_1_LE: |
| 2504 | len = *s; |
| 2505 | break; |
| 2506 | case PSTRING_2_LE: |
| 2507 | len = (s[1] << 8) | s[0]; |
| 2508 | break; |
| 2509 | case PSTRING_2_BE: |
| 2510 | len = (s[0] << 8) | s[1]; |
| 2511 | break; |
| 2512 | case PSTRING_4_LE: |
| 2513 | len = (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0]; |
| 2514 | break; |
| 2515 | case PSTRING_4_BE: |
| 2516 | len = (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3]; |
| 2517 | break; |
| 2518 | default: |
| 2519 | abort(); /* Impossible */ |
| 2520 | } |
| 2521 | |
| 2522 | if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) |
| 2523 | len -= file_pstring_length_size(m); |
| 2524 | |
| 2525 | return len; |
| 2526 | } |