2 * apprentice - make one pass through /etc/magic, learning its secrets.
4 * Copyright (c) Ian F. Darwin, 1987.
5 * Written by Ian F. Darwin.
7 * This software is not subject to any license of the American Telephone
8 * and Telegraph Company or of the Regents of the University of California.
10 * Permission is granted to anyone to use this software for any purpose on
11 * any computer system, and to alter it and redistribute it freely, subject
12 * to the following restrictions:
14 * 1. The author is not responsible for the consequences of use of this
15 * software, no matter how awful, even if they arise from flaws in it.
17 * 2. The origin of this software must not be misrepresented, either by
18 * explicit claim or by omission. Since few users ever read sources,
19 * credits must appear in the documentation.
21 * 3. Altered versions must be plainly marked as such, and must not be
22 * misrepresented as being the original software. Since few users
23 * ever read sources, credits must appear in the documentation.
25 * 4. This notice may not be removed or altered.
41 FILE_RCSID("@(#)$Id: apprentice.c,v 1.50 2003/02/27 20:47:46 christos Exp $")
44 #define EATAB {while (isascii((unsigned char) *l) && \
45 isspace((unsigned char) *l)) ++l;}
46 #define LOWCASE(l) (isupper((unsigned char) (l)) ? \
47 tolower((unsigned char) (l)) : (l))
49 * Work around a bug in headers on Digital Unix.
50 * At least confirmed for: OSF1 V4.0 878
52 #if defined(__osf__) && defined(__DECC)
59 #define MAP_FAILED (void *) -1
73 static int getvalue(struct magic *, char **);
74 static int hextoint(int);
75 static char *getstr(char *, char *, int, int *);
76 static int parse(struct magic **, uint32_t *, char *, int);
77 static void eatsize(char **);
78 static int apprentice_1(const char *, int);
79 static int apprentice_file(struct magic **, uint32_t *, const char *, int);
80 static void byteswap(struct magic *, uint32_t);
81 static void bs1(struct magic *);
82 static uint16_t swap2(uint16_t);
83 static uint32_t swap4(uint32_t);
84 static char *mkdbname(const char *);
85 static int apprentice_map(struct magic **, uint32_t *, const char *, int);
86 static int apprentice_compile(struct magic **, uint32_t *, const char *, int);
88 static int maxmagic = 0;
93 const char *magicfile;
97 int main(int, char *[]);
100 main(int argc, char *argv[])
104 if ((progname = strrchr(argv[0], '/')) != NULL)
110 (void)fprintf(stderr, "usage: %s file\n", progname);
115 exit(apprentice(magicfile, COMPILE));
117 #endif /* COMPILE_ONLY */
124 apprentice_1(const char *fn, int action)
126 struct magic *magic = NULL;
131 if (action == COMPILE) {
132 rv = apprentice_file(&magic, &nmagic, fn, action);
134 return apprentice_compile(&magic, &nmagic, fn, action);
139 if ((rv = apprentice_map(&magic, &nmagic, fn, action)) != 0)
140 (void)fprintf(stderr, "%s: Using regular magic file `%s'\n",
144 rv = apprentice_file(&magic, &nmagic, fn, action);
149 if ((ml = malloc(sizeof(*ml))) == NULL) {
150 (void) fprintf(stderr, "%s: Out of memory (%s).\n", progname,
156 if (magic == NULL || nmagic == 0)
162 mlist.prev->next = ml;
163 ml->prev = mlist.prev;
168 #endif /* COMPILE_ONLY */
172 /* const char *fn: list of magic files */
174 apprentice(const char *fn, int action)
177 int file_err, errs = -1;
179 mlist.next = mlist.prev = &mlist;
180 mfn = malloc(strlen(fn)+1);
182 (void) fprintf(stderr, "%s: Out of memory (%s).\n", progname,
189 fn = strcpy(mfn, fn);
192 p = strchr(fn, PATHSEP);
195 file_err = apprentice_1(fn, action);
201 (void) fprintf(stderr, "%s: couldn't find any magic files!\n",
203 if (action == CHECK && errs)
212 * const char *fn: name of magic file
215 apprentice_file(struct magic **magicp, uint32_t *nmagicp, const char *fn,
218 static const char hdr[] =
219 "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
227 (void) fprintf(stderr,
228 "%s: can't read magic file %s (%s)\n",
229 progname, fn, strerror(errno));
234 *magicp = (struct magic *) calloc(maxmagic, sizeof(struct magic));
235 if (*magicp == NULL) {
236 (void) fprintf(stderr, "%s: Out of memory (%s).\n", progname,
243 if (action == CHECK) /* print silly verbose header for USG compat. */
244 (void) printf("%s\n", hdr);
246 for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
247 if (line[0]=='#') /* comment, do not parse */
249 if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
251 line[strlen(line)-1] = '\0'; /* delete newline */
252 if (parse(magicp, nmagicp, line, action) != 0)
266 * extend the sign bit if the comparison is to be signed
269 signextend(struct magic *m, uint32_t v)
271 if (!(m->flag & UNSIGNED))
274 * Do not remove the casts below. They are
275 * vital. When later compared with the data,
276 * the sign extension must have happened.
303 magwarn("can't happen: m->type=%d\n",
311 * parse one line from magic file, put into magic[index++] if valid
314 parse(struct magic **magicp, uint32_t *nmagicp, char *l, int action)
320 #define ALLOC_INCR 200
321 if (*nmagicp + 1 >= maxmagic){
322 maxmagic += ALLOC_INCR;
323 if ((m = (struct magic *) realloc(*magicp,
324 sizeof(struct magic) * maxmagic)) == NULL) {
325 (void) fprintf(stderr, "%s: Out of memory (%s).\n",
326 progname, strerror(errno));
335 memset(&(*magicp)[*nmagicp], 0, sizeof(struct magic)
338 m = &(*magicp)[*nmagicp];
347 if (m->cont_level != 0 && *l == '(') {
351 if (m->cont_level != 0 && *l == '&') {
356 /* get offset, then skip over it */
357 m->offset = (int) strtoul(l,&t,0);
359 magwarn("offset %s invalid", l);
362 if (m->flag & INDIR) {
366 * read [.lbs][+-]nnnnn)
379 m->in_type = LESHORT;
383 m->in_type = BESHORT;
392 magwarn("indirect offset type %c invalid", *l);
398 m->in_op = OPINVERSE;
423 m->in_op |= OPMULTIPLY;
427 m->in_op |= OPDIVIDE;
431 m->in_op |= OPMODULO;
435 if (isdigit((unsigned char)*l))
436 m->in_offset = strtoul(l, &t, 0);
440 magwarn("missing ')' in indirect offset");
445 while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
471 /* get type, skip it */
472 if (strncmp(l, "char", NBYTE)==0) { /* HP/UX compat */
475 } else if (strncmp(l, "byte", NBYTE)==0) {
478 } else if (strncmp(l, "short", NSHORT)==0) {
481 } else if (strncmp(l, "long", NLONG)==0) {
484 } else if (strncmp(l, "string", NSTRING)==0) {
487 } else if (strncmp(l, "date", NDATE)==0) {
490 } else if (strncmp(l, "beshort", NBESHORT)==0) {
493 } else if (strncmp(l, "belong", NBELONG)==0) {
496 } else if (strncmp(l, "bedate", NBEDATE)==0) {
499 } else if (strncmp(l, "leshort", NLESHORT)==0) {
502 } else if (strncmp(l, "lelong", NLELONG)==0) {
505 } else if (strncmp(l, "ledate", NLEDATE)==0) {
508 } else if (strncmp(l, "pstring", NPSTRING)==0) {
511 } else if (strncmp(l, "ldate", NLDATE)==0) {
514 } else if (strncmp(l, "beldate", NBELDATE)==0) {
517 } else if (strncmp(l, "leldate", NLELDATE)==0) {
520 } else if (strncmp(l, "regex", NREGEX)==0) {
522 l += sizeof("regex");
524 magwarn("type %s invalid", l);
527 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
528 /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
530 if (STRING != m->type && PSTRING != m->type)
531 m->mask_op = OPINVERSE;
538 m->mask = signextend(m, strtoul(l, &l, 0));
544 m->mask = signextend(m, strtoul(l, &l, 0));
550 m->mask = signextend(m, strtoul(l, &l, 0));
556 m->mask = signextend(m, strtoul(l, &l, 0));
560 m->mask_op |= OPMINUS;
562 m->mask = signextend(m, strtoul(l, &l, 0));
566 m->mask_op |= OPMULTIPLY;
568 m->mask = signextend(m, strtoul(l, &l, 0));
572 m->mask_op |= OPMODULO;
574 m->mask = signextend(m, strtoul(l, &l, 0));
578 if (STRING != m->type && PSTRING != m->type) {
579 m->mask_op |= OPDIVIDE;
581 m->mask = signextend(m, strtoul(l, &l, 0));
585 while (!isspace(*++l)) {
587 case CHAR_IGNORE_LOWERCASE:
588 m->mask |= STRING_IGNORE_LOWERCASE;
590 case CHAR_COMPACT_BLANK:
591 m->mask |= STRING_COMPACT_BLANK;
593 case CHAR_COMPACT_OPTIONAL_BLANK:
595 STRING_COMPACT_OPTIONAL_BLANK;
598 magwarn("string extension %c invalid",
606 /* We used to set mask to all 1's here, instead let's just not do anything
607 if mask = 0 (unless you have a better idea) */
613 /* Old-style anding: "0 byte &0x80 dynamically linked" */
620 /* HP compat: ignore &= etc. */
625 if (m->type != STRING && m->type != PSTRING) {
632 if (*l == 'x' && isascii((unsigned char)l[1]) &&
633 isspace((unsigned char)l[1])) {
636 goto GetDesc; /* Bill The Cat */
646 * TODO finish this macro and start using it!
647 * #define offsetcheck {if (offset > HOWMANY-1)
648 * magwarn("offset too big"); }
652 * now get last part - the description
659 } else if ((l[0] == '\\') && (l[1] == 'b')) {
665 while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
669 if (action == CHECK) {
673 ++(*nmagicp); /* make room for next */
678 * Read a numeric value from a pointer, into the value union of a magic
679 * pointer, according to the magic type. Update the string pointer to point
680 * just after the number read. Return 0 for success, non-zero for failure.
683 getvalue(struct magic *m, char **p)
687 if (m->type == STRING || m->type == PSTRING || m->type == REGEX) {
688 *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
691 if (m->reln != 'x') {
692 m->value.l = signextend(m, strtoul(*p, p, 0));
699 * Convert a string containing C character escapes. Stop at an unescaped
701 * Copy the converted version to "p", returning its length in *slen.
702 * Return updated scan pointer as function result.
705 getstr(char *s, char *p, int plen, int *slen)
707 char *origs = s, *origp = p;
708 char *pmax = p + plen - 1;
712 while ((c = *s++) != '\0') {
713 if (isspace((unsigned char) c))
716 fprintf(stderr, "String too long: %s\n", origs);
753 /* \ and up to 3 octal digits */
763 c = *s++; /* try for 2 */
764 if(c >= '0' && c <= '7') {
765 val = (val<<3) | (c - '0');
766 c = *s++; /* try for 3 */
767 if(c >= '0' && c <= '7')
768 val = (val<<3) | (c-'0');
777 /* \x and up to 2 hex digits */
779 val = 'x'; /* Default if no digits */
780 c = hextoint(*s++); /* Get next char */
785 val = (val << 4) + c;
803 /* Single hex char to int; -1 if not a hex char. */
807 if (!isascii((unsigned char) c))
809 if (isdigit((unsigned char) c))
811 if ((c >= 'a')&&(c <= 'f'))
813 if (( c>= 'A')&&(c <= 'F'))
820 * Print a string containing C character escapes.
823 showstr(FILE *fp, const char *s, int len)
837 if(c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */
840 (void) fputc('\\', fp);
844 (void) fputc('n', fp);
848 (void) fputc('r', fp);
852 (void) fputc('b', fp);
856 (void) fputc('t', fp);
860 (void) fputc('f', fp);
864 (void) fputc('v', fp);
868 (void) fprintf(fp, "%.3o", c & 0377);
876 * eatsize(): Eat the size spec from a number [e.g. 10UL]
883 if (LOWCASE(*l) == 'u')
886 switch (LOWCASE(*l)) {
888 case 's': /* short */
889 case 'h': /* short */
890 case 'b': /* char/byte */
891 case 'c': /* char/byte */
902 * load a compiled magic file: open it, then mmap it or read it into a malloc'ed buffer.
905 apprentice_map(struct magic **magicp, uint32_t *nmagicp, const char *fn,
913 char *dbname = mkdbname(fn);
919 if ((fd = open(dbname, O_RDONLY)) == -1)
922 if (fstat(fd, &st) == -1) {
923 (void)fprintf(stderr, "%s: Cannot stat `%s' (%s)\n",
924 progname, dbname, strerror(errno));
929 if ((mm = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE,
930 MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) {
931 (void)fprintf(stderr, "%s: Cannot map `%s' (%s)\n",
932 progname, dbname, strerror(errno));
936 if ((mm = malloc((size_t)st.st_size)) == NULL) {
937 (void) fprintf(stderr, "%s: Out of memory (%s).\n", progname,
941 if (read(fd, mm, (size_t)st.st_size) != (size_t)st.st_size) {
942 (void) fprintf(stderr, "%s: Read failed (%s).\n", progname,
950 ptr = (uint32_t *) *magicp;
951 if (*ptr != MAGICNO) {
952 if (swap4(*ptr) != MAGICNO) {
953 (void)fprintf(stderr, "%s: Bad magic in `%s'\n",
961 version = swap4(ptr[1]);
964 if (version != VERSIONNO) {
965 (void)fprintf(stderr,
966 "%s: version mismatch (%d != %d) in `%s'\n",
967 progname, version, VERSIONNO, dbname);
970 *nmagicp = (st.st_size / sizeof(struct magic)) - 1;
973 byteswap(*magicp, *nmagicp);
981 (void)munmap(mm, (size_t)st.st_size);
993 * write the in-memory magic entries out to the compiled magic file.
996 apprentice_compile(struct magic **magicp, uint32_t *nmagicp, const char *fn,
1000 char *dbname = mkdbname(fn);
1001 static const uint32_t ar[] = {
1008 if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC, 0644)) == -1) {
1009 (void)fprintf(stderr, "%s: Cannot open `%s' (%s)\n",
1010 progname, dbname, strerror(errno));
1014 if (write(fd, ar, sizeof(ar)) != sizeof(ar)) {
1015 (void)fprintf(stderr, "%s: error writing `%s' (%s)\n",
1016 progname, dbname, strerror(errno));
1020 if (lseek(fd, sizeof(struct magic), SEEK_SET) != sizeof(struct magic)) {
1021 (void)fprintf(stderr, "%s: error seeking `%s' (%s)\n",
1022 progname, dbname, strerror(errno));
1026 if (write(fd, *magicp, sizeof(struct magic) * *nmagicp)
1027 != sizeof(struct magic) * *nmagicp) {
1028 (void)fprintf(stderr, "%s: error writing `%s' (%s)\n",
1029 progname, dbname, strerror(errno));
1041 mkdbname(const char *fn)
1043 static const char ext[] = ".mgc";
1044 static char *buf = NULL;
1045 size_t len = strlen(fn) + sizeof(ext) + 1;
1049 buf = realloc(buf, len);
1051 (void) fprintf(stderr, "%s: Out of memory (%s).\n", progname,
1055 (void)strcpy(buf, fn);
1056 (void)strcat(buf, ext);
1061 * Byteswap an mmap'ed file if needed
1064 byteswap(struct magic *magic, uint32_t nmagic)
1067 for (i = 0; i < nmagic; i++)
1078 uint8_t *s = (uint8_t *) &sv;
1079 uint8_t *d = (uint8_t *) &rv;
1092 uint8_t *s = (uint8_t *) &sv;
1093 uint8_t *d = (uint8_t *) &rv;
1102 * byteswap a single magic entry
1105 void bs1(struct magic *m)
1107 m->cont_level = swap2(m->cont_level);
1108 m->offset = swap4(m->offset);
1109 m->in_offset = swap4(m->in_offset);
1110 if (m->type != STRING)
1111 m->value.l = swap4(m->value.l);
1112 m->mask = swap4(m->mask);