2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice immediately at the beginning of the file, without modification,
11 * this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * softmagic - interpret variable magic from MAGIC
42 FILE_RCSID("@(#)$Id: softmagic.c,v 1.73 2005/03/06 05:58:22 christos Exp $")
/*
 * Forward declarations of the helpers defined further down in this file.
 * NOTE(review): private is a project macro, presumably expanding to
 * static -- confirm in the project headers.
 */
45 private int match(struct magic_set *, struct magic *, uint32_t,
46 const unsigned char *, size_t);
47 private int mget(struct magic_set *, union VALUETYPE *, const unsigned char *,
48 struct magic *, size_t, int);
49 private int mcheck(struct magic_set *, union VALUETYPE *, struct magic *);
50 private int32_t mprint(struct magic_set *, union VALUETYPE *, struct magic *);
51 private void mdebug(uint32_t, const char *, size_t);
52 private int mcopy(struct magic_set *, union VALUETYPE *, int, int,
53 const unsigned char *, size_t, size_t);
54 private int mconvert(struct magic_set *, union VALUETYPE *, struct magic *);
55 private int check_mem(struct magic_set *, unsigned int);
58 * softmagic - lookup one file in database
59 * (already read from MAGIC by apprentice.c).
60 * Passed the magic set, a byte buffer to be typed, and the buffer length.
62 /*ARGSUSED1*/ /* nbytes passed for regularity, maybe need later */
/*
 * file_softmagic: walk the list hanging off ms->mlist, starting at
 * ms->mlist->next and stopping when the walk comes back to ms->mlist,
 * and call match() on each entry (ml->magic, ml->nmagic) with buf and
 * nbytes.
 *
 * What is returned after match() reports success, or after the list is
 * exhausted, is not visible in this excerpt.
 *
 * NOTE(review): nbytes is forwarded to match(), so the ARGSUSED lint
 * note that precedes this function looks stale -- confirm before
 * removing it.
 */
64 file_softmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
67 for (ml = ms->mlist->next; ml != ms->mlist; ml = ml->next)
68 if (match(ms, ml->magic, ml->nmagic, buf, nbytes))
75 * Go through the whole list, stopping if you find a match. Process all
76 * the continuations of that match before returning.
78 * We support multi-level continuations:
80 * At any time when processing a successful top-level match, there is a
81 * current continuation level; it represents the level of the last
82 * successfully matched continuation.
84 * Continuations above that level are skipped as, if we see one, it
85 * means that the continuation that controls them - i.e., the
86 * lower-level continuation preceding them - failed to match.
88 * Continuations below that level are processed as, if we see one,
89 * it means we've finished processing or skipping higher-level
90 * continuations under the control of a successful or unsuccessful
91 * lower-level continuation, and are now seeing the next lower-level
92 * continuation and should process it. The current continuation
93 * level reverts to the level of the one we're seeing.
95 * Continuations at the current level are processed as, if we see
96 * one, there's no lower-level continuation that may have failed.
98 * If a continuation matches, we bump the current continuation level
99 * so that higher-level continuations are processed.
/*
 * match: try every entry of magic[0 .. nmagic-1] against the buffer s
 * (nbytes long) and print the description of each entry that matches.
 *
 * Visible flow:
 *  - cont_level starts at 0 and check_mem() is asked to make room for it;
 *    a result of -1 from check_mem() is treated as failure.
 *  - For each entry, flush is set to the negation of mget(); it is
 *    cleared again when the entry relation is the character !.
 *    mcheck() then decides whether the entry matches.
 *  - When a main entry does not match, the following entries whose
 *    cont_level is non-zero are skipped.  That skip loop tests
 *    magindex < nmagic - 1 before it reads magic[magindex + 1].
 *  - For every match after the first (firstline is clear) a newline
 *    followed by a dash and a blank is printed first.
 *  - The value returned by mprint() is stored in ms->c.off[cont_level];
 *    -1 from mprint() is treated as failure.
 *  - cont_level is then incremented and the continuation entries are
 *    processed.  Entries deeper than cont_level are skipped; a shallower
 *    entry resets cont_level to its own level.  When the OFFADD flag is
 *    set, ms->c.off[cont_level - 1] is added to the entry offset for the
 *    duration of the test; the saved value oldoff is written back
 *    afterwards.
 *  - A blank is printed before a continuation description when the entry
 *    has nospflag == 0 and a non-empty desc (together with one more
 *    condition that is not visible here).
 *  - After a top-level match the function returns 1 at once unless
 *    MAGIC_CONTINUE is set in ms->flags; otherwise it keeps going and
 *    finally returns returnval.
 *
 * NOTE(review): the continuation loop evaluates
 * magic[magindex+1].cont_level before ++magindex < nmagic.  When magindex
 * is already the last valid index this reads one element past the array.
 * The skip loop earlier in this function checks the bound first; the
 * continuation loop probably should too -- confirm against the complete
 * source.
 */
102 match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
103 const unsigned char *s, size_t nbytes)
105 uint32_t magindex = 0;
106 unsigned int cont_level = 0;
107 int need_separator = 0;
110 int returnval = 0; /* if a match is found it is set to 1*/
111 int firstline = 1; /* a flag to print X\n X\n- X */
113 if (check_mem(ms, cont_level) == -1)
116 for (magindex = 0; magindex < nmagic; magindex++) {
117 /* if main entry matches, print it... */
118 int flush = !mget(ms, &p, s, &magic[magindex], nbytes,
121 if (magic[magindex].reln == '!') flush = 0;
123 switch (mcheck(ms, &p, &magic[magindex])) {
135 * main entry didn't match,
136 * flush its continuations
138 while (magindex < nmagic - 1 &&
139 magic[magindex + 1].cont_level != 0)
144 if (!firstline) { /* we found another match */
145 /* put a newline and '-' to do some simple formatting*/
146 if (file_printf(ms, "\n- ") == -1)
150 if ((ms->c.off[cont_level] = mprint(ms, &p, &magic[magindex]))
154 * If we printed something, we'll need to print
155 * a blank before we print something else.
157 if (magic[magindex].desc[0])
159 /* and any continuations that match */
160 if (check_mem(ms, ++cont_level) == -1)
163 while (magic[magindex+1].cont_level != 0 &&
164 ++magindex < nmagic) {
165 if (cont_level < magic[magindex].cont_level)
167 if (cont_level > magic[magindex].cont_level) {
169 * We're at the end of the level
170 * "cont_level" continuations.
172 cont_level = magic[magindex].cont_level;
174 oldoff = magic[magindex].offset;
175 if (magic[magindex].flag & OFFADD) {
176 magic[magindex].offset +=
177 ms->c.off[cont_level - 1];
180 flush = !mget(ms, &p, s, &magic[magindex], nbytes,
182 if (flush && magic[magindex].reln != '!')
185 switch (flush ? 1 : mcheck(ms, &p, &magic[magindex])) {
192 * This continuation matched.
193 * Print its message, with
194 * a blank before it if
195 * the previous item printed
196 * and this item isn't empty.
198 /* space if previous printed */
200 && (magic[magindex].nospflag == 0)
201 && (magic[magindex].desc[0] != '\0')) {
202 if (file_printf(ms, " ") == -1)
206 if ((ms->c.off[cont_level] = mprint(ms, &p,
207 &magic[magindex])) == -1)
209 if (magic[magindex].desc[0])
213 * If we see any continuations
217 if (check_mem(ms, ++cont_level) == -1)
221 magic[magindex].offset = oldoff;
225 if ((ms->flags & MAGIC_CONTINUE) == 0) {
226 return 1; /* don't keep searching */
229 return returnval; /* This is hit if -k is set or there is no match */
/*
 * check_mem: make sure the ms->c.off array can be indexed with level.
 *
 * When level is already below ms->c.len nothing has to grow.  Otherwise
 * ms->c.len is increased by 20 entries and ms->c.off is allocated with
 * malloc() (when it is still NULL) or resized with realloc().
 * Callers in this file compare the result against -1 to detect failure;
 * the return statements themselves are not visible in this excerpt.
 *
 * NOTE(review): ms->c.len is bumped and ms->c.off is overwritten with the
 * realloc() result before success is known.  If realloc() fails, the old
 * block is still allocated but its only pointer has been replaced by
 * NULL (a leak), and ms->c.len no longer describes any buffer.  Keeping
 * the result in a temporary and committing both fields only on success
 * would avoid that.
 * NOTE(review): realloc(NULL, len) behaves like malloc(len) in ISO C, so
 * the NULL special case is presumably there for pre-ANSI libraries.
 */
233 check_mem(struct magic_set *ms, unsigned int level)
237 if (level < ms->c.len)
240 len = (ms->c.len += 20) * sizeof(*ms->c.off);
241 ms->c.off = (ms->c.off == NULL) ? malloc(len) : realloc(ms->c.off, len);
242 if (ms->c.off != NULL)
/*
 * mprint: print the description of magic entry m for the value in *p.
 *
 * Each visible branch calls file_printf() with m->desc as the format
 * string and one argument, treats a result of -1 from file_printf() as
 * failure, and then sets t to m->offset plus the size of the data the
 * entry covered:
 *  - integer values go through file_signextend() and are printed as
 *    unsigned char, unsigned short or uint32_t; t advances by
 *    sizeof(char), sizeof(short) or sizeof(int32_t);
 *  - string values print m->value.s when the relation is = or !
 *    (t advances by m->vallen), otherwise p->s (t advances by
 *    strlen(p->s)); when m->value.s is empty the first newline in p->s
 *    is looked up with strchr(), and what is done with the result is not
 *    visible here;
 *  - date values print file_fmttime(p->l, 1) or file_fmttime(p->l, 0)
 *    and advance t by sizeof(time_t);
 *  - any other type is reported through file_error().
 * Most case labels are not visible in this excerpt.  match() stores the
 * int32_t result in ms->c.off[] and treats -1 as failure; presumably the
 * success result is t -- confirm against the complete source.
 *
 * NOTE(review): m->desc is used directly as a printf-style format, so the
 * magic database has to be trusted input.
 * NOTE(review): the date branches add sizeof(time_t) although the value
 * printed comes from p->l; on platforms where time_t is wider than the
 * stored field the resulting offset may be too large -- confirm.
 */
249 mprint(struct magic_set *ms, union VALUETYPE *p, struct magic *m)
257 v = file_signextend(ms, m, (size_t)p->b);
258 if (file_printf(ms, m->desc, (unsigned char) v) == -1)
260 t = m->offset + sizeof(char);
266 v = file_signextend(ms, m, (size_t)p->h);
267 if (file_printf(ms, m->desc, (unsigned short) v) == -1)
269 t = m->offset + sizeof(short);
275 v = file_signextend(ms, m, p->l);
276 if (file_printf(ms, m->desc, (uint32_t) v) == -1)
278 t = m->offset + sizeof(int32_t);
283 case FILE_BESTRING16:
284 case FILE_LESTRING16:
285 if (m->reln == '=' || m->reln == '!') {
286 if (file_printf(ms, m->desc, m->value.s) == -1)
288 t = m->offset + m->vallen;
291 if (*m->value.s == '\0') {
292 char *cp = strchr(p->s,'\n');
296 if (file_printf(ms, m->desc, p->s) == -1)
298 t = m->offset + strlen(p->s);
305 if (file_printf(ms, m->desc, file_fmttime(p->l, 1)) == -1)
307 t = m->offset + sizeof(time_t);
313 if (file_printf(ms, m->desc, file_fmttime(p->l, 0)) == -1)
315 t = m->offset + sizeof(time_t);
318 if (file_printf(ms, m->desc, p->s) == -1)
320 t = m->offset + strlen(p->s);
323 if (file_printf(ms, m->desc, m->value.s) == -1)
325 t = m->offset + m->vallen;
329 file_error(ms, 0, "invalid m->type (%d) in mprint()", m->type);
336 * Convert the byte order of the data we are looking at
337 * While we're here, let's apply the mask operation
338 * (unless you have a better idea)
/*
 * mconvert: normalise the value in *p in place for magic entry m.
 *
 * The function dispatches on m->type (an unknown type is reported with
 * file_error()).  For the numeric types it repeats the same pattern:
 * a switch on m->mask_op & 0x7F selects the mask operation and a
 * following test of m->mask_op & FILE_OPINVERSE handles inversion; the
 * operations themselves are not visible in this excerpt.
 *
 * Byte-order handling that is visible:
 *  - p->h is rebuilt from p->hs[0] and p->hs[1], with hs[0] as the high
 *    byte in one branch and hs[1] as the high byte in the other;
 *  - the 32-bit value is rebuilt from p->hl[0..3] in both byte orders.
 *
 * String handling that is visible:
 *  - p->s is forcibly NUL-terminated at its last element and a trailing
 *    newline is removed;
 *  - another branch sets ptr1 = p->s and ptr2 = ptr1 + 1 and clamps len
 *    to sizeof(p->s) - 1 before removing a trailing newline (presumably
 *    the length-prefixed string type -- confirm).
 *
 * mget() treats a zero result from this function as failure.
 *
 * NOTE(review): if the hl[] elements are unsigned char (assumed, the
 * union is declared elsewhere), hl[n] << 24 is evaluated as int and
 * overflows for bytes >= 0x80; casting to uint32_t before shifting would
 * be the defensive form -- confirm against the union declaration.
 */
341 mconvert(struct magic_set *ms, union VALUETYPE *p, struct magic *m)
346 switch (m->mask_op & 0x7F) {
362 case FILE_OPMULTIPLY:
372 if (m->mask_op & FILE_OPINVERSE)
377 switch (m->mask_op & 0x7F) {
393 case FILE_OPMULTIPLY:
403 if (m->mask_op & FILE_OPINVERSE)
410 switch (m->mask_op & 0x7F) {
426 case FILE_OPMULTIPLY:
436 if (m->mask_op & FILE_OPINVERSE)
440 case FILE_BESTRING16:
441 case FILE_LESTRING16:
445 /* Null terminate and eat *trailing* return */
446 p->s[sizeof(p->s) - 1] = '\0';
448 if (len-- && p->s[len] == '\n')
454 char *ptr1 = p->s, *ptr2 = ptr1 + 1;
456 if (len >= sizeof(p->s))
457 len = sizeof(p->s) - 1;
462 if (len-- && p->s[len] == '\n')
467 p->h = (short)((p->hs[0]<<8)|(p->hs[1]));
469 switch (m->mask_op&0x7F) {
485 case FILE_OPMULTIPLY:
495 if (m->mask_op & FILE_OPINVERSE)
502 ((p->hl[0]<<24)|(p->hl[1]<<16)|(p->hl[2]<<8)|(p->hl[3]));
504 switch (m->mask_op&0x7F) {
520 case FILE_OPMULTIPLY:
530 if (m->mask_op & FILE_OPINVERSE)
534 p->h = (short)((p->hs[1]<<8)|(p->hs[0]));
536 switch (m->mask_op&0x7F) {
552 case FILE_OPMULTIPLY:
562 if (m->mask_op & FILE_OPINVERSE)
569 ((p->hl[3]<<24)|(p->hl[2]<<16)|(p->hl[1]<<8)|(p->hl[0]));
571 switch (m->mask_op&0x7F) {
587 case FILE_OPMULTIPLY:
597 if (m->mask_op & FILE_OPINVERSE)
604 file_error(ms, 0, "invalid type %d in mconvert()", m->type);
/*
 * mdebug: debugging aid that writes to stderr the offset being examined,
 * then the len bytes at str rendered by file_showstr(), then two newline
 * characters.
 *
 * NOTE(review): offset is a uint32_t but is printed with the signed
 * conversion %d; %u (or PRIu32 from <inttypes.h>) would match the
 * argument type.
 */
611 mdebug(uint32_t offset, const char *str, size_t len)
613 (void) fprintf(stderr, "mget @%d: ", offset);
614 file_showstr(stderr, str, len);
615 (void) fputc('\n', stderr);
616 (void) fputc('\n', stderr);
/*
 * mcopy: copy the data that starts at s + offset into *p, in a way that
 * depends on type and indir.  s is nbytes long.
 *
 * Visible paths:
 *  - type == FILE_REGEX with indir == 0: the whole buffer is duplicated
 *    into p->buf with strdup(), and offset is used as a count of lines
 *    (starting at 1) instead of a byte position while the copy is scanned
 *    for newline and carriage-return characters; a carriage return that
 *    is followed by a newline is stepped over as one terminator.
 *  - type == FILE_BESTRING16 or FILE_LESTRING16 with indir == 0: bytes
 *    are walked from s + offset up to s + nbytes and written into p->s,
 *    with edst marking the last element of p->s.
 *  - otherwise: if offset is at or beyond nbytes, *p is zero-filled;
 *    else up to sizeof(*p) bytes are copied with memcpy() and the rest of
 *    *p is zero-padded.
 * Callers compare the result against -1 to detect failure.
 *
 * NOTE(review): strdup() stops at the first NUL byte and needs one to be
 * present; nothing visible here guarantees that s is NUL-terminated
 * within nbytes -- confirm what the caller provides.
 * NOTE(review): src and esrc are declared const char * but initialised
 * from a const unsigned char * expression without a cast; compilers
 * diagnose the pointer signedness mismatch.
 * NOTE(review): who releases p->buf is not visible in this excerpt.
 */
620 mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir,
621 const unsigned char *s, size_t offset, size_t nbytes)
623 if (type == FILE_REGEX && indir == 0) {
625 * offset is interpreted as last line to search,
626 * (starting at 1), not as bytes-from start-of-file
628 char *b, *c, *last = NULL;
629 if ((p->buf = strdup((const char *)s)) == NULL) {
633 for (b = p->buf; offset &&
634 ((b = strchr(c = b, '\n')) || (b = strchr(c, '\r')));
637 if (b[0] == '\r' && b[1] == '\n') b++;
644 if (indir == 0 && (type == FILE_BESTRING16 || type == FILE_LESTRING16))
646 const char *src = s + offset;
647 const char *esrc = s + nbytes;
648 char *dst = p->s, *edst = &p->s[sizeof(p->s) - 1];
650 if (type == FILE_BESTRING16)
653 for (;src < esrc; src++, dst++) {
665 if (offset >= nbytes) {
666 (void)memset(p, '\0', sizeof(*p));
669 if (nbytes - offset < sizeof(*p))
670 nbytes = nbytes - offset;
674 (void)memcpy(p, s + offset, nbytes);
677 * the usefulness of padding with zeroes eludes me, it
678 * might even cause problems
680 if (nbytes < sizeof(*p))
681 (void)memset(((char *)p) + nbytes, '\0', sizeof(*p) - nbytes);
/*
 * mget: load into *p the value that magic entry m refers to in the
 * buffer s (nbytes long).  cont_level is only used to index
 * ms->c.off[cont_level-1].
 *
 * Visible steps:
 *  - copy the data at m->offset into *p with mcopy() (a result of -1 is
 *    failure) and, when MAGIC_DEBUG is set, dump it with mdebug();
 *  - when m->flag has INDIR set, compute a new offset: off starts as
 *    m->in_offset and, with FILE_OPINDIRECT, is replaced by a 16-bit or
 *    32-bit value in either byte order read from s + offset + off;
 *    then, per m->in_type and after an nbytes < offset + N test, the
 *    value already in *p is combined with off according to m->in_op,
 *    with a separate FILE_OPINVERSE test; ms->c.off[cont_level-1] is
 *    added when INDIROFFADD is set; mcopy() is run again at the new
 *    offset and the debug dump is repeated;
 *  - check that enough bytes remain for the type being matched
 *    (1, 2, 4 or m->vallen bytes);
 *  - for FILE_SEARCH, allocate p->buf with m->mask + m->vallen bytes and
 *    copy that many bytes from s + offset;
 *  - finish with mconvert(); a zero result from it is failure.
 * match() negates the result of this function to obtain its flush flag,
 * so presumably non-zero means a value was fetched -- confirm.
 *
 * NOTE(review): the first in_op switch masks with 0x3F while every other
 * switch in this function masks with 0x7F; that looks like an
 * inconsistency -- confirm which mask is intended.
 * NOTE(review): q is formed from s + offset + off and dereferenced with
 * no bounds test visible beforehand; the per-type nbytes tests come
 * later and use offset only.
 * NOTE(review): the divide and modulo branches use off as the divisor
 * and no zero test is visible.
 * NOTE(review): the tests of the form nbytes < (offset + N) are done in
 * uint32_t arithmetic on offset and can wrap for offsets close to
 * UINT32_MAX.
 * NOTE(review): the FILE_SEARCH copy reads m->mask + m->vallen bytes
 * from s + offset, but the visible size test only covers m->vallen;
 * confirm the read cannot run past nbytes.  Who frees p->buf is not
 * visible here.
 */
686 mget(struct magic_set *ms, union VALUETYPE *p, const unsigned char *s,
687 struct magic *m, size_t nbytes, int cont_level)
689 uint32_t offset = m->offset;
691 if (mcopy(ms, p, m->type, m->flag & INDIR, s, offset, nbytes) == -1)
694 if ((ms->flags & MAGIC_DEBUG) != 0) {
695 mdebug(offset, (char *)(void *)p, sizeof(union VALUETYPE));
699 if (m->flag & INDIR) {
700 int off = m->in_offset;
701 if (m->in_op & FILE_OPINDIRECT) {
702 const union VALUETYPE *q =
703 ((const union VALUETYPE *)(s + offset + off));
704 switch (m->in_type) {
712 off = (short)((q->hs[0]<<8)|(q->hs[1]));
715 off = (short)((q->hs[1]<<8)|(q->hs[0]));
721 off = (int32_t)((q->hl[0]<<24)|(q->hl[1]<<16)|
722 (q->hl[2]<<8)|(q->hl[3]));
725 off = (int32_t)((q->hl[3]<<24)|(q->hl[2]<<16)|
726 (q->hl[1]<<8)|(q->hl[0]));
730 switch (m->in_type) {
732 if (nbytes < (offset + 1)) return 0;
734 switch (m->in_op & 0x3F) {
750 case FILE_OPMULTIPLY:
762 if (m->in_op & FILE_OPINVERSE)
766 if (nbytes < (offset + 2))
769 switch (m->in_op & 0x7F) {
771 offset = (short)((p->hs[0]<<8)|
776 offset = (short)((p->hs[0]<<8)|
781 offset = (short)((p->hs[0]<<8)|
786 offset = (short)((p->hs[0]<<8)|
791 offset = (short)((p->hs[0]<<8)|
795 case FILE_OPMULTIPLY:
796 offset = (short)((p->hs[0]<<8)|
801 offset = (short)((p->hs[0]<<8)|
806 offset = (short)((p->hs[0]<<8)|
812 offset = (short)((p->hs[0]<<8)|
814 if (m->in_op & FILE_OPINVERSE)
818 if (nbytes < (offset + 2))
821 switch (m->in_op & 0x7F) {
823 offset = (short)((p->hs[1]<<8)|
828 offset = (short)((p->hs[1]<<8)|
833 offset = (short)((p->hs[1]<<8)|
838 offset = (short)((p->hs[1]<<8)|
843 offset = (short)((p->hs[1]<<8)|
847 case FILE_OPMULTIPLY:
848 offset = (short)((p->hs[1]<<8)|
853 offset = (short)((p->hs[1]<<8)|
858 offset = (short)((p->hs[1]<<8)|
864 offset = (short)((p->hs[1]<<8)|
866 if (m->in_op & FILE_OPINVERSE)
870 if (nbytes < (offset + 2))
873 switch (m->in_op & 0x7F) {
889 case FILE_OPMULTIPLY:
902 if (m->in_op & FILE_OPINVERSE)
906 if (nbytes < (offset + 4))
909 switch (m->in_op & 0x7F) {
911 offset = (int32_t)((p->hl[0]<<24)|
918 offset = (int32_t)((p->hl[0]<<24)|
925 offset = (int32_t)((p->hl[0]<<24)|
932 offset = (int32_t)((p->hl[0]<<24)|
939 offset = (int32_t)((p->hl[0]<<24)|
945 case FILE_OPMULTIPLY:
946 offset = (int32_t)((p->hl[0]<<24)|
953 offset = (int32_t)((p->hl[0]<<24)|
960 offset = (int32_t)((p->hl[0]<<24)|
968 offset = (int32_t)((p->hl[0]<<24)|
972 if (m->in_op & FILE_OPINVERSE)
976 if (nbytes < (offset + 4))
979 switch (m->in_op & 0x7F) {
981 offset = (int32_t)((p->hl[3]<<24)|
988 offset = (int32_t)((p->hl[3]<<24)|
995 offset = (int32_t)((p->hl[3]<<24)|
1002 offset = (int32_t)((p->hl[3]<<24)|
1009 offset = (int32_t)((p->hl[3]<<24)|
1015 case FILE_OPMULTIPLY:
1016 offset = (int32_t)((p->hl[3]<<24)|
1023 offset = (int32_t)((p->hl[3]<<24)|
1030 offset = (int32_t)((p->hl[3]<<24)|
1038 offset = (int32_t)((p->hl[3]<<24)|
1042 if (m->in_op & FILE_OPINVERSE)
1046 if (nbytes < (offset + 4))
1049 switch (m->in_op & 0x7F) {
1051 offset = p->l & off;
1054 offset = p->l | off;
1057 offset = p->l ^ off;
1060 offset = p->l + off;
1063 offset = p->l - off;
1065 case FILE_OPMULTIPLY:
1066 offset = p->l * off;
1069 offset = p->l / off;
1072 offset = p->l % off;
1074 /* case TOOMANYSWITCHBLOCKS:
1075 * ugh = p->eye % m->strain;
1078 * off = p->tab & m->in_gest;
1084 if (m->in_op & FILE_OPINVERSE)
1089 if (m->flag & INDIROFFADD) offset += ms->c.off[cont_level-1];
1090 if (mcopy(ms, p, m->type, 0, s, offset, nbytes) == -1)
1094 if ((ms->flags & MAGIC_DEBUG) != 0) {
1095 mdebug(offset, (char *)(void *)p,
1096 sizeof(union VALUETYPE));
1101 /* Verify we have enough data to match magic type */
1104 if (nbytes < (offset + 1)) /* should always be true */
1111 if (nbytes < (offset + 2))
1124 if (nbytes < (offset + 4))
1131 if (nbytes < (offset + m->vallen))
1137 if (m->type == FILE_SEARCH) {
1138 p->buf = malloc(m->mask + m->vallen);
1139 if (p->buf == NULL) {
1140 file_error(ms, errno, "Cannot allocate search buffer");
1143 (void)memcpy(p->buf, s + offset, m->mask + m->vallen);
1145 if (!mconvert(ms, p, m))
1151 mcheck(struct magic_set *ms, union VALUETYPE *p, struct magic *m)
1153 uint32_t l = m->value.l;
1157 if ( (m->value.s[0] == 'x') && (m->value.s[1] == '\0') ) {
1186 case FILE_BESTRING16:
1187 case FILE_LESTRING16:
1191 * What we want here is:
1192 * v = strncmp(m->value.s, p->s, m->vallen);
1193 * but ignoring any nulls. bcmp doesn't give -/+/0
1194 * and isn't universally available anyway.
1196 unsigned char *a = (unsigned char*)m->value.s;
1197 unsigned char *b = (unsigned char*)p->s;
1198 int len = m->vallen;
1201 if (0L == m->mask) { /* normal string: do it fast */
1203 if ((v = *b++ - *a++) != '\0')
1205 } else { /* combine the others */
1206 while (--len >= 0) {
1207 if ((m->mask & STRING_IGNORE_LOWERCASE) &&
1209 if ((v = tolower(*b++) - *a++) != '\0')
1211 } else if ((m->mask & STRING_COMPACT_BLANK) &&
1214 if (isspace(*b++)) {
1221 } else if (isspace(*a) &&
1222 (m->mask & STRING_COMPACT_OPTIONAL_BLANK)) {
1227 if ((v = *b++ - *a++) != '\0')
1240 rc = regcomp(&rx, m->value.s,
1241 REG_EXTENDED|REG_NOSUB|REG_NEWLINE|
1242 ((m->mask & STRING_IGNORE_LOWERCASE) ? REG_ICASE : 0));
1245 regerror(rc, &rx, errmsg, sizeof(errmsg));
1246 file_error(ms, 0, "regex error %d, (%s)", rc, errmsg);
1249 rc = regexec(&rx, p->buf, 0, 0, 0);
1258 * search for a string in a certain range
1260 unsigned char *a = (unsigned char*)m->value.s;
1261 unsigned char *b = (unsigned char*)p->buf;
1262 int len = m->vallen;
1266 while (++range <= m->mask) {
1267 while (len-- > 0 && (v = *b++ - *a++) == 0)
1270 m->offset += range-1;
1274 a = (unsigned char*)m->value.s;
1275 b = (unsigned char*)p->buf + range;
1281 file_error(ms, 0, "invalid type %d in mcheck()", m->type);
1285 if (m->type != FILE_STRING && m->type != FILE_PSTRING)
1286 v = file_signextend(ms, m, v);
1290 if ((ms->flags & MAGIC_DEBUG) != 0)
1291 (void) fprintf(stderr, "%u == *any* = 1\n", v);
1297 if ((ms->flags & MAGIC_DEBUG) != 0)
1298 (void) fprintf(stderr, "%u != %u = %d\n",
1304 if ((ms->flags & MAGIC_DEBUG) != 0)
1305 (void) fprintf(stderr, "%u == %u = %d\n",
1310 if (m->flag & UNSIGNED) {
1312 if ((ms->flags & MAGIC_DEBUG) != 0)
1313 (void) fprintf(stderr, "%u > %u = %d\n",
1317 matched = (int32_t) v > (int32_t) l;
1318 if ((ms->flags & MAGIC_DEBUG) != 0)
1319 (void) fprintf(stderr, "%d > %d = %d\n",
1325 if (m->flag & UNSIGNED) {
1327 if ((ms->flags & MAGIC_DEBUG) != 0)
1328 (void) fprintf(stderr, "%u < %u = %d\n",
1332 matched = (int32_t) v < (int32_t) l;
1333 if ((ms->flags & MAGIC_DEBUG) != 0)
1334 (void) fprintf(stderr, "%d < %d = %d\n",
1340 matched = (v & l) == l;
1341 if ((ms->flags & MAGIC_DEBUG) != 0)
1342 (void) fprintf(stderr, "((%x & %x) == %x) = %d\n",
1347 matched = (v & l) != l;
1348 if ((ms->flags & MAGIC_DEBUG) != 0)
1349 (void) fprintf(stderr, "((%x & %x) != %x) = %d\n",
1355 file_error(ms, 0, "cannot happen: invalid relation `%c'",