Import libarchive-2.2.7.
[dragonfly.git] / contrib / libarchive-2 / libarchive / archive_read_support_format_mtree.c
1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "archive_platform.h"
27 __FBSDID("$FreeBSD$");
28
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#include <stddef.h>
/* #include <stdint.h> */ /* See archive_platform.h */
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
46
47 #include "archive.h"
48 #include "archive_entry.h"
49 #include "archive_private.h"
50 #include "archive_read_private.h"
51 #include "archive_string.h"
52
/*
 * One non-comment line of the mtree specification.  Lines are kept in
 * a singly linked list in the order they were read.
 */
struct mtree_entry {
        /* Following line in the list, or NULL at the end. */
        struct mtree_entry      *next;
        /* Heap copy of the line; the path name sits at its start. */
        char                    *name;
        /* First keyword; points into the block 'name' refers to. */
        char                    *option_start;
        /* End of the keywords; points into the same block. */
        char                    *option_end;
        /* Set by parse_escapes() when the name contains a '/'. */
        char                     full;
        /* Set by read_header() once this line has been processed. */
        char                     used;
};
61
62 struct mtree {
63         struct archive_string    line;
64         size_t                   buffsize;
65         char                    *buff;
66         off_t                    offset;
67         int                      fd;
68         int                      bid;
69         int                      filetype;
70         int                      archive_format;
71         const char              *archive_format_name;
72         struct mtree_entry      *entries;
73         struct mtree_entry      *this_entry;
74         struct archive_string    current_dir;
75         struct archive_string    contents_name;
76 };
77
/* Callbacks handed to __archive_read_register_format(). */
static int      bid(struct archive_read *a);
static int      cleanup(struct archive_read *a);
static int      read_data(struct archive_read *a,
                    const void **buff, size_t *size, off_t *offset);
static int      read_header(struct archive_read *a,
                    struct archive_entry *entry);
static int      skip(struct archive_read *a);

/* Internal parsing helpers. */
static int64_t  mtree_atol10(char **p);
static int64_t  mtree_atol8(char **p);
static void     parse_escapes(char *src, struct mtree_entry *mentry);
static int      parse_setting(struct archive_read *a, struct mtree *mtree,
                    struct archive_entry *entry, char *key, char *end);
static ssize_t  readline(struct archive_read *a, struct mtree *mtree,
                    char **start, ssize_t limit);
91
92 int
93 archive_read_support_format_mtree(struct archive *_a)
94 {
95         struct archive_read *a = (struct archive_read *)_a;
96         struct mtree *mtree;
97         int r;
98
99         mtree = (struct mtree *)malloc(sizeof(*mtree));
100         if (mtree == NULL) {
101                 archive_set_error(&a->archive, ENOMEM,
102                     "Can't allocate mtree data");
103                 return (ARCHIVE_FATAL);
104         }
105         memset(mtree, 0, sizeof(*mtree));
106         mtree->bid = -1;
107         mtree->fd = -1;
108
109         r = __archive_read_register_format(a, mtree,
110             bid, read_header, read_data, skip, cleanup);
111
112         if (r != ARCHIVE_OK)
113                 free(mtree);
114         return (ARCHIVE_OK);
115 }
116
117 static int
118 cleanup(struct archive_read *a)
119 {
120         struct mtree *mtree;
121         struct mtree_entry *p, *q;
122
123         mtree = (struct mtree *)(a->format->data);
124         p = mtree->entries;
125         while (p != NULL) {
126                 q = p->next;
127                 free(p->name);
128                 /*
129                  * Note: option_start, option_end are pointers into
130                  * the block that p->name points to.  So we should
131                  * not try to free them!
132                  */
133                 free(p);
134                 p = q;
135         }
136         archive_string_free(&mtree->line);
137         archive_string_free(&mtree->current_dir);
138         archive_string_free(&mtree->contents_name);
139         free(mtree->buff);
140         free(mtree);
141         (a->format->data) = NULL;
142         return (ARCHIVE_OK);
143 }
144
145
146 static int
147 bid(struct archive_read *a)
148 {
149         struct mtree *mtree;
150         ssize_t bytes_read;
151         const void *h;
152         const char *signature = "#mtree";
153         const char *p;
154
155         mtree = (struct mtree *)(a->format->data);
156         if (mtree->bid != -1)
157                 return (mtree->bid);
158
159         /* Now let's look at the actual header and see if it matches. */
160         bytes_read = (a->decompressor->read_ahead)(a, &h, strlen(signature));
161
162         if (bytes_read <= 0)
163                 return (bytes_read);
164
165         p = h;
166         mtree->bid = 0;
167         while (bytes_read > 0 && *signature != '\0') {
168                 if (*p != *signature)
169                         return (mtree->bid = 0);
170                 mtree->bid += 8;
171                 p++;
172                 signature++;
173                 bytes_read--;
174         }
175         return (mtree->bid);
176 }
177
178 /*
179  * The extended mtree format permits multiple lines specifying
180  * attributes for each file.  Practically speaking, that means we have
181  * to read the entire mtree file into memory up front.
182  */
183 static int
184 read_mtree(struct archive_read *a, struct mtree *mtree)
185 {
186         ssize_t len;
187         char *p;
188         struct mtree_entry *mentry;
189         struct mtree_entry *last_mentry = NULL;
190
191         mtree->archive_format = ARCHIVE_FORMAT_MTREE_V1;
192         mtree->archive_format_name = "mtree";
193
194         for (;;) {
195                 len = readline(a, mtree, &p, 256);
196                 if (len == 0) {
197                         mtree->this_entry = mtree->entries;
198                         return (ARCHIVE_OK);
199                 }
200                 if (len < 0)
201                         return (len);
202                 /* Leading whitespace is never significant, ignore it. */
203                 while (*p == ' ' || *p == '\t') {
204                         ++p;
205                         --len;
206                 }
207                 /* Skip content lines and blank lines. */
208                 if (*p == '#')
209                         continue;
210                 if (*p == '\r' || *p == '\n' || *p == '\0')
211                         continue;
212                 mentry = malloc(sizeof(*mentry));
213                 if (mentry == NULL) {
214                         archive_set_error(&a->archive, ENOMEM,
215                             "Can't allocate memory");
216                         return (ARCHIVE_FATAL);
217                 }
218                 memset(mentry, 0, sizeof(*mentry));
219                 /* Add this entry to list. */
220                 if (last_mentry == NULL) {
221                         last_mentry = mtree->entries = mentry;
222                 } else {
223                         last_mentry->next = mentry;
224                 }
225                 last_mentry = mentry;
226
227                 /* Copy line over onto heap. */
228                 mentry->name = malloc(len + 1);
229                 if (mentry->name == NULL) {
230                         free(mentry);
231                         archive_set_error(&a->archive, ENOMEM,
232                             "Can't allocate memory");
233                         return (ARCHIVE_FATAL);
234                 }
235                 strcpy(mentry->name, p);
236                 mentry->option_end = mentry->name + len;
237                 /* Find end of name. */
238                 p = mentry->name;
239                 while (*p != ' ' && *p != '\n' && *p != '\0')
240                         ++p;
241                 *p++ = '\0';
242                 parse_escapes(mentry->name, mentry);
243                 /* Find start of options and record it. */
244                 while (p < mentry->option_end && (*p == ' ' || *p == '\t'))
245                         ++p;
246                 mentry->option_start = p;
247                 /* Null terminate each separate option. */
248                 while (++p < mentry->option_end)
249                         if (*p == ' ' || *p == '\t' || *p == '\n')
250                                 *p = '\0';
251         }
252 }
253
254 static int
255 read_header(struct archive_read *a, struct archive_entry *entry)
256 {
257         struct stat st;
258         struct mtree *mtree;
259         struct mtree_entry *mentry, *mentry2;
260         char *p, *q;
261         int r = ARCHIVE_OK, r1;
262
263         mtree = (struct mtree *)(a->format->data);
264
265         if (mtree->fd >= 0) {
266                 close(mtree->fd);
267                 mtree->fd = -1;
268         }
269
270         if (mtree->entries == NULL) {
271                 r = read_mtree(a, mtree);
272                 if (r != ARCHIVE_OK)
273                         return (r);
274         }
275
276         a->archive.archive_format = mtree->archive_format;
277         a->archive.archive_format_name = mtree->archive_format_name;
278
279         for (;;) {
280                 mentry = mtree->this_entry;
281                 if (mentry == NULL) {
282                         mtree->this_entry = NULL;
283                         return (ARCHIVE_EOF);
284                 }
285                 mtree->this_entry = mentry->next;
286                 if (mentry->used)
287                         continue;
288                 mentry->used = 1;
289                 if (strcmp(mentry->name, "..") == 0) {
290                         if (archive_strlen(&mtree->current_dir) > 0) {
291                                 /* Roll back current path. */
292                                 p = mtree->current_dir.s
293                                     + mtree->current_dir.length - 1;
294                                 while (p >= mtree->current_dir.s && *p != '/')
295                                         --p;
296                                 if (p >= mtree->current_dir.s)
297                                         --p;
298                                 mtree->current_dir.length
299                                     = p - mtree->current_dir.s + 1;
300                         }
301                         continue;
302                 }
303
304                 mtree->filetype = AE_IFREG;
305
306                 /* Parse options. */
307                 p = mentry->option_start;
308                 while (p < mentry->option_end) {
309                         q = p + strlen(p);
310                         r1 = parse_setting(a, mtree, entry, p, q);
311                         if (r1 != ARCHIVE_OK)
312                                 r = r1;
313                         p = q + 1;
314                 }
315
316                 if (mentry->full) {
317                         archive_entry_copy_pathname(entry, mentry->name);
318                         /*
319                          * "Full" entries are allowed to have multiple
320                          * lines and those lines aren't required to be
321                          * adjacent.  We don't support multiple lines
322                          * for "relative" entries nor do we make any
323                          * attempt to merge data from separate
324                          * "relative" and "full" entries.  (Merging
325                          * "relative" and "full" entries would require
326                          * dealing with pathname canonicalization,
327                          * which is a very tricky subject.)
328                          */
329                         mentry2 = mentry->next;
330                         while (mentry2 != NULL) {
331                                 if (mentry2->full
332                                     && !mentry2->used
333                                     && strcmp(mentry->name, mentry2->name) == 0) {
334                                         /*
335                                          * Add those options as well;
336                                          * later lines override
337                                          * earlier ones.
338                                          */
339                                         p = mentry2->option_start;
340                                         while (p < mentry2->option_end) {
341                                                 q = p + strlen(p);
342                                                 r1 = parse_setting(a, mtree, entry, p, q);
343                                                 if (r1 != ARCHIVE_OK)
344                                                         r = r1;
345                                                 p = q + 1;
346                                         }
347                                         mentry2->used = 1;
348                                 }
349                                 mentry2 = mentry2->next;
350                         }
351                 } else {
352                         /*
353                          * Relative entries require us to construct
354                          * the full path and possibly update the
355                          * current directory.
356                          */
357                         size_t n = archive_strlen(&mtree->current_dir);
358                         if (n > 0)
359                                 archive_strcat(&mtree->current_dir, "/");
360                         archive_strcat(&mtree->current_dir, mentry->name);
361                         archive_entry_copy_pathname(entry, mtree->current_dir.s);
362                         if (archive_entry_filetype(entry) != AE_IFDIR)
363                                 mtree->current_dir.length = n;
364                 }
365
366                 /*
367                  * Try to open and stat the file to get the real size.
368                  * It would be nice to avoid this here so that getting
369                  * a listing of an mtree wouldn't require opening
370                  * every referenced contents file.  But then we
371                  * wouldn't know the actual contents size, so I don't
372                  * see a really viable way around this.  (Also, we may
373                  * want to someday pull other unspecified info from
374                  * the contents file on disk.)
375                  */
376                 if (archive_strlen(&mtree->contents_name) > 0) {
377                         mtree->fd = open(mtree->contents_name.s, O_RDONLY);
378                         if (mtree->fd < 0) {
379                                 archive_set_error(&a->archive, errno,
380                                     "Can't open content=\"%s\"",
381                                     mtree->contents_name.s);
382                                 r = ARCHIVE_WARN;
383                         }
384                 } else {
385                         /* If the specified path opens, use it. */
386                         mtree->fd = open(mtree->current_dir.s, O_RDONLY);
387                         /* But don't fail if it's not there. */
388                 }
389
390                 /*
391                  * If there is a contents file on disk, use that size;
392                  * otherwise leave it as-is (it might have been set from
393                  * the mtree size= keyword).
394                  */
395                 if (mtree->fd >= 0) {
396                         fstat(mtree->fd, &st);
397                         archive_entry_set_size(entry, st.st_size);
398                 }
399
400                 return r;
401         }
402 }
403
404 static int
405 parse_setting(struct archive_read *a, struct mtree *mtree, struct archive_entry *entry, char *key, char *end)
406 {
407         char *val;
408
409
410         if (end == key)
411                 return (ARCHIVE_OK);
412         if (*key == '\0')
413                 return (ARCHIVE_OK);
414
415         val = strchr(key, '=');
416         if (val == NULL) {
417                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
418                     "Malformed attribute \"%s\" (%d)", key, key[0]);
419                 return (ARCHIVE_WARN);
420         }
421
422         *val = '\0';
423         ++val;
424
425         switch (key[0]) {
426         case 'c':
427                 if (strcmp(key, "content") == 0) {
428                         parse_escapes(val, NULL);
429                         archive_strcpy(&mtree->contents_name, val);
430                         break;
431                 }
432         case 'g':
433                 if (strcmp(key, "gid") == 0) {
434                         archive_entry_set_gid(entry, mtree_atol10(&val));
435                         break;
436                 }
437                 if (strcmp(key, "gname") == 0) {
438                         archive_entry_copy_gname(entry, val);
439                         break;
440                 }
441         case 'm':
442                 if (strcmp(key, "mode") == 0) {
443                         if (val[0] == '0') {
444                                 archive_entry_set_perm(entry,
445                                     mtree_atol8(&val));
446                         } else
447                                 archive_set_error(&a->archive,
448                                     ARCHIVE_ERRNO_FILE_FORMAT,
449                                     "Symbolic mode \"%s\" unsupported", val);
450                         break;
451                 }
452         case 't':
453                 if (strcmp(key, "type") == 0) {
454                         switch (val[0]) {
455                         case 'b':
456                                 if (strcmp(val, "block") == 0) {
457                                         mtree->filetype = AE_IFBLK;
458                                         break;
459                                 }
460                         case 'c':
461                                 if (strcmp(val, "char") == 0) {
462                                         mtree->filetype = AE_IFCHR;
463                                         break;
464                                 }
465                         case 'd':
466                                 if (strcmp(val, "dir") == 0) {
467                                         mtree->filetype = AE_IFDIR;
468                                         break;
469                                 }
470                         case 'f':
471                                 if (strcmp(val, "fifo") == 0) {
472                                         mtree->filetype = AE_IFIFO;
473                                         break;
474                                 }
475                                 if (strcmp(val, "file") == 0) {
476                                         mtree->filetype = AE_IFREG;
477                                         break;
478                                 }
479                         case 'l':
480                                 if (strcmp(val, "link") == 0) {
481                                         mtree->filetype = AE_IFLNK;
482                                         break;
483                                 }
484                         default:
485                                 archive_set_error(&a->archive,
486                                     ARCHIVE_ERRNO_FILE_FORMAT,
487                                     "Unrecognized file type \"%s\"", val);
488                                 return (ARCHIVE_WARN);
489                         }
490                         archive_entry_set_filetype(entry, mtree->filetype);
491                         break;
492                 }
493                 if (strcmp(key, "time") == 0) {
494                         archive_entry_set_mtime(entry, mtree_atol10(&val), 0);
495                         break;
496                 }
497         case 'u':
498                 if (strcmp(key, "uid") == 0) {
499                         archive_entry_set_uid(entry, mtree_atol10(&val));
500                         break;
501                 }
502                 if (strcmp(key, "uname") == 0) {
503                         archive_entry_copy_uname(entry, val);
504                         break;
505                 }
506         default:
507                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
508                     "Unrecognized key %s=%s", key, val);
509                 return (ARCHIVE_WARN);
510         }
511         return (ARCHIVE_OK);
512 }
513
514 static int
515 read_data(struct archive_read *a, const void **buff, size_t *size, off_t *offset)
516 {
517         ssize_t bytes_read;
518         struct mtree *mtree;
519
520         mtree = (struct mtree *)(a->format->data);
521         if (mtree->fd < 0) {
522                 *buff = NULL;
523                 *offset = 0;
524                 *size = 0;
525                 return (ARCHIVE_EOF);
526         }
527         if (mtree->buff == NULL) {
528                 mtree->buffsize = 64 * 1024;
529                 mtree->buff = malloc(mtree->buffsize);
530                 if (mtree->buff == NULL) {
531                         archive_set_error(&a->archive, ENOMEM,
532                             "Can't allocate memory");
533                 }
534         }
535
536         *buff = mtree->buff;
537         *offset = mtree->offset;
538         bytes_read = read(mtree->fd, mtree->buff, mtree->buffsize);
539         if (bytes_read < 0) {
540                 archive_set_error(&a->archive, errno, "Can't read");
541                 return (ARCHIVE_WARN);
542         }
543         if (bytes_read == 0) {
544                 *size = 0;
545                 return (ARCHIVE_EOF);
546         }
547         mtree->offset += bytes_read;
548         *size = (size_t)bytes_read;
549         return (ARCHIVE_OK);
550 }
551
552 /* Skip does nothing except possibly close the contents file. */
553 static int
554 skip(struct archive_read *a)
555 {
556         struct mtree *mtree;
557
558         mtree = (struct mtree *)(a->format->data);
559         if (mtree->fd >= 0) {
560                 close(mtree->fd);
561                 mtree->fd = -1;
562         }
563         return (ARCHIVE_OK);
564 }
565
566 /*
567  * Since parsing octal escapes always makes strings shorter,
568  * we can always do this conversion in-place.
569  */
570 static void
571 parse_escapes(char *src, struct mtree_entry *mentry)
572 {
573         char *dest = src;
574         char c;
575
576         while (*src != '\0') {
577                 c = *src++;
578                 if (c == '/' && mentry != NULL)
579                         mentry->full = 1;
580                 if (c == '\\') {
581                         if (src[0] >= '0' && src[0] <= '3'
582                             && src[1] >= '0' && src[1] <= '7'
583                             && src[2] >= '0' && src[2] <= '7') {
584                                 c = (src[0] - '0') << 6;
585                                 c |= (src[1] - '0') << 3;
586                                 c |= (src[2] - '0');
587                                 src += 3;
588                         }
589                 }
590                 *dest++ = c;
591         }
592         *dest = '\0';
593 }
594
/*
 * Parse an unsigned octal number at *p and leave *p on the first
 * character that was not consumed.  The result saturates at
 * INT64_MAX; parsing stops at the digit that would overflow.
 *
 * This is deliberately independent of the locale, which is why
 * strtol cannot simply be used in its place.
 */
static int64_t
mtree_atol8(char **p)
{
        const int base = 8;
        const int64_t limit = INT64_MAX / base;
        const int64_t last_digit_limit = INT64_MAX % base;
        int64_t value = 0;
        int digit;

        for (digit = **p - '0'; digit >= 0 && digit < base;
            digit = *++(*p) - '0') {
                if (value > limit
                    || (value == limit && digit > last_digit_limit)) {
                        value = INT64_MAX; /* Truncate on overflow. */
                        break;
                }
                value = (value * base) + digit;
        }
        return (value);
}
622
/*
 * Parse a decimal number, optionally preceded by '-', at *p and leave
 * *p on the first character that was not consumed.  The magnitude
 * saturates at INT64_MAX (so the result lies in
 * [-INT64_MAX, INT64_MAX]); parsing stops at the digit that would
 * overflow.
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 */
static int64_t
mtree_atol10(char **p)
{
        int64_t l, limit, last_digit_limit;
        int base, digit, sign;

        base = 10;
        limit = INT64_MAX / base;
        last_digit_limit = INT64_MAX % base;

        if (**p == '-') {
                sign = -1;
                ++(*p);
        } else
                sign = 1;

        l = 0;
        digit = **p - '0';
        while (digit >= 0 && digit < base) {
                if (l > limit || (l == limit && digit > last_digit_limit)) {
                        /*
                         * Truncate on overflow.  This must be the
                         * signed maximum: UINT64_MAX does not fit in
                         * an int64_t and would turn into -1.
                         */
                        l = INT64_MAX;
                        break;
                }
                l = (l * base) + digit;
                digit = *++(*p) - '0';
        }
        return (sign < 0) ? -l : l;
}
656
657 /*
658  * Returns length of line (including trailing newline)
659  * or negative on error.  'start' argument is updated to
660  * point to first character of line.
661  */
662 static ssize_t
663 readline(struct archive_read *a, struct mtree *mtree, char **start, ssize_t limit)
664 {
665         ssize_t bytes_read;
666         ssize_t total_size = 0;
667         const void *t;
668         const char *s;
669         void *p;
670
671         /* Accumulate line in a line buffer. */
672         for (;;) {
673                 /* Read some more. */
674                 bytes_read = (a->decompressor->read_ahead)(a, &t, 1);
675                 if (bytes_read == 0)
676                         return (0);
677                 if (bytes_read < 0)
678                         return (ARCHIVE_FATAL);
679                 s = t;  /* Start of line? */
680                 p = memchr(t, '\n', bytes_read);
681                 /* If we found '\n', trim the read. */
682                 if (p != NULL) {
683                         bytes_read = 1 + ((const char *)p) - s;
684                 }
685                 if (total_size + bytes_read + 1 > limit) {
686                         archive_set_error(&a->archive,
687                             ARCHIVE_ERRNO_FILE_FORMAT,
688                             "Line too long");
689                         return (ARCHIVE_FATAL);
690                 }
691                 if (archive_string_ensure(&mtree->line,
692                         total_size + bytes_read + 1) == NULL) {
693                         archive_set_error(&a->archive, ENOMEM,
694                             "Can't allocate working buffer");
695                         return (ARCHIVE_FATAL);
696                 }
697                 memcpy(mtree->line.s + total_size, t, bytes_read);
698                 (a->decompressor->consume)(a, bytes_read);
699                 total_size += bytes_read;
700                 /* Null terminate. */
701                 mtree->line.s[total_size] = '\0';
702                 /* If we found '\n', clean up and return. */
703                 if (p != NULL) {
704                         *start = mtree->line.s;
705                         return (total_size);
706                 }
707         }
708 }