2 * Copyright (c) 2004 Tim Kientzle
3 * Copyright (c) 2011 Michihiro NAKAJIMA
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include "archive_platform.h"
28 __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_zip.c 201102 2009-12-28 03:11:36Z kientzle $");
41 #include "archive_entry.h"
42 #include "archive_entry_locale.h"
43 #include "archive_private.h"
44 #include "archive_read_private.h"
45 #include "archive_endian.h"
48 #include "archive_crc32.h"
52 int64_t local_header_offset;
53 int64_t compressed_size;
54 int64_t uncompressed_size;
57 struct archive_entry *entry;
69 /* Structural information about the archive. */
70 int64_t central_directory_offset;
71 size_t central_directory_size;
72 size_t central_directory_entries;
73 char have_central_directory;
75 /* List of entries (seekable Zip only) */
76 size_t entries_remaining;
77 struct zip_entry *zip_entries;
78 struct zip_entry *entry;
82 /* entry_bytes_remaining is the number of bytes we expect. */
83 int64_t entry_bytes_remaining;
85 /* These count the number of bytes actually read for the entry. */
86 int64_t entry_compressed_bytes_read;
87 int64_t entry_uncompressed_bytes_read;
89 /* Running CRC32 of the decompressed data */
90 unsigned long entry_crc32;
92 /* Flags to mark progress of decompression. */
96 ssize_t filename_length;
99 unsigned char *uncompressed_buffer;
100 size_t uncompressed_buffer_size;
106 struct archive_string extra;
107 struct archive_string_conv *sconv;
108 struct archive_string_conv *sconv_default;
109 struct archive_string_conv *sconv_utf8;
110 int init_default_conversion;
111 char format_name[64];
/* Bits tested in a Zip entry's "flags" word. */
#define ZIP_ENCRYPTED		(1 << 0)
#define ZIP_LENGTH_AT_END	(1 << 3)
#define ZIP_STRONG_ENCRYPTED	(1 << 6)
#define ZIP_UTF8_NAME		(1 << 11)
119 static int archive_read_format_zip_streamable_bid(struct archive_read *, int);
120 static int archive_read_format_zip_seekable_bid(struct archive_read *, int);
121 static int archive_read_format_zip_options(struct archive_read *,
122 const char *, const char *);
123 static int archive_read_format_zip_cleanup(struct archive_read *);
124 static int archive_read_format_zip_read_data(struct archive_read *,
125 const void **, size_t *, int64_t *);
126 static int archive_read_format_zip_read_data_skip(struct archive_read *a);
127 static int archive_read_format_zip_seekable_read_header(struct archive_read *,
128 struct archive_entry *);
129 static int archive_read_format_zip_streamable_read_header(struct archive_read *,
130 struct archive_entry *);
132 static int zip_read_data_deflate(struct archive_read *a, const void **buff,
133 size_t *size, int64_t *offset);
135 static int zip_read_data_none(struct archive_read *a, const void **buff,
136 size_t *size, int64_t *offset);
137 static int zip_read_local_file_header(struct archive_read *a,
138 struct archive_entry *entry, struct zip *);
139 static time_t zip_time(const char *);
140 static const char *compression_name(int compression);
141 static void process_extra(const char *, size_t, struct zip_entry *);
144 archive_read_support_format_zip_streamable(struct archive *_a)
146 struct archive_read *a = (struct archive_read *)_a;
150 archive_check_magic(_a, ARCHIVE_READ_MAGIC,
151 ARCHIVE_STATE_NEW, "archive_read_support_format_zip");
153 zip = (struct zip *)malloc(sizeof(*zip));
155 archive_set_error(&a->archive, ENOMEM,
156 "Can't allocate zip data");
157 return (ARCHIVE_FATAL);
159 memset(zip, 0, sizeof(*zip));
161 r = __archive_read_register_format(a,
164 archive_read_format_zip_streamable_bid,
165 archive_read_format_zip_options,
166 archive_read_format_zip_streamable_read_header,
167 archive_read_format_zip_read_data,
168 archive_read_format_zip_read_data_skip,
169 archive_read_format_zip_cleanup);
177 archive_read_support_format_zip_seekable(struct archive *_a)
179 struct archive_read *a = (struct archive_read *)_a;
183 archive_check_magic(_a, ARCHIVE_READ_MAGIC,
184 ARCHIVE_STATE_NEW, "archive_read_support_format_zip_seekable");
186 zip = (struct zip *)malloc(sizeof(*zip));
188 archive_set_error(&a->archive, ENOMEM,
189 "Can't allocate zip data");
190 return (ARCHIVE_FATAL);
192 memset(zip, 0, sizeof(*zip));
194 r = __archive_read_register_format(a,
197 archive_read_format_zip_seekable_bid,
198 archive_read_format_zip_options,
199 archive_read_format_zip_seekable_read_header,
200 archive_read_format_zip_read_data,
201 archive_read_format_zip_read_data_skip,
202 archive_read_format_zip_cleanup);
210 archive_read_support_format_zip(struct archive *a)
213 r = archive_read_support_format_zip_streamable(a);
216 return (archive_read_support_format_zip_seekable(a));
220 * TODO: This is a performance sink because it forces
221 * the read core to drop buffered data from the start
222 * of file, which will then have to be re-read again
223 * if this bidder loses.
225 * Consider passing in the winning bid value to subsequent
226 * bidders so that this bidder in particular can avoid
227 * seeking if it knows it's going to lose anyway.
/*
 * Bid routine for the seekable Zip reader.
 *
 * Seeks to 22 bytes before the end of the input, reads those 22 bytes
 * and requires them to start with the end-of-central-directory
 * signature followed by four zero bytes.  On a match the entry count,
 * size and offset of the central directory are decoded from that
 * record and stored in the zip state for later use.
 */
230 archive_read_format_zip_seekable_bid(struct archive_read *a, int best_bid)
232 struct zip *zip = (struct zip *)a->format->data;
236 /* If someone has already bid more than 32, then avoid
237 trashing the look-ahead buffers with a seek. */
/* NOTE(review): "filesize" receives whatever __archive_read_seek()
   returns for a seek to end-22; presumably the resulting offset rather
   than the full file size.  Confirm before relying on the name. */
241 filesize = __archive_read_seek(a, -22, SEEK_END);
242 /* If we can't seek, then we can't bid. */
246 /* TODO: More robust search for end of central directory record. */
247 if ((p = __archive_read_ahead(a, 22, NULL)) == NULL)
249 /* First four bytes are signature for end of central directory
250 record. Four zero bytes ensure this isn't a multi-volume
251 Zip file (which we don't yet support). */
252 if (memcmp(p, "PK\005\006\000\000\000\000", 8) != 0)
255 /* Since we've already done the hard work of finding the
256 end of central directory record, let's save the important
258 zip->central_directory_entries = archive_le16dec(p + 10);
259 zip->central_directory_size = archive_le32dec(p + 12);
260 zip->central_directory_offset = archive_le32dec(p + 16);
262 /* Just one volume, so central dir must all be on this volume. */
263 if (zip->central_directory_entries != archive_le16dec(p + 8))
265 /* Central directory can't extend beyond end of this file. */
266 if (zip->central_directory_offset + zip->central_directory_size > filesize)
269 /* This is just a tiny bit higher than the maximum returned by
270 the streaming Zip bidder. This ensures that the more accurate
271 seeking Zip parser wins whenever seek is available. */
276 slurp_central_directory(struct archive_read *a, struct zip *zip)
280 __archive_read_seek(a, zip->central_directory_offset, SEEK_SET);
282 zip->zip_entries = calloc(zip->central_directory_entries, sizeof(struct zip_entry));
283 for (i = 0; i < zip->central_directory_entries; ++i) {
284 struct zip_entry *zip_entry = &zip->zip_entries[i];
285 size_t filename_length, extra_length, comment_length;
286 uint32_t external_attributes;
289 if ((p = __archive_read_ahead(a, 46, NULL)) == NULL)
290 return ARCHIVE_FATAL;
291 if (memcmp(p, "PK\001\002", 4) != 0) {
292 archive_set_error(&a->archive,
293 -1, "Invalid central directory signature");
294 return ARCHIVE_FATAL;
296 zip->have_central_directory = 1;
297 /* version = p[4]; */
298 zip_entry->system = p[5];
299 /* version_required = archive_le16dec(p + 6); */
300 zip_entry->flags = archive_le16dec(p + 8);
301 zip_entry->compression = archive_le16dec(p + 10);
302 zip_entry->mtime = zip_time(p + 12);
303 zip_entry->crc32 = archive_le32dec(p + 16);
304 zip_entry->compressed_size = archive_le32dec(p + 20);
305 zip_entry->uncompressed_size = archive_le32dec(p + 24);
306 filename_length = archive_le16dec(p + 28);
307 extra_length = archive_le16dec(p + 30);
308 comment_length = archive_le16dec(p + 32);
309 /* disk_start = archive_le16dec(p + 34); */ /* Better be zero. */
310 /* internal_attributes = archive_le16dec(p + 36); */ /* text bit */
311 external_attributes = archive_le32dec(p + 38);
312 zip_entry->local_header_offset = archive_le32dec(p + 42);
314 if (zip_entry->system == 3) {
315 zip_entry->mode = external_attributes >> 16;
317 zip_entry->mode = AE_IFREG | 0777;
320 /* Do we need to parse filename here? */
321 /* Or can we wait until we read the local header? */
322 __archive_read_consume(a,
323 46 + filename_length + extra_length + comment_length);
326 /* TODO: Sort zip entries. */
332 archive_read_format_zip_seekable_read_header(struct archive_read *a,
333 struct archive_entry *entry)
335 struct zip *zip = (struct zip *)a->format->data;
338 a->archive.archive_format = ARCHIVE_FORMAT_ZIP;
339 if (a->archive.archive_format_name == NULL)
340 a->archive.archive_format_name = "ZIP";
342 if (zip->zip_entries == NULL) {
343 r = slurp_central_directory(a, zip);
344 zip->entries_remaining = zip->central_directory_entries;
347 zip->entry = zip->zip_entries;
352 if (zip->entries_remaining <= 0)
354 --zip->entries_remaining;
356 /* TODO: If entries are sorted by offset within the file, we
357 should be able to skip here instead of seeking. Skipping is
358 typically faster (easier for I/O layer to optimize). */
359 __archive_read_seek(a, zip->entry->local_header_offset, SEEK_SET);
361 r = zip_read_local_file_header(a, entry, zip);
364 if ((zip->entry->mode & AE_IFMT) == AE_IFLNK) {
366 size_t linkname_length = archive_entry_size(entry);
368 archive_entry_set_size(entry, 0);
369 p = __archive_read_ahead(a, linkname_length, NULL);
371 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
372 "Truncated Zip file");
373 return ARCHIVE_FATAL;
376 if (archive_entry_copy_symlink_l(entry, p, linkname_length,
378 /* NOTE: If the last argument is NULL, this will
379 * fail only by memeory allocation failure. */
380 archive_set_error(&a->archive, ENOMEM,
381 "Can't allocate memory for Symlink");
382 return (ARCHIVE_FATAL);
384 /* TODO: handle character-set issues? */
/*
 * Bid routine for the streaming Zip reader.
 *
 * Peeks at the first four bytes of input and looks for "PK" followed
 * by one of the two-byte record codes listed below.  The best_bid
 * argument is ignored.
 */
390 archive_read_format_zip_streamable_bid(struct archive_read *a, int best_bid)
394 (void)best_bid; /* UNUSED */
396 if ((p = __archive_read_ahead(a, 4, NULL)) == NULL)
400 * Bid of 30 here is: 16 bits for "PK",
401 * next 16-bit field has four options (-2 bits).
/* NOTE(review): the comment above speaks of four options, but the test
   below accepts five codes (the last being the characters '0','0'). */
404 if (p[0] == 'P' && p[1] == 'K') {
405 if ((p[2] == '\001' && p[3] == '\002')
406 || (p[2] == '\003' && p[3] == '\004')
407 || (p[2] == '\005' && p[3] == '\006')
408 || (p[2] == '\007' && p[3] == '\010')
409 || (p[2] == '0' && p[3] == '0'))
/*
 * Option handler shared by both Zip readers.
 *
 * Recognized keys:
 *   "compat-2x"  - sets init_default_conversion to 1 when a value is
 *                  given and to 0 otherwise.
 *   "hdrcharset" - requires a non-empty character-set name and installs
 *                  a conversion object for it; when the name is exactly
 *                  "UTF-8" the same object is also used as the UTF-8
 *                  converter.
 * Any other key records an "unknown keyword" error.  The result starts
 * out as ARCHIVE_FAILED.
 */
417 archive_read_format_zip_options(struct archive_read *a,
418 const char *key, const char *val)
421 int ret = ARCHIVE_FAILED;
423 zip = (struct zip *)(a->format->data);
424 if (strcmp(key, "compat-2x") == 0) {
425 /* Handle filenames as libarchive 2.x */
426 zip->init_default_conversion = (val != NULL) ? 1 : 0;
428 } else if (strcmp(key, "hdrcharset") == 0) {
429 if (val == NULL || val[0] == 0)
430 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
431 "zip: hdrcharset option needs a character-set name");
433 zip->sconv = archive_string_conversion_from_charset(
434 &a->archive, val, 0);
435 if (zip->sconv != NULL) {
436 if (strcmp(val, "UTF-8") == 0)
437 zip->sconv_utf8 = zip->sconv;
443 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
444 "zip: unknown keyword ``%s''", key);
/*
 * Read the next entry header when streaming (no central directory).
 *
 * A single zip_entry is allocated on first use and cleared for every
 * entry.  Any bytes still marked unconsumed are released, then the
 * input is scanned for the next "PK" record: a local file header
 * ("PK\003\004") is parsed and returned, while the start or end of the
 * central directory ("PK\001\002" / "PK\005\006") ends the archive
 * with ARCHIVE_EOF.
 */
450 archive_read_format_zip_streamable_read_header(struct archive_read *a,
451 struct archive_entry *entry)
455 a->archive.archive_format = ARCHIVE_FORMAT_ZIP;
456 if (a->archive.archive_format_name == NULL)
457 a->archive.archive_format_name = "ZIP";
459 zip = (struct zip *)(a->format->data);
461 /* Make sure we have a zip_entry structure to use. */
462 if (zip->zip_entries == NULL) {
463 zip->zip_entries = malloc(sizeof(struct zip_entry));
464 if (zip->zip_entries == NULL) {
465 archive_set_error(&a->archive, ENOMEM, "Out of memory");
466 return ARCHIVE_FATAL;
/* The one entry slot is reused, so wipe what the previous entry left. */
469 zip->entry = zip->zip_entries;
470 memset(zip->entry, 0, sizeof(struct zip_entry));
472 /* Search ahead for the next local file header. */
473 __archive_read_consume(a, zip->unconsumed);
480 p = __archive_read_ahead(a, 4, &bytes);
482 return (ARCHIVE_FATAL);
485 while (p + 4 <= end) {
486 if (p[0] == 'P' && p[1] == 'K') {
487 if (p[2] == '\001' && p[3] == '\002')
488 /* Beginning of central directory. */
489 return (ARCHIVE_EOF);
491 if (p[2] == '\003' && p[3] == '\004') {
492 /* Regular file entry. */
493 __archive_read_consume(a, skipped);
494 return zip_read_local_file_header(a, entry, zip);
497 if (p[2] == '\005' && p[3] == '\006')
498 /* End of central directory. */
499 return (ARCHIVE_EOF);
504 __archive_read_consume(a, skipped);
509 * Assumes file pointer is at beginning of local file header.
/*
 * Parse one local file header and populate the archive entry.
 *
 * Resets the per-entry read state, decodes the 30-byte fixed header,
 * reconciles its CRC and sizes with central-directory values when
 * those are available, converts and stores the pathname, derives a
 * file mode from the name if none is known, processes the extra field,
 * and finally copies mode/ownership/times/size into the entry.
 */
512 zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry,
519 size_t len, filename_length, extra_length;
520 struct archive_string_conv *sconv;
521 struct zip_entry *zip_entry = zip->entry;
522 uint32_t local_crc32;
523 int64_t compressed_size, uncompressed_size;
524 int ret = ARCHIVE_OK;
/* Start every entry with clean decompression and byte-count state. */
527 zip->decompress_init = 0;
528 zip->end_of_entry = 0;
529 zip->entry_uncompressed_bytes_read = 0;
530 zip->entry_compressed_bytes_read = 0;
531 zip->entry_crc32 = crc32(0, NULL, 0);
533 /* Setup default conversion. */
534 if (zip->sconv == NULL && !zip->init_default_conversion) {
536 archive_string_default_conversion_for_read(&(a->archive));
537 zip->init_default_conversion = 1;
/* 30 bytes covers every fixed field decoded below (last one at p + 28). */
540 if ((p = __archive_read_ahead(a, 30, NULL)) == NULL) {
541 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
542 "Truncated ZIP file header");
543 return (ARCHIVE_FATAL);
546 if (memcmp(p, "PK\003\004", 4) != 0) {
547 archive_set_error(&a->archive, -1, "Damaged Zip archive");
548 return ARCHIVE_FATAL;
551 zip_entry->system = p[5];
552 zip_entry->flags = archive_le16dec(p + 6);
553 zip_entry->compression = archive_le16dec(p + 8);
554 zip_entry->mtime = zip_time(p + 10);
555 local_crc32 = archive_le32dec(p + 14);
556 compressed_size = archive_le32dec(p + 18);
557 uncompressed_size = archive_le32dec(p + 22);
558 filename_length = archive_le16dec(p + 26);
559 extra_length = archive_le16dec(p + 28);
561 __archive_read_consume(a, 30);
563 if (zip->have_central_directory) {
564 /* If we read the central dir entry, we must have size information
565 as well, so ignore the length-at-end flag. */
566 zip_entry->flags &= ~ZIP_LENGTH_AT_END;
567 /* If we have values from both the local file header
568 and the central directory, warn about mismatches
569 which might indicate a damaged file. But some
570 writers always put zero in the local header; don't
571 bother warning about that. */
572 if (local_crc32 != 0 && local_crc32 != zip_entry->crc32) {
573 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
574 "Inconsistent CRC32 values");
577 if (compressed_size != 0
578 && compressed_size != zip_entry->compressed_size) {
579 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
580 "Inconsistent compressed size");
583 if (uncompressed_size != 0
584 && uncompressed_size != zip_entry->uncompressed_size) {
585 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
586 "Inconsistent uncompressed size");
590 /* If we don't have the CD info, use whatever we do have. */
591 zip_entry->crc32 = local_crc32;
592 zip_entry->compressed_size = compressed_size;
593 zip_entry->uncompressed_size = uncompressed_size;
596 /* Read the filename. */
597 if ((h = __archive_read_ahead(a, filename_length, NULL)) == NULL) {
598 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
599 "Truncated ZIP file header");
600 return (ARCHIVE_FATAL);
/* Converter choice: the UTF-8 flag wins, then an explicitly configured
   converter, then the default one. */
602 if (zip_entry->flags & ZIP_UTF8_NAME) {
603 /* The filename is stored to be UTF-8. */
604 if (zip->sconv_utf8 == NULL) {
606 archive_string_conversion_from_charset(
607 &a->archive, "UTF-8", 1);
608 if (zip->sconv_utf8 == NULL)
609 return (ARCHIVE_FATAL);
611 sconv = zip->sconv_utf8;
612 } else if (zip->sconv != NULL)
615 sconv = zip->sconv_default;
617 if (archive_entry_copy_pathname_l(entry,
618 h, filename_length, sconv) != 0) {
619 if (errno == ENOMEM) {
620 archive_set_error(&a->archive, ENOMEM,
621 "Can't allocate memory for Pathname");
622 return (ARCHIVE_FATAL);
624 archive_set_error(&a->archive,
625 ARCHIVE_ERRNO_FILE_FORMAT,
626 "Pathname cannot be converted "
627 "from %s to current locale.",
628 archive_string_conversion_charset_name(sconv));
631 __archive_read_consume(a, filename_length);
633 if (zip_entry->mode == 0) {
634 /* Especially in streaming mode, we can end up
635 here without having seen any mode information.
636 Guess from the filename. */
637 wp = archive_entry_pathname_w(entry);
640 if (len > 0 && wp[len - 1] == L'/')
641 zip_entry->mode = AE_IFDIR | 0777;
643 zip_entry->mode = AE_IFREG | 0777;
645 cp = archive_entry_pathname(entry);
646 len = (cp != NULL)?strlen(cp):0;
647 if (len > 0 && cp[len - 1] == '/')
648 zip_entry->mode = AE_IFDIR | 0777;
650 zip_entry->mode = AE_IFREG | 0777;
654 /* Read the extra data. */
655 if ((h = __archive_read_ahead(a, extra_length, NULL)) == NULL) {
656 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
657 "Truncated ZIP file header");
658 return (ARCHIVE_FATAL);
660 process_extra(h, extra_length, zip_entry);
661 __archive_read_consume(a, extra_length);
663 /* Populate some additional entry fields: */
664 archive_entry_set_mode(entry, zip_entry->mode);
665 archive_entry_set_uid(entry, zip_entry->uid);
666 archive_entry_set_gid(entry, zip_entry->gid);
667 archive_entry_set_mtime(entry, zip_entry->mtime, 0);
668 archive_entry_set_ctime(entry, zip_entry->ctime, 0);
669 archive_entry_set_atime(entry, zip_entry->atime, 0);
670 /* Set the size only if it's meaningful. */
671 if (0 == (zip_entry->flags & ZIP_LENGTH_AT_END))
672 archive_entry_set_size(entry, zip_entry->uncompressed_size);
674 zip->entry_bytes_remaining = zip_entry->compressed_size;
676 /* If there's no body, force read_data() to return EOF immediately. */
677 if (0 == (zip_entry->flags & ZIP_LENGTH_AT_END)
678 && zip->entry_bytes_remaining < 1)
679 zip->end_of_entry = 1;
681 /* Set up a more descriptive format name. */
/* NOTE(review): sprintf() is unbounded; format_name is a 64-byte array.
   Consider snprintf(zip->format_name, sizeof(zip->format_name), ...). */
682 sprintf(zip->format_name, "ZIP %d.%d (%s)",
683 version / 10, version % 10,
684 compression_name(zip->entry->compression));
685 a->archive.archive_format_name = zip->format_name;
/*
 * Map a Zip compression method number to a human-readable name by
 * indexing a static table; the number is first compared against the
 * table's element count.
 * NOTE(review): "compression" is a signed int compared against an
 * unsigned sizeof expression; a negative value converts to a large
 * unsigned number and so fails the range test.
 */
691 compression_name(int compression)
693 static const char *compression_names[] = {
706 sizeof(compression_names)/sizeof(compression_names[0]))
707 return compression_names[compression];
712 /* Convert an MSDOS-style date/time into Unix-style time. */
/*
 * Convert a 4-byte MS-DOS time/date stamp to a time_t.
 *
 * p points at two little-endian 16-bit words: the time word followed by
 * the date word.  The fields are unpacked into a struct tm (years are
 * counted from 1980 in the stamp, seconds have two-second resolution)
 * and handed to mktime(), which interprets them as local time with the
 * daylight-saving state left for the library to determine.
 */
static time_t
zip_time(const char *p)
{
	const unsigned char *raw = (const unsigned char *)p;
	const int dos_time = raw[0] | (raw[1] << 8);
	const int dos_date = raw[2] | (raw[3] << 8);
	struct tm when;

	memset(&when, 0, sizeof(when));

	/* Date word: yyyyyyym mmmddddd */
	when.tm_mday = dos_date & 0x1f;
	when.tm_mon = ((dos_date >> 5) & 0x0f) - 1;	/* tm_mon is 0-based. */
	when.tm_year = ((dos_date >> 9) & 0x7f) + 80;	/* Years since 1900. */

	/* Time word: hhhhhmmm mmmsssss (seconds stored halved) */
	when.tm_sec = (dos_time & 0x1f) * 2;
	when.tm_min = (dos_time >> 5) & 0x3f;
	when.tm_hour = (dos_time >> 11) & 0x1f;

	when.tm_isdst = -1;
	return mktime(&when);
}
/*
 * Return the next block of data for the current entry.
 *
 * *offset is set to the number of uncompressed bytes already returned
 * for this entry.  ARCHIVE_EOF is returned once the entry has ended or
 * when the entry is not a regular file; encrypted entries and
 * unsupported compression methods yield ARCHIVE_FAILED.  Otherwise the
 * work is delegated by compression method (0 = stored, 8 = deflate).
 * When the end of the entry is reached, the compressed size, the low
 * 32 bits of the uncompressed size and the CRC32 are checked against
 * the recorded values, and a mismatch is reported as ARCHIVE_WARN.
 */
734 archive_read_format_zip_read_data(struct archive_read *a,
735 const void **buff, size_t *size, int64_t *offset)
738 struct zip *zip = (struct zip *)(a->format->data);
740 *offset = zip->entry_uncompressed_bytes_read;
744 /* If we hit end-of-entry last time, return ARCHIVE_EOF. */
745 if (zip->end_of_entry)
746 return (ARCHIVE_EOF);
748 /* Return EOF immediately if this is a non-regular file. */
749 if (AE_IFREG != (zip->entry->mode & AE_IFMT))
750 return (ARCHIVE_EOF);
752 if (zip->entry->flags & (ZIP_ENCRYPTED | ZIP_STRONG_ENCRYPTED)) {
753 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
754 "Encrypted file is unsupported");
755 return (ARCHIVE_FAILED);
/* Release bytes handed out by the previous call before reading more. */
758 __archive_read_consume(a, zip->unconsumed);
761 switch(zip->entry->compression) {
762 case 0: /* No compression. */
763 r = zip_read_data_none(a, buff, size, offset);
766 case 8: /* Deflate compression. */
767 r = zip_read_data_deflate(a, buff, size, offset);
770 default: /* Unsupported compression. */
771 /* Return a warning. */
772 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
773 "Unsupported ZIP compression method (%s)",
774 compression_name(zip->entry->compression));
775 /* We can't decompress this entry, but we will
776 * be able to skip() it and try the next entry. */
777 return (ARCHIVE_FAILED);
782 /* Update checksum */
784 zip->entry_crc32 = crc32(zip->entry_crc32, *buff, *size);
785 /* If we hit the end, swallow any end-of-data marker. */
786 if (zip->end_of_entry) {
787 /* Check file size, CRC against these values. */
788 if (zip->entry->compressed_size != zip->entry_compressed_bytes_read) {
789 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
790 "ZIP compressed data is wrong size (read %jd, expected %jd)",
791 (intmax_t)zip->entry_compressed_bytes_read,
792 (intmax_t)zip->entry->compressed_size);
793 return (ARCHIVE_WARN);
795 /* Size field only stores the lower 32 bits of the actual
797 if ((zip->entry->uncompressed_size & UINT32_MAX)
798 != (zip->entry_uncompressed_bytes_read & UINT32_MAX)) {
799 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
800 "ZIP uncompressed data is wrong size (read %jd, expected %jd)",
801 (intmax_t)zip->entry_uncompressed_bytes_read,
802 (intmax_t)zip->entry->uncompressed_size);
803 return (ARCHIVE_WARN);
805 /* Check computed CRC against header */
806 if (zip->entry->crc32 != zip->entry_crc32) {
807 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
808 "ZIP bad CRC: 0x%lx should be 0x%lx",
809 (unsigned long)zip->entry_crc32,
810 (unsigned long)zip->entry->crc32);
811 return (ARCHIVE_WARN);
819 * Read "uncompressed" data. There are three cases:
820 * 1) We know the size of the data. This is always true for the
821 * seeking reader (we've examined the Central Directory already).
822 * 2) ZIP_LENGTH_AT_END was set, but only the CRC was deferred.
823 * Info-ZIP seems to do this; we know the size but have to grab
824 * the CRC from the data descriptor afterwards.
825 * 3) We're streaming and ZIP_LENGTH_AT_END was specified and
826 * we have no size information. In this case, we can do pretty
827 * well by watching for the data descriptor record. The data
828 * descriptor is 16 bytes and includes a computed CRC that should
829 * provide a strong check.
831 * TODO: Technically, the PK\007\010 signature is optional.
832 * In the original spec, the data descriptor contained CRC
833 * and size fields but had no leading signature. In practice,
834 * newer writers seem to provide the signature pretty consistently,
835 * but we might need to do something more complex here if
836 * we want to handle older archives that lack that signature.
838 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
839 * zip->end_of_entry if it consumes all of the data.
/*
 * Hand back the next run of stored (uncompressed) entry bytes.
 *
 * With ZIP_LENGTH_AT_END set, the end of the entry is found by looking
 * for a 16-byte data descriptor whose CRC and both sizes equal the
 * running totals; otherwise the bytes returned are limited by
 * entry_bytes_remaining.  Returned bytes are recorded in
 * zip->unconsumed rather than consumed immediately.
 */
842 zip_read_data_none(struct archive_read *a, const void **_buff,
843 size_t *size, int64_t *offset)
849 zip = (struct zip *)(a->format->data);
851 if (zip->entry->flags & ZIP_LENGTH_AT_END) {
854 /* Grab at least 16 bytes. */
855 buff = __archive_read_ahead(a, 16, &bytes_avail);
856 if (bytes_avail < 16) {
857 /* Zip archives have end-of-archive markers
858 that are longer than this, so a failure to get at
859 least 16 bytes really does indicate a truncated
861 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
862 "Truncated ZIP file data");
863 return (ARCHIVE_FATAL);
865 /* Check for a complete PK\007\010 signature. */
867 if (p[0] == 'P' && p[1] == 'K'
868 && p[2] == '\007' && p[3] == '\010'
869 && archive_le32dec(p + 4) == zip->entry_crc32
870 && archive_le32dec(p + 8) == zip->entry_compressed_bytes_read
871 && archive_le32dec(p + 12) == zip->entry_uncompressed_bytes_read) {
872 zip->entry->crc32 = archive_le32dec(p + 4);
873 zip->entry->compressed_size = archive_le32dec(p + 8);
874 zip->entry->uncompressed_size = archive_le32dec(p + 12);
875 zip->end_of_entry = 1;
876 zip->unconsumed = 16;
879 /* If not at EOF, ensure we consume at least one byte. */
882 /* Scan forward until we see where a PK\007\010 signature might be. */
883 /* Return bytes up until that point. On the next call, the code
884 above will verify the data descriptor. */
/* The scan inspects p[3] first so that it can step ahead by up to
   three bytes when that byte rules out a signature at p. */
885 while (p < buff + bytes_avail - 4) {
886 if (p[3] == 'P') { p += 3; }
887 else if (p[3] == 'K') { p += 2; }
888 else if (p[3] == '\007') { p += 1; }
889 else if (p[3] == '\010' && p[2] == '\007'
890 && p[1] == 'K' && p[0] == 'P') {
894 bytes_avail = p - buff;
896 if (zip->entry_bytes_remaining == 0) {
897 zip->end_of_entry = 1;
900 /* Grab a bunch of bytes. */
901 buff = __archive_read_ahead(a, 1, &bytes_avail);
902 if (bytes_avail <= 0) {
903 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
904 "Truncated ZIP file data");
905 return (ARCHIVE_FATAL);
907 if (bytes_avail > zip->entry_bytes_remaining)
908 bytes_avail = zip->entry_bytes_remaining;
/* Stored data: compressed and uncompressed counts advance together. */
911 zip->entry_bytes_remaining -= bytes_avail;
912 zip->entry_uncompressed_bytes_read += bytes_avail;
913 zip->entry_compressed_bytes_read += bytes_avail;
914 zip->unconsumed += bytes_avail;
/*
 * Inflate the next chunk of a deflate-compressed entry.
 *
 * A 256 KiB output buffer is allocated on first use and the zlib
 * stream is (re)initialized at the start of each entry.  Each call
 * feeds whatever input is available to inflate(), consumes the input
 * bytes zlib reports as used, and returns the produced bytes through
 * *buff / *size.  When the entry ends and ZIP_LENGTH_AT_END is set, a
 * following "PK\007\010" data descriptor supplies the CRC and sizes.
 */
921 zip_read_data_deflate(struct archive_read *a, const void **buff,
922 size_t *size, int64_t *offset)
926 const void *compressed_buff;
929 zip = (struct zip *)(a->format->data);
931 /* If the buffer hasn't been allocated, allocate it now. */
932 if (zip->uncompressed_buffer == NULL) {
933 zip->uncompressed_buffer_size = 256 * 1024;
934 zip->uncompressed_buffer
935 = (unsigned char *)malloc(zip->uncompressed_buffer_size);
936 if (zip->uncompressed_buffer == NULL) {
937 archive_set_error(&a->archive, ENOMEM,
938 "No memory for ZIP decompression");
939 return (ARCHIVE_FATAL);
943 /* If we haven't yet read any data, initialize the decompressor. */
944 if (!zip->decompress_init) {
945 if (zip->stream_valid)
946 r = inflateReset(&zip->stream);
948 r = inflateInit2(&zip->stream,
949 -15 /* Don't check for zlib header */);
951 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
952 "Can't initialize ZIP decompression.");
953 return (ARCHIVE_FATAL);
955 /* Stream structure has been set up. */
956 zip->stream_valid = 1;
957 /* We've initialized decompression for this stream. */
958 zip->decompress_init = 1;
962 * Note: '1' here is a performance optimization.
963 * Recall that the decompression layer returns a count of
964 * available bytes; asking for more than that forces the
965 * decompressor to combine reads by copying data.
967 compressed_buff = __archive_read_ahead(a, 1, &bytes_avail);
968 if (0 == (zip->entry->flags & ZIP_LENGTH_AT_END)
969 && bytes_avail > zip->entry_bytes_remaining) {
970 bytes_avail = zip->entry_bytes_remaining;
972 if (bytes_avail <= 0) {
973 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
974 "Truncated ZIP file body");
975 return (ARCHIVE_FATAL);
979 * A bug in zlib.h: stream.next_in should be marked 'const'
980 * but isn't (the library never alters data through the
981 * next_in pointer, only reads it). The result: this ugly
982 * cast to remove 'const'.
984 zip->stream.next_in = (Bytef *)(uintptr_t)(const void *)compressed_buff;
985 zip->stream.avail_in = bytes_avail;
986 zip->stream.total_in = 0;
987 zip->stream.next_out = zip->uncompressed_buffer;
988 zip->stream.avail_out = zip->uncompressed_buffer_size;
989 zip->stream.total_out = 0;
991 r = inflate(&zip->stream, 0);
996 zip->end_of_entry = 1;
999 archive_set_error(&a->archive, ENOMEM,
1000 "Out of memory for ZIP decompression");
1001 return (ARCHIVE_FATAL);
1003 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1004 "ZIP decompression failed (%d)", r);
1005 return (ARCHIVE_FATAL);
1008 /* Consume as much as the compressor actually used. */
/* total_in / total_out were zeroed before inflate(), so they hold the
   byte counts for this call only. */
1009 bytes_avail = zip->stream.total_in;
1010 __archive_read_consume(a, bytes_avail);
1011 zip->entry_bytes_remaining -= bytes_avail;
1012 zip->entry_compressed_bytes_read += bytes_avail;
1014 *size = zip->stream.total_out;
1015 zip->entry_uncompressed_bytes_read += zip->stream.total_out;
1016 *buff = zip->uncompressed_buffer;
1018 if (zip->end_of_entry && (zip->entry->flags & ZIP_LENGTH_AT_END)) {
1021 if (NULL == (p = __archive_read_ahead(a, 16, NULL))) {
1022 archive_set_error(&a->archive,
1023 ARCHIVE_ERRNO_FILE_FORMAT,
1024 "Truncated ZIP end-of-file record");
1025 return (ARCHIVE_FATAL);
1027 /* Consume the optional PK\007\010 marker. */
1028 if (p[0] == 'P' && p[1] == 'K' && p[2] == '\007' && p[3] == '\010') {
1029 zip->entry->crc32 = archive_le32dec(p + 4);
1030 zip->entry->compressed_size = archive_le32dec(p + 8);
1031 zip->entry->uncompressed_size = archive_le32dec(p + 12);
1032 zip->unconsumed = 16;
1036 return (ARCHIVE_OK);
/*
 * Skip whatever remains of the current entry's body.
 *
 * Nothing needs to be done if the entry was already read to its end or
 * if a central directory is in use.  When streaming with a known
 * length, the remaining bytes (plus any unconsumed ones) are consumed
 * in one step.  When the length is unknown, deflate entries are
 * decompressed until their end is reached, and other entries are
 * scanned for a "PK\007\010" data-descriptor signature.
 */
1041 archive_read_format_zip_read_data_skip(struct archive_read *a)
1045 zip = (struct zip *)(a->format->data);
1047 /* If we've already read to end of data, we're done. */
1048 if (zip->end_of_entry)
1049 return (ARCHIVE_OK);
1050 /* If we're seeking, we're done. */
1051 if (zip->have_central_directory)
1052 return (ARCHIVE_OK);
1054 /* So we know we're streaming... */
1055 if (0 == (zip->entry->flags & ZIP_LENGTH_AT_END)) {
1056 /* We know the compressed length, so we can just skip. */
1057 int64_t bytes_skipped = __archive_read_consume(a,
1058 zip->entry_bytes_remaining + zip->unconsumed);
1059 if (bytes_skipped < 0)
1060 return (ARCHIVE_FATAL);
1061 zip->unconsumed = 0;
1062 return (ARCHIVE_OK);
1065 /* We're streaming and we don't know the length. */
1066 /* If the body is compressed and we know the format, we can
1067 * find an exact end-of-entry by decompressing it. */
1068 switch (zip->entry->compression) {
1070 case 8: /* Deflate compression. */
1071 while (!zip->end_of_entry) {
1073 const void *buff = NULL;
1076 r = zip_read_data_deflate(a, &buff, &size, &offset);
1077 if (r != ARCHIVE_OK)
1082 default: /* Uncompressed or unknown. */
1083 /* Scan for a PK\007\010 signature. */
1084 __archive_read_consume(a, zip->unconsumed);
1085 zip->unconsumed = 0;
1087 const char *p, *buff;
1088 ssize_t bytes_avail;
1089 buff = __archive_read_ahead(a, 16, &bytes_avail);
1090 if (bytes_avail < 16) {
1091 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1092 "Truncated ZIP file data");
1093 return (ARCHIVE_FATAL);
/* Stop 16 bytes short of the buffer end: on a match, the signature and
   the 12 bytes after it are consumed together below. */
1096 while (p < buff + bytes_avail - 16) {
1097 if (p[3] == 'P') { p += 3; }
1098 else if (p[3] == 'K') { p += 2; }
1099 else if (p[3] == '\007') { p += 1; }
1100 else if (p[3] == '\010' && p[2] == '\007'
1101 && p[1] == 'K' && p[0] == 'P') {
1102 __archive_read_consume(a, p - buff + 16);
1106 __archive_read_consume(a, p - buff);
/*
 * Release the resources held in the Zip reader state: the zlib stream
 * (only if it was ever initialized), the entry table, the
 * decompression buffer and the "extra" string.  The format's data
 * pointer is cleared afterwards.  Always returns ARCHIVE_OK.
 */
1113 archive_read_format_zip_cleanup(struct archive_read *a)
1117 zip = (struct zip *)(a->format->data);
1119 if (zip->stream_valid)
1120 inflateEnd(&zip->stream);
1122 free(zip->zip_entries);
1123 free(zip->uncompressed_buffer);
1124 archive_string_free(&(zip->extra));
1126 (a->format->data) = NULL;
1127 return (ARCHIVE_OK);
1131 * The extra data is stored as a list of
1132 * id1+size1+data1 + id2+size2+data2 ...
1133 * triplets. id and size are 2 bytes each.
1136 process_extra(const char *p, size_t extra_length, struct zip_entry* zip_entry)
1138 unsigned offset = 0;
1140 while (offset < extra_length - 4)
1142 unsigned short headerid = archive_le16dec(p + offset);
1143 unsigned short datasize = archive_le16dec(p + offset + 2);
1145 if (offset + datasize > extra_length)
1148 fprintf(stderr, "Header id 0x%x, length %d\n",
1149 headerid, datasize);
1153 /* Zip64 extended information extra field. */
1155 zip_entry->uncompressed_size =
1156 archive_le64dec(p + offset);
1158 zip_entry->compressed_size =
1159 archive_le64dec(p + offset + 8);
1163 /* Extended time field "UT". */
1164 int flags = p[offset];
1167 /* Flag bits indicate which dates are present. */
1171 fprintf(stderr, "mtime: %lld -> %d\n",
1172 (long long)zip_entry->mtime,
1173 archive_le32dec(p + offset));
1177 zip_entry->mtime = archive_le32dec(p + offset);
1185 zip_entry->atime = archive_le32dec(p + offset);
1193 zip_entry->ctime = archive_le32dec(p + offset);
1201 /* Info-ZIP Unix Extra Field (old version) "UX". */
1202 if (datasize >= 8) {
1203 zip_entry->atime = archive_le32dec(p + offset);
1204 zip_entry->mtime = archive_le32dec(p + offset + 4);
1206 if (datasize >= 12) {
1207 zip_entry->uid = archive_le16dec(p + offset + 8);
1208 zip_entry->gid = archive_le16dec(p + offset + 10);
1213 /* Info-ZIP Unix Extra Field (type 2) "Ux". */
1215 fprintf(stderr, "uid %d gid %d\n",
1216 archive_le16dec(p + offset),
1217 archive_le16dec(p + offset + 2));
1220 zip_entry->uid = archive_le16dec(p + offset);
1222 zip_entry->gid = archive_le16dec(p + offset + 2);
1226 /* Info-Zip Unix Extra Field (type 3) "ux". */
1227 int uidsize = 0, gidsize = 0;
1229 if (datasize >= 1 && p[offset] == 1) {/* version=1 */
1230 if (datasize >= 4) {
1231 /* get a uid size. */
1232 uidsize = p[offset+1];
1234 zip_entry->uid = archive_le16dec(
1236 else if (uidsize == 4 && datasize >= 6)
1237 zip_entry->uid = archive_le32dec(
1240 if (datasize >= (2 + uidsize + 3)) {
1241 /* get a gid size. */
1242 gidsize = p[offset+2+uidsize];
1244 zip_entry->gid = archive_le16dec(
1245 p+offset+2+uidsize+1);
1246 else if (gidsize == 4 &&
1247 datasize >= (2 + uidsize + 5))
1248 zip_entry->gid = archive_le32dec(
1249 p+offset+2+uidsize+1);
1260 if (offset != extra_length)
1263 "Extra data field contents do not match reported size!\n");