2 * Copyright (c) 2010-2011 Michihiro NAKAJIMA
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "archive_platform.h"
45 #include "archive_entry.h"
46 #include "archive_entry_locale.h"
47 #include "archive_private.h"
48 #include "archive_read_private.h"
49 #include "archive_endian.h"
53 /* Decoding status. */
57 * Window to see last decoded data, from 32KBi to 2MBi.
61 /* Window buffer, which is a loop buffer. */
62 unsigned char *w_buff;
63 /* The insert position to the window. */
65 /* The position where we can copy decoded code from the window. */
67 /* The length how many bytes we can copy decoded code from
70 /* Translation reversal for x86 proccessor CALL byte sequence(E8).
71 * This is used for LZX only. */
72 uint32_t translation_size;
75 #define VERBATIM_BLOCK 1
76 #define ALIGNED_OFFSET_BLOCK 2
77 #define UNCOMPRESSED_BLOCK 3
79 size_t block_bytes_avail;
80 /* Repeated offset. */
82 unsigned char rbytes[4];
96 #define CACHE_TYPE uint64_t
97 #define CACHE_BITS (8 * sizeof(CACHE_TYPE))
99 CACHE_TYPE cache_buffer;
100 /* Indicates how many bits avail in cache_buffer. */
112 unsigned char *bitlen;
115 * Use a index table. It's faster than searching a huffman
116 * coding tree, which is a binary tree. But a use of a large
117 * index table causes L1 cache read miss many times.
125 /* Direct access table. */
127 /* Binary tree table for extra bits over the direct access. */
138 static const int slots[] = {
139 30, 32, 34, 36, 38, 42, 50, 66, 98, 162, 290
142 #define SLOT_MAX 21/*->25*/
145 const unsigned char *next_in;
148 unsigned char *next_out;
155 * Cabinet file definitions.
157 /* CFHEADER offset */
158 #define CFHEADER_signature 0
159 #define CFHEADER_cbCabinet 8
160 #define CFHEADER_coffFiles 16
161 #define CFHEADER_versionMinor 24
162 #define CFHEADER_versionMajor 25
163 #define CFHEADER_cFolders 26
164 #define CFHEADER_cFiles 28
165 #define CFHEADER_flags 30
166 #define CFHEADER_setID 32
167 #define CFHEADER_iCabinet 34
168 #define CFHEADER_cbCFHeader 36
169 #define CFHEADER_cbCFFolder 38
170 #define CFHEADER_cbCFData 39
172 /* CFFOLDER offset */
173 #define CFFOLDER_coffCabStart 0
174 #define CFFOLDER_cCFData 4
175 #define CFFOLDER_typeCompress 6
176 #define CFFOLDER_abReserve 8
179 #define CFFILE_cbFile 0
180 #define CFFILE_uoffFolderStart 4
181 #define CFFILE_iFolder 8
182 #define CFFILE_date_time 10
183 #define CFFILE_attribs 14
186 #define CFDATA_csum 0
187 #define CFDATA_cbData 4
188 #define CFDATA_cbUncomp 6
190 static const char *compression_name[] = {
198 /* Sum value of this CFDATA. */
200 uint16_t compressed_size;
201 uint16_t compressed_bytes_remaining;
202 uint16_t uncompressed_size;
203 uint16_t uncompressed_bytes_remaining;
204 /* To know how many bytes we have decompressed. */
205 uint16_t uncompressed_avail;
206 /* Offset from the beginning of compressed data of this CFDATA */
207 uint16_t read_offset;
209 /* To keep memory image of this CFDATA to compute the sum. */
210 size_t memimage_size;
211 unsigned char *memimage;
212 /* Result of calculation of sum. */
213 uint32_t sum_calculated;
214 unsigned char sum_extra[4];
220 uint32_t cfdata_offset_in_cab;
221 uint16_t cfdata_count;
223 #define COMPTYPE_NONE 0x0000
224 #define COMPTYPE_MSZIP 0x0001
225 #define COMPTYPE_QUANTUM 0x0002
226 #define COMPTYPE_LZX 0x0003
228 const char *compname;
229 /* At the time reading CFDATA */
230 struct cfdata cfdata;
232 /* Flags to mark progress of decompression. */
233 char decompress_init;
237 uint32_t uncompressed_size;
241 #define iFoldCONTINUED_FROM_PREV 0xFFFD
242 #define iFoldCONTINUED_TO_NEXT 0xFFFE
243 #define iFoldCONTINUED_PREV_AND_NEXT 0xFFFF
245 #define ATTR_RDONLY 0x01
246 #define ATTR_NAME_IS_UTF 0x80
247 struct archive_string pathname;
251 /* Total bytes of all file size in a Cabinet. */
252 uint32_t total_bytes;
253 uint32_t files_offset;
254 uint16_t folder_count;
257 #define PREV_CABINET 0x0001
258 #define NEXT_CABINET 0x0002
259 #define RESERVE_PRESENT 0x0004
262 /* Version number. */
265 unsigned char cffolder;
266 unsigned char cfdata;
267 /* All folders in a cabinet. */
268 struct cffolder *folder_array;
269 /* All files in a cabinet. */
270 struct cffile *file_array;
275 /* entry_bytes_remaining is the number of bytes we expect. */
276 int64_t entry_offset;
277 int64_t entry_bytes_remaining;
278 int64_t entry_unconsumed;
279 int64_t entry_compressed_bytes_read;
280 int64_t entry_uncompressed_bytes_read;
281 struct cffolder *entry_cffolder;
282 struct cffile *entry_cffile;
283 struct cfdata *entry_cfdata;
285 /* Offset from beginning of a cabinet file. */
287 struct cfheader cfheader;
288 struct archive_wstring ws;
290 /* Flag to mark progress that an archive was read their first header.*/
294 char end_of_entry_cleanup;
296 unsigned char *uncompressed_buffer;
297 size_t uncompressed_buffer_size;
299 int init_default_conversion;
300 struct archive_string_conv *sconv;
301 struct archive_string_conv *sconv_default;
302 struct archive_string_conv *sconv_utf8;
303 char format_name[64];
309 struct lzx_stream xstrm;
312 static int archive_read_format_cab_bid(struct archive_read *, int);
313 static int archive_read_format_cab_options(struct archive_read *,
314 const char *, const char *);
315 static int archive_read_format_cab_read_header(struct archive_read *,
316 struct archive_entry *);
317 static int archive_read_format_cab_read_data(struct archive_read *,
318 const void **, size_t *, int64_t *);
319 static int archive_read_format_cab_read_data_skip(struct archive_read *);
320 static int archive_read_format_cab_cleanup(struct archive_read *);
322 static int cab_skip_sfx(struct archive_read *);
323 static time_t cab_dos_time(const unsigned char *);
324 static int cab_read_data(struct archive_read *, const void **,
325 size_t *, int64_t *);
326 static int cab_read_header(struct archive_read *);
327 static uint32_t cab_checksum_cfdata_4(const void *, size_t bytes, uint32_t);
328 static uint32_t cab_checksum_cfdata(const void *, size_t bytes, uint32_t);
329 static void cab_checksum_update(struct archive_read *, size_t);
330 static int cab_checksum_finish(struct archive_read *);
331 static int cab_next_cfdata(struct archive_read *);
332 static const void *cab_read_ahead_cfdata(struct archive_read *, ssize_t *);
333 static const void *cab_read_ahead_cfdata_none(struct archive_read *, ssize_t *);
334 static const void *cab_read_ahead_cfdata_deflate(struct archive_read *,
336 static const void *cab_read_ahead_cfdata_lzx(struct archive_read *,
338 static int64_t cab_consume_cfdata(struct archive_read *, int64_t);
339 static int64_t cab_minimum_consume_cfdata(struct archive_read *, int64_t);
340 static int lzx_decode_init(struct lzx_stream *, int);
341 static int lzx_read_blocks(struct lzx_stream *, int);
342 static int lzx_decode_blocks(struct lzx_stream *, int);
343 static void lzx_decode_free(struct lzx_stream *);
344 static void lzx_translation(struct lzx_stream *, void *, size_t, uint32_t);
345 static void lzx_cleanup_bitstream(struct lzx_stream *);
346 static int lzx_decode(struct lzx_stream *, int);
347 static int lzx_read_pre_tree(struct lzx_stream *);
348 static int lzx_read_bitlen(struct lzx_stream *, struct huffman *, int);
349 static int lzx_huffman_init(struct huffman *, size_t, int);
350 static void lzx_huffman_free(struct huffman *);
351 static int lzx_make_huffman_table(struct huffman *);
352 static int inline lzx_decode_huffman(struct huffman *, unsigned);
353 static int lzx_decode_huffman_tree(struct huffman *, unsigned, int);
357 archive_read_support_format_cab(struct archive *_a)
359 struct archive_read *a = (struct archive_read *)_a;
363 archive_check_magic(_a, ARCHIVE_READ_MAGIC,
364 ARCHIVE_STATE_NEW, "archive_read_support_format_cab");
366 cab = (struct cab *)calloc(1, sizeof(*cab));
368 archive_set_error(&a->archive, ENOMEM,
369 "Can't allocate CAB data");
370 return (ARCHIVE_FATAL);
372 archive_string_init(&cab->ws);
373 archive_wstring_ensure(&cab->ws, 256);
375 r = __archive_read_register_format(a,
378 archive_read_format_cab_bid,
379 archive_read_format_cab_options,
380 archive_read_format_cab_read_header,
381 archive_read_format_cab_read_data,
382 archive_read_format_cab_read_data_skip,
383 archive_read_format_cab_cleanup);
391 find_cab_magic(const char *p)
396 * Note: Self-Extraction program has 'MSCF' string in their
397 * program. If we were finding 'MSCF' string only, we got
398 * wrong place for Cabinet header, thus, we have to check
399 * following four bytes which are reserved and must be set
402 if (memcmp(p, "MSCF\0\0\0\0", 8) == 0)
414 archive_read_format_cab_bid(struct archive_read *a, int best_bid)
417 ssize_t bytes_avail, offset, window;
419 /* If there's already a better bid than we can ever
420 make, don't bother testing. */
424 if ((p = __archive_read_ahead(a, 8, NULL)) == NULL)
427 if (memcmp(p, "MSCF\0\0\0\0", 8) == 0)
431 * Attempt to handle self-extracting archives
432 * by noting a PE header and searching forward
433 * up to 128k for a 'MSCF' marker.
435 if (p[0] == 'M' && p[1] == 'Z') {
438 while (offset < (1024 * 128)) {
439 const char *h = __archive_read_ahead(a, offset + window,
442 /* Remaining bytes are less than window. */
449 while (p + 8 < h + bytes_avail) {
451 if ((next = find_cab_magic(p)) == 0)
462 archive_read_format_cab_options(struct archive_read *a,
463 const char *key, const char *val)
466 int ret = ARCHIVE_FAILED;
468 cab = (struct cab *)(a->format->data);
469 if (strcmp(key, "hdrcharset") == 0) {
470 if (val == NULL || val[0] == 0)
471 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
472 "cab: hdrcharset option needs a character-set name");
474 cab->sconv = archive_string_conversion_from_charset(
475 &a->archive, val, 0);
476 if (cab->sconv != NULL)
482 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
483 "cab: unknown keyword ``%s''", key);
489 cab_skip_sfx(struct archive_read *a)
493 ssize_t bytes, window;
497 const char *h = __archive_read_ahead(a, window, &bytes);
499 /* Remaining size are less than window. */
502 archive_set_error(&a->archive,
503 ARCHIVE_ERRNO_FILE_FORMAT,
504 "Couldn't find out CAB header");
505 return (ARCHIVE_FATAL);
513 * Scan ahead until we find something that looks
514 * like the cab header.
518 if ((next = find_cab_magic(p)) == 0) {
520 __archive_read_consume(a, skip);
526 __archive_read_consume(a, skip);
531 truncated_error(struct archive_read *a)
533 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
534 "Truncated CAB header");
535 return (ARCHIVE_FATAL);
539 cab_strnlen(const unsigned char *p, size_t maxlen)
543 for (i = 0; i <= maxlen; i++) {
548 return (-1);/* invalid */
552 /* Read bytes as much as remaining. */
554 cab_read_ahead_remaining(struct archive_read *a, size_t min, ssize_t *avail)
559 p = __archive_read_ahead(a, min, avail);
567 /* Convert a path separator '\' -> '/' */
569 cab_convert_path_separator_1(struct archive_string *fn, unsigned char attr)
574 /* Easy check if we have '\' in multi-byte string. */
576 for (i = 0; i < archive_strlen(fn); i++) {
577 if (fn->s[i] == '\\') {
579 /* This may be second byte of multi-byte
585 } else if ((fn->s[i] & 0x80) && !(attr & ATTR_NAME_IS_UTF))
590 if (i == archive_strlen(fn))
596 * Replace a character '\' with '/' in wide character.
599 cab_convert_path_separator_2(struct cab *cab, struct archive_entry *entry)
604 /* If a conversion to wide character failed, force the replacement. */
605 if ((wp = archive_entry_pathname_w(entry)) != NULL) {
606 archive_wstrcpy(&(cab->ws), wp);
607 for (i = 0; i < archive_strlen(&(cab->ws)); i++) {
608 if (cab->ws.s[i] == L'\\')
611 archive_entry_copy_pathname_w(entry, cab->ws.s);
616 * Read CFHEADER, CFFOLDER and CFFILE.
619 cab_read_header(struct archive_read *a)
621 const unsigned char *p;
627 int cur_folder, prev_folder;
630 a->archive.archive_format = ARCHIVE_FORMAT_CAB;
631 if (a->archive.archive_format_name == NULL)
632 a->archive.archive_format_name = "CAB";
634 if ((p = __archive_read_ahead(a, 42, NULL)) == NULL)
635 return (truncated_error(a));
637 cab = (struct cab *)(a->format->data);
638 if (cab->found_header == 0 &&
639 p[0] == 'M' && p[1] == 'Z') {
640 /* This is an executable? Must be self-extracting... */
641 err = cab_skip_sfx(a);
642 if (err < ARCHIVE_WARN)
645 if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL)
646 return (truncated_error(a));
654 if (p[CFHEADER_signature+0] != 'M' || p[CFHEADER_signature+1] != 'S' ||
655 p[CFHEADER_signature+2] != 'C' || p[CFHEADER_signature+3] != 'F') {
656 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
657 "Couldn't find out CAB header");
658 return (ARCHIVE_FATAL);
660 hd->total_bytes = archive_le32dec(p + CFHEADER_cbCabinet);
661 hd->files_offset = archive_le32dec(p + CFHEADER_coffFiles);
662 hd->minor = p[CFHEADER_versionMinor];
663 hd->major = p[CFHEADER_versionMajor];
664 hd->folder_count = archive_le16dec(p + CFHEADER_cFolders);
665 if (hd->folder_count == 0)
667 hd->file_count = archive_le16dec(p + CFHEADER_cFiles);
668 if (hd->file_count == 0)
670 hd->flags = archive_le16dec(p + CFHEADER_flags);
671 hd->setid = archive_le16dec(p + CFHEADER_setID);
672 hd->cabinet = archive_le16dec(p + CFHEADER_iCabinet);
673 used = CFHEADER_iCabinet + 2;
674 if (hd->flags & RESERVE_PRESENT) {
676 cfheader = archive_le16dec(p + CFHEADER_cbCFHeader);
677 if (cfheader > 60000U)
679 hd->cffolder = p[CFHEADER_cbCFFolder];
680 hd->cfdata = p[CFHEADER_cbCFData];
681 used += 4;/* cbCFHeader, cbCFFolder and cbCFData */
682 used += cfheader;/* abReserve */
684 hd->cffolder = 0;/* Avoid compiling warning. */
685 if (hd->flags & PREV_CABINET) {
686 /* How many bytes are used for szCabinetPrev. */
687 if ((p = __archive_read_ahead(a, used+256, NULL)) == NULL)
688 return (truncated_error(a));
689 if ((len = cab_strnlen(p + used, 255)) <= 0)
692 /* How many bytes are used for szDiskPrev. */
693 if ((p = __archive_read_ahead(a, used+256, NULL)) == NULL)
694 return (truncated_error(a));
695 if ((len = cab_strnlen(p + used, 255)) <= 0)
699 if (hd->flags & NEXT_CABINET) {
700 /* How many bytes are used for szCabinetNext. */
701 if ((p = __archive_read_ahead(a, used+256, NULL)) == NULL)
702 return (truncated_error(a));
703 if ((len = cab_strnlen(p + used, 255)) <= 0)
706 /* How many bytes are used for szDiskNext. */
707 if ((p = __archive_read_ahead(a, used+256, NULL)) == NULL)
708 return (truncated_error(a));
709 if ((len = cab_strnlen(p + used, 255)) <= 0)
713 __archive_read_consume(a, used);
714 cab->cab_offset += used;
720 hd->folder_array = (struct cffolder *)calloc(
721 hd->folder_count, sizeof(struct cffolder));
722 if (hd->folder_array == NULL)
726 if (hd->flags & RESERVE_PRESENT)
727 bytes += hd->cffolder;
728 bytes *= hd->folder_count;
729 if ((p = __archive_read_ahead(a, bytes, NULL)) == NULL)
730 return (truncated_error(a));
732 for (i = 0; i < hd->folder_count; i++) {
733 struct cffolder *folder = &(hd->folder_array[i]);
734 folder->cfdata_offset_in_cab =
735 archive_le32dec(p + CFFOLDER_coffCabStart);
736 folder->cfdata_count = archive_le16dec(p+CFFOLDER_cCFData);
738 archive_le16dec(p+CFFOLDER_typeCompress) & 0x0F;
740 archive_le16dec(p+CFFOLDER_typeCompress) >> 8;
741 /* Get a compression name. */
742 if (folder->comptype <
743 sizeof(compression_name) / sizeof(compression_name[0]))
744 folder->compname = compression_name[folder->comptype];
746 folder->compname = "UNKNOWN";
749 if (hd->flags & RESERVE_PRESENT) {
750 p += hd->cffolder;/* abReserve */
751 used += hd->cffolder;
754 * Sanity check if each data is acceptable.
756 if (offset32 >= folder->cfdata_offset_in_cab)
758 offset32 = folder->cfdata_offset_in_cab;
760 /* Set a request to initialize zlib for the CFDATA of
762 folder->decompress_init = 0;
764 __archive_read_consume(a, used);
765 cab->cab_offset += used;
770 /* Seek read pointer to the offset of CFFILE if needed. */
771 skip = (int64_t)hd->files_offset - cab->cab_offset;
773 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
774 "Invalid offset of CFFILE %jd < %jd",
775 (intmax_t)hd->files_offset, (intmax_t)cab->cab_offset);
776 return (ARCHIVE_FATAL);
779 __archive_read_consume(a, skip);
780 cab->cab_offset += skip;
782 /* Allocate memory for CFDATA */
783 hd->file_array = (struct cffile *)calloc(
784 hd->file_count, sizeof(struct cffile));
785 if (hd->file_array == NULL)
789 for (i = 0; i < hd->file_count; i++) {
790 struct cffile *file = &(hd->file_array[i]);
793 if ((p = __archive_read_ahead(a, 16, NULL)) == NULL)
794 return (truncated_error(a));
795 file->uncompressed_size = archive_le32dec(p + CFFILE_cbFile);
796 file->offset = archive_le32dec(p + CFFILE_uoffFolderStart);
797 file->folder = archive_le16dec(p + CFFILE_iFolder);
798 file->mtime = cab_dos_time(p + CFFILE_date_time);
799 file->attr = archive_le16dec(p + CFFILE_attribs);
800 __archive_read_consume(a, 16);
802 cab->cab_offset += 16;
803 if ((p = cab_read_ahead_remaining(a, 256, &avail)) == NULL)
804 return (truncated_error(a));
805 if ((len = cab_strnlen(p, avail-1)) <= 0)
808 /* Copy a pathname. */
809 archive_string_init(&(file->pathname));
810 archive_strncpy(&(file->pathname), p, len);
811 __archive_read_consume(a, len + 1);
812 cab->cab_offset += len + 1;
815 * Sanity check if each data is acceptable.
817 if (file->uncompressed_size > 0x7FFF8000)
818 goto invalid;/* Too large */
819 if ((int64_t)file->offset + (int64_t)file->uncompressed_size
820 > ARCHIVE_LITERAL_LL(0x7FFF8000))
821 goto invalid;/* Too large */
822 switch (file->folder) {
823 case iFoldCONTINUED_TO_NEXT:
824 /* This must be last file in a folder. */
825 if (i != hd->file_count -1)
827 cur_folder = hd->folder_count -1;
829 case iFoldCONTINUED_PREV_AND_NEXT:
830 /* This must be only one file in a folder. */
831 if (hd->file_count != 1)
834 case iFoldCONTINUED_FROM_PREV:
835 /* This must be first file in a folder. */
838 prev_folder = cur_folder = 0;
839 offset32 = file->offset;
842 if (file->folder >= hd->folder_count)
844 cur_folder = file->folder;
847 /* Dot not back track. */
848 if (cur_folder < prev_folder)
850 if (cur_folder != prev_folder)
852 prev_folder = cur_folder;
854 /* Make sure there are not any blanks from last file
856 if (offset32 != file->offset)
858 offset32 += file->uncompressed_size;
860 /* CFDATA is available for file contents. */
861 if (file->uncompressed_size > 0 &&
862 hd->folder_array[cur_folder].cfdata_count == 0)
866 if (hd->cabinet != 0 || hd->flags & (PREV_CABINET | NEXT_CABINET)) {
867 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
868 "Multivolume cabinet file is unsupported");
869 return (ARCHIVE_WARN);
873 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
874 "Invalid CAB header");
875 return (ARCHIVE_FATAL);
877 archive_set_error(&a->archive, ENOMEM,
878 "Can't allocate memory for CAB data");
879 return (ARCHIVE_FATAL);
883 archive_read_format_cab_read_header(struct archive_read *a,
884 struct archive_entry *entry)
888 struct cffolder *prev_folder;
890 struct archive_string_conv *sconv;
891 int err = ARCHIVE_OK, r;
893 cab = (struct cab *)(a->format->data);
894 if (cab->found_header == 0) {
895 err = cab_read_header(a);
896 if (err < ARCHIVE_WARN)
898 /* We've found the header. */
899 cab->found_header = 1;
903 if (hd->file_index >= hd->file_count) {
904 cab->end_of_archive = 1;
905 return (ARCHIVE_EOF);
907 file = &hd->file_array[hd->file_index++];
909 cab->end_of_entry = 0;
910 cab->end_of_entry_cleanup = 0;
911 cab->entry_compressed_bytes_read = 0;
912 cab->entry_uncompressed_bytes_read = 0;
913 cab->entry_unconsumed = 0;
914 cab->entry_cffile = file;
917 * Choose a proper folder.
919 prev_folder = cab->entry_cffolder;
920 switch (file->folder) {
921 case iFoldCONTINUED_FROM_PREV:
922 case iFoldCONTINUED_PREV_AND_NEXT:
923 cab->entry_cffolder = &hd->folder_array[0];
925 case iFoldCONTINUED_TO_NEXT:
926 cab->entry_cffolder = &hd->folder_array[hd->folder_count-1];
929 cab->entry_cffolder = &hd->folder_array[file->folder];
932 /* If a cffolder of this file is changed, reset a cfdata to read
933 * file contents from next cfdata. */
934 if (prev_folder != cab->entry_cffolder)
935 cab->entry_cfdata = NULL;
937 /* If a pathname is UTF-8, prepare a string conversion object
938 * for UTF-8 and use it. */
939 if (file->attr & ATTR_NAME_IS_UTF) {
940 if (cab->sconv_utf8 == NULL) {
942 archive_string_conversion_from_charset(
943 &(a->archive), "UTF-8", 1);
944 if (cab->sconv_utf8 == NULL)
945 return (ARCHIVE_FATAL);
947 sconv = cab->sconv_utf8;
948 } else if (cab->sconv != NULL) {
949 /* Choose the conversion specified by the option. */
952 /* Choose the default conversion. */
953 if (!cab->init_default_conversion) {
955 archive_string_default_conversion_for_read(
957 cab->init_default_conversion = 1;
959 sconv = cab->sconv_default;
963 * Set a default value and common data
965 r = cab_convert_path_separator_1(&(file->pathname), file->attr);
966 if (archive_entry_copy_pathname_l(entry, file->pathname.s,
967 archive_strlen(&(file->pathname)), sconv) != 0) {
968 if (errno == ENOMEM) {
969 archive_set_error(&a->archive, ENOMEM,
970 "Can't allocate memory for Pathname");
971 return (ARCHIVE_FATAL);
973 archive_set_error(&a->archive,
974 ARCHIVE_ERRNO_FILE_FORMAT,
975 "Pathname cannot be converted "
976 "from %s to current locale.",
977 archive_string_conversion_charset_name(sconv));
981 /* Convert a path separator '\' -> '/' */
982 cab_convert_path_separator_2(cab, entry);
985 archive_entry_set_size(entry, file->uncompressed_size);
986 if (file->attr & ATTR_RDONLY)
987 archive_entry_set_mode(entry, AE_IFREG | 0555);
989 archive_entry_set_mode(entry, AE_IFREG | 0777);
990 archive_entry_set_mtime(entry, file->mtime, 0);
992 cab->entry_bytes_remaining = file->uncompressed_size;
993 cab->entry_offset = 0;
994 /* We don't need compress data. */
995 if (file->uncompressed_size == 0)
996 cab->end_of_entry_cleanup = cab->end_of_entry = 1;
998 /* Set up a more descriptive format name. */
999 sprintf(cab->format_name, "CAB %d.%d (%s)",
1000 hd->major, hd->minor, cab->entry_cffolder->compname);
1001 a->archive.archive_format_name = cab->format_name;
1007 archive_read_format_cab_read_data(struct archive_read *a,
1008 const void **buff, size_t *size, int64_t *offset)
1010 struct cab *cab = (struct cab *)(a->format->data);
1013 switch (cab->entry_cffile->folder) {
1014 case iFoldCONTINUED_FROM_PREV:
1015 case iFoldCONTINUED_TO_NEXT:
1016 case iFoldCONTINUED_PREV_AND_NEXT:
1020 archive_clear_error(&a->archive);
1021 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1022 "Cannot restore this file split in multivolume.");
1023 return (ARCHIVE_FAILED);
1027 if (cab->entry_unconsumed) {
1028 /* Consume as much as the compressor actually used. */
1029 r = cab_consume_cfdata(a, cab->entry_unconsumed);
1030 cab->entry_unconsumed = 0;
1034 if (cab->end_of_archive || cab->end_of_entry) {
1035 if (!cab->end_of_entry_cleanup) {
1036 /* End-of-entry cleanup done. */
1037 cab->end_of_entry_cleanup = 1;
1039 *offset = cab->entry_offset;
1042 return (ARCHIVE_EOF);
1045 return (cab_read_data(a, buff, size, offset));
1049 cab_checksum_cfdata_4(const void *p, size_t bytes, uint32_t seed)
1051 const unsigned char *b;
1058 while (--u32num >= 0) {
1059 sum ^= archive_le32dec(b);
1066 cab_checksum_cfdata(const void *p, size_t bytes, uint32_t seed)
1068 const unsigned char *b;
1072 sum = cab_checksum_cfdata_4(p, bytes, seed);
1076 switch (bytes & 3) {
1078 t |= ((uint32_t)(*b++)) << 16;
1081 t |= ((uint32_t)(*b++)) << 8;
1095 cab_checksum_update(struct archive_read *a, size_t bytes)
1097 struct cab *cab = (struct cab *)(a->format->data);
1098 struct cfdata *cfdata = cab->entry_cfdata;
1099 const unsigned char *p;
1102 if (cfdata->sum == 0 || cfdata->sum_ptr == NULL)
1105 * Calculate the sum of this CFDATA.
1106 * Make sure CFDATA must be calculated in four bytes.
1108 p = cfdata->sum_ptr;
1110 if (cfdata->sum_extra_avail) {
1111 while (cfdata->sum_extra_avail < 4 && sumbytes > 0) {
1113 cfdata->sum_extra_avail++] = *p++;
1116 if (cfdata->sum_extra_avail == 4) {
1117 cfdata->sum_calculated = cab_checksum_cfdata_4(
1118 cfdata->sum_extra, 4, cfdata->sum_calculated);
1119 cfdata->sum_extra_avail = 0;
1123 int odd = sumbytes & 3;
1124 if (sumbytes - odd > 0)
1125 cfdata->sum_calculated = cab_checksum_cfdata_4(
1126 p, sumbytes - odd, cfdata->sum_calculated);
1128 memcpy(cfdata->sum_extra, p + sumbytes - odd, odd);
1129 cfdata->sum_extra_avail = odd;
1131 cfdata->sum_ptr = NULL;
1135 cab_checksum_finish(struct archive_read *a)
1137 struct cab *cab = (struct cab *)(a->format->data);
1138 struct cfdata *cfdata = cab->entry_cfdata;
1141 /* Do not need to compute a sum. */
1142 if (cfdata->sum == 0)
1143 return (ARCHIVE_OK);
1146 * Calculate the sum of remaining CFDATA.
1148 if (cfdata->sum_extra_avail) {
1149 cfdata->sum_calculated =
1150 cab_checksum_cfdata(cfdata->sum_extra,
1151 cfdata->sum_extra_avail, cfdata->sum_calculated);
1152 cfdata->sum_extra_avail = 0;
1156 if (cab->cfheader.flags & RESERVE_PRESENT)
1157 l += cab->cfheader.cfdata;
1158 cfdata->sum_calculated = cab_checksum_cfdata(
1159 cfdata->memimage + CFDATA_cbData, l, cfdata->sum_calculated);
1160 if (cfdata->sum_calculated != cfdata->sum) {
1161 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1162 "Checksum error CFDATA[%d] %x:%x in %d bytes",
1163 cab->entry_cffolder->cfdata_index -1,
1164 cfdata->sum, cfdata->sum_calculated,
1165 cfdata->compressed_size);
1166 return (ARCHIVE_FAILED);
1168 return (ARCHIVE_OK);
1172 * Read CFDATA if needed.
1175 cab_next_cfdata(struct archive_read *a)
1177 struct cab *cab = (struct cab *)(a->format->data);
1178 struct cfdata *cfdata = cab->entry_cfdata;
1180 /* There are remaining bytes in current CFDATA, use it first. */
1181 if (cfdata != NULL && cfdata->uncompressed_bytes_remaining > 0)
1182 return (ARCHIVE_OK);
1184 if (cfdata == NULL) {
1187 cab->entry_cffolder->cfdata_index = 0;
1189 /* Seek read pointer to the offset of CFDATA if needed. */
1190 skip = cab->entry_cffolder->cfdata_offset_in_cab
1194 switch (cab->entry_cffile->folder) {
1195 case iFoldCONTINUED_FROM_PREV:
1196 case iFoldCONTINUED_PREV_AND_NEXT:
1199 case iFoldCONTINUED_TO_NEXT:
1200 folder_index = cab->cfheader.folder_count-1;
1203 folder_index = cab->entry_cffile->folder;
1206 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1207 "Invalid offset of CFDATA in folder(%d) %jd < %jd",
1209 (intmax_t)cab->entry_cffolder->cfdata_offset_in_cab,
1210 (intmax_t)cab->cab_offset);
1211 return (ARCHIVE_FATAL);
1214 if (__archive_read_consume(a, skip) < 0)
1215 return (ARCHIVE_FATAL);
1217 cab->entry_cffolder->cfdata_offset_in_cab;
1224 if (cab->entry_cffolder->cfdata_index <
1225 cab->entry_cffolder->cfdata_count) {
1226 const unsigned char *p;
1229 cfdata = &(cab->entry_cffolder->cfdata);
1230 cab->entry_cffolder->cfdata_index++;
1231 cab->entry_cfdata = cfdata;
1232 cfdata->sum_calculated = 0;
1233 cfdata->sum_extra_avail = 0;
1234 cfdata->sum_ptr = NULL;
1236 if (cab->cfheader.flags & RESERVE_PRESENT)
1237 l += cab->cfheader.cfdata;
1238 if ((p = __archive_read_ahead(a, l, NULL)) == NULL)
1239 return (truncated_error(a));
1240 cfdata->sum = archive_le32dec(p + CFDATA_csum);
1241 cfdata->compressed_size = archive_le16dec(p + CFDATA_cbData);
1242 cfdata->compressed_bytes_remaining = cfdata->compressed_size;
1243 cfdata->uncompressed_size =
1244 archive_le16dec(p + CFDATA_cbUncomp);
1245 cfdata->uncompressed_bytes_remaining =
1246 cfdata->uncompressed_size;
1247 cfdata->uncompressed_avail = 0;
1248 cfdata->read_offset = 0;
1249 cfdata->unconsumed = 0;
1252 * Sanity check if data size is acceptable.
1254 if (cfdata->compressed_size == 0 ||
1255 cfdata->compressed_size > (0x8000+6144))
1257 if (cfdata->uncompressed_size > 0x8000)
1259 if (cfdata->uncompressed_size == 0) {
1260 switch (cab->entry_cffile->folder) {
1261 case iFoldCONTINUED_PREV_AND_NEXT:
1262 case iFoldCONTINUED_TO_NEXT:
1264 case iFoldCONTINUED_FROM_PREV:
1269 /* If CFDATA is not last in a folder, an uncompressed
1270 * size must be 0x8000(32KBi) */
1271 if ((cab->entry_cffolder->cfdata_index <
1272 cab->entry_cffolder->cfdata_count) &&
1273 cfdata->uncompressed_size != 0x8000)
1276 /* A compressed data size and an uncompressed data size must
1277 * be the same in no compression mode. */
1278 if (cab->entry_cffolder->comptype == COMPTYPE_NONE &&
1279 cfdata->compressed_size != cfdata->uncompressed_size)
1283 * Save CFDATA image for sum check.
1285 if (cfdata->memimage_size < (size_t)l) {
1286 free(cfdata->memimage);
1287 cfdata->memimage = malloc(l);
1288 if (cfdata->memimage == NULL) {
1289 archive_set_error(&a->archive, ENOMEM,
1290 "Can't allocate memory for CAB data");
1291 return (ARCHIVE_FATAL);
1293 cfdata->memimage_size = l;
1295 memcpy(cfdata->memimage, p, l);
1297 /* Consume bytes as much as we used. */
1298 __archive_read_consume(a, l);
1299 cab->cab_offset += l;
1300 } else if (cab->entry_cffolder->cfdata_count > 0) {
1301 /* Run out of all CFDATA in a folder. */
1302 cfdata->compressed_size = 0;
1303 cfdata->uncompressed_size = 0;
1304 cfdata->compressed_bytes_remaining = 0;
1305 cfdata->uncompressed_bytes_remaining = 0;
1307 /* Current folder does not have any CFDATA. */
1308 cfdata = &(cab->entry_cffolder->cfdata);
1309 cab->entry_cfdata = cfdata;
1310 memset(cfdata, 0, sizeof(*cfdata));
1312 return (ARCHIVE_OK);
1314 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1316 return (ARCHIVE_FATAL);
1320 * Read ahead CFDATA.
1323 cab_read_ahead_cfdata(struct archive_read *a, ssize_t *avail)
1325 struct cab *cab = (struct cab *)(a->format->data);
1328 err = cab_next_cfdata(a);
1329 if (err < ARCHIVE_OK) {
1334 switch (cab->entry_cffolder->comptype) {
1336 return (cab_read_ahead_cfdata_none(a, avail));
1337 case COMPTYPE_MSZIP:
1338 return (cab_read_ahead_cfdata_deflate(a, avail));
1340 return (cab_read_ahead_cfdata_lzx(a, avail));
1341 default: /* Unsupported compression. */
1342 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1343 "Unsupported CAB compression : %s",
1344 cab->entry_cffolder->compname);
1345 *avail = ARCHIVE_FAILED;
1351 * Read ahead CFDATA as uncompressed data.
1354 cab_read_ahead_cfdata_none(struct archive_read *a, ssize_t *avail)
1356 struct cab *cab = (struct cab *)(a->format->data);
1357 struct cfdata *cfdata;
1359 int64_t skipped_bytes;
1361 cfdata = cab->entry_cfdata;
1363 if (cfdata->uncompressed_avail == 0 &&
1364 cfdata->read_offset > 0) {
1365 /* we've already skipped some bytes before really read. */
1366 skipped_bytes = cfdata->read_offset;
1367 cfdata->read_offset = 0;
1368 cfdata->uncompressed_bytes_remaining += skipped_bytes;
1373 * Note: '1' here is a performance optimization.
1374 * Recall that the decompression layer returns a count of
1375 * available bytes; asking for more than that forces the
1376 * decompressor to combine reads by copying data.
1378 d = __archive_read_ahead(a, 1, avail);
1380 *avail = truncated_error(a);
1383 if (*avail > cfdata->uncompressed_bytes_remaining)
1384 *avail = cfdata->uncompressed_bytes_remaining;
1385 cfdata->uncompressed_avail = cfdata->uncompressed_size;
1386 cfdata->unconsumed = *avail;
1387 cfdata->sum_ptr = d;
1388 if (skipped_bytes > 0) {
1390 cab_minimum_consume_cfdata(a, skipped_bytes);
1391 if (skipped_bytes < 0) {
1392 *avail = ARCHIVE_FATAL;
1403 * Read ahead CFDATA as deflate data.
1407 cab_read_ahead_cfdata_deflate(struct archive_read *a, ssize_t *avail)
1409 struct cab *cab = (struct cab *)(a->format->data);
1410 struct cfdata *cfdata;
1416 cfdata = cab->entry_cfdata;
1417 /* If the buffer hasn't been allocated, allocate it now. */
1418 if (cab->uncompressed_buffer == NULL) {
1419 cab->uncompressed_buffer_size = 0x8000;
1420 cab->uncompressed_buffer
1421 = (unsigned char *)malloc(cab->uncompressed_buffer_size);
1422 if (cab->uncompressed_buffer == NULL) {
1423 archive_set_error(&a->archive, ENOMEM,
1424 "No memory for CAB reader");
1425 *avail = ARCHIVE_FATAL;
1430 uavail = cfdata->uncompressed_avail;
1431 if (uavail == cfdata->uncompressed_size) {
1432 d = cab->uncompressed_buffer + cfdata->read_offset;
1433 *avail = uavail - cfdata->read_offset;
1437 if (!cab->entry_cffolder->decompress_init) {
1438 cab->stream.next_in = NULL;
1439 cab->stream.avail_in = 0;
1440 cab->stream.total_in = 0;
1441 cab->stream.next_out = NULL;
1442 cab->stream.avail_out = 0;
1443 cab->stream.total_out = 0;
1444 if (cab->stream_valid)
1445 r = inflateReset(&cab->stream);
1447 r = inflateInit2(&cab->stream,
1448 -15 /* Don't check for zlib header */);
1450 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1451 "Can't initialize deflate decompression.");
1452 *avail = ARCHIVE_FATAL;
1455 /* Stream structure has been set up. */
1456 cab->stream_valid = 1;
1457 /* We've initialized decompression for this stream. */
1458 cab->entry_cffolder->decompress_init = 1;
1461 if (cfdata->compressed_bytes_remaining == cfdata->compressed_size)
1466 cab->stream.total_out = uavail;
1468 * We always uncompress all data in current CFDATA.
1470 while (!eod && cab->stream.total_out < cfdata->uncompressed_size) {
1471 ssize_t bytes_avail;
1473 cab->stream.next_out =
1474 cab->uncompressed_buffer + cab->stream.total_out;
1475 cab->stream.avail_out =
1476 cfdata->uncompressed_size - cab->stream.total_out;
1478 d = __archive_read_ahead(a, 1, &bytes_avail);
1479 if (bytes_avail <= 0) {
1480 *avail = truncated_error(a);
1483 if (bytes_avail > cfdata->compressed_bytes_remaining)
1484 bytes_avail = cfdata->compressed_bytes_remaining;
1486 * A bug in zlib.h: stream.next_in should be marked 'const'
1487 * but isn't (the library never alters data through the
1488 * next_in pointer, only reads it). The result: this ugly
1489 * cast to remove 'const'.
1491 cab->stream.next_in = (Bytef *)(uintptr_t)d;
1492 cab->stream.avail_in = bytes_avail;
1493 cab->stream.total_in = 0;
1495 /* Cut out a tow-byte MSZIP signature(0x43, 0x4b). */
1497 if (bytes_avail <= mszip) {
1499 if (cab->stream.next_in[0] != 0x43)
1501 if (bytes_avail > 1 &&
1502 cab->stream.next_in[1] != 0x4b)
1504 } else if (cab->stream.next_in[0] != 0x4b)
1506 cfdata->unconsumed = bytes_avail;
1507 cfdata->sum_ptr = d;
1508 if (cab_minimum_consume_cfdata(
1509 a, cfdata->unconsumed) < 0) {
1510 *avail = ARCHIVE_FATAL;
1513 mszip -= bytes_avail;
1516 if (mszip == 1 && cab->stream.next_in[0] != 0x4b)
1518 else if (cab->stream.next_in[0] != 0x43 ||
1519 cab->stream.next_in[1] != 0x4b)
1521 cab->stream.next_in += mszip;
1522 cab->stream.avail_in -= mszip;
1523 cab->stream.total_in += mszip;
1527 r = inflate(&cab->stream, 0);
1537 cfdata->unconsumed = cab->stream.total_in;
1538 cfdata->sum_ptr = d;
1539 if (cab_minimum_consume_cfdata(a, cfdata->unconsumed) < 0) {
1540 *avail = ARCHIVE_FATAL;
1544 uavail = cab->stream.total_out;
1546 if (uavail < cfdata->uncompressed_size) {
1547 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1548 "Invalid uncompressed size (%d < %d)",
1549 uavail, cfdata->uncompressed_size);
1550 *avail = ARCHIVE_FATAL;
1555 * Note: I suspect there is a bug in makecab.exe because, in rare
1556 * case, compressed bytes are still remaining regardless we have
1557 * gotten all uncompressed bytes, which size is recoded in CFDATA,
1558 * as much as we need, and we have to use the garbage so as to
1559 * correctly compute the sum of CFDATA accordingly.
1561 if (cfdata->compressed_bytes_remaining > 0) {
1562 ssize_t bytes_avail;
1564 d = __archive_read_ahead(a, cfdata->compressed_bytes_remaining,
1566 if (bytes_avail <= 0) {
1567 *avail = truncated_error(a);
1570 cfdata->unconsumed = cfdata->compressed_bytes_remaining;
1571 cfdata->sum_ptr = d;
1572 if (cab_minimum_consume_cfdata(a, cfdata->unconsumed) < 0) {
1573 *avail = ARCHIVE_FATAL;
1579 * Set dictionary data for decompressing of next CFDATA, which
1580 * in the same folder. This is why we always do decompress CFDATA
1581 * even if beginning CFDATA or some of CFDATA are not used in
1582 * skipping file data.
1584 if (cab->entry_cffolder->cfdata_index <
1585 cab->entry_cffolder->cfdata_count) {
1586 r = inflateReset(&cab->stream);
1589 r = inflateSetDictionary(&cab->stream,
1590 cab->uncompressed_buffer, cfdata->uncompressed_size);
1595 d = cab->uncompressed_buffer + cfdata->read_offset;
1596 *avail = uavail - cfdata->read_offset;
1597 cfdata->uncompressed_avail = uavail;
1604 archive_set_error(&a->archive, ENOMEM,
1605 "Out of memory for deflate decompression");
1608 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1609 "Deflate decompression failed (%d)", r);
1612 *avail = ARCHIVE_FATAL;
1615 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1616 "CFDATA incorrect(no MSZIP signature)");
1617 *avail = ARCHIVE_FATAL;
1621 #else /* HAVE_ZLIB_H */
1624 cab_read_ahead_cfdata_deflate(struct archive_read *a, ssize_t *avail)
1626 *avail = ARCHIVE_FATAL;
1627 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1628 "libarchive compiled without deflate support (no libz)");
1632 #endif /* HAVE_ZLIB_H */
1635 cab_read_ahead_cfdata_lzx(struct archive_read *a, ssize_t *avail)
1637 struct cab *cab = (struct cab *)(a->format->data);
1638 struct cfdata *cfdata;
1643 cfdata = cab->entry_cfdata;
1644 /* If the buffer hasn't been allocated, allocate it now. */
1645 if (cab->uncompressed_buffer == NULL) {
1646 cab->uncompressed_buffer_size = 0x8000;
1647 cab->uncompressed_buffer
1648 = (unsigned char *)malloc(cab->uncompressed_buffer_size);
1649 if (cab->uncompressed_buffer == NULL) {
1650 archive_set_error(&a->archive, ENOMEM,
1651 "No memory for CAB reader");
1652 *avail = ARCHIVE_FATAL;
1657 uavail = cfdata->uncompressed_avail;
1658 if (uavail == cfdata->uncompressed_size) {
1659 d = cab->uncompressed_buffer + cfdata->read_offset;
1660 *avail = uavail - cfdata->read_offset;
1664 if (!cab->entry_cffolder->decompress_init) {
1665 r = lzx_decode_init(&cab->xstrm,
1666 cab->entry_cffolder->compdata);
1667 if (r != ARCHIVE_OK) {
1668 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1669 "Can't initialize LZX decompression.");
1670 *avail = ARCHIVE_FATAL;
1673 /* We've initialized decompression for this stream. */
1674 cab->entry_cffolder->decompress_init = 1;
1677 /* Clean up remaining bits of previous CFDATA. */
1678 lzx_cleanup_bitstream(&cab->xstrm);
1679 cab->xstrm.total_out = uavail;
1680 while (cab->xstrm.total_out < cfdata->uncompressed_size) {
1681 ssize_t bytes_avail;
1683 cab->xstrm.next_out =
1684 cab->uncompressed_buffer + cab->xstrm.total_out;
1685 cab->xstrm.avail_out =
1686 cfdata->uncompressed_size - cab->xstrm.total_out;
1688 d = __archive_read_ahead(a, 1, &bytes_avail);
1689 if (bytes_avail <= 0) {
1690 archive_set_error(&a->archive,
1691 ARCHIVE_ERRNO_FILE_FORMAT,
1692 "Truncated CAB file data");
1693 *avail = ARCHIVE_FATAL;
1696 if (bytes_avail > cfdata->compressed_bytes_remaining)
1697 bytes_avail = cfdata->compressed_bytes_remaining;
1699 cab->xstrm.next_in = d;
1700 cab->xstrm.avail_in = bytes_avail;
1701 cab->xstrm.total_in = 0;
1702 r = lzx_decode(&cab->xstrm,
1703 cfdata->compressed_bytes_remaining == bytes_avail);
1709 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1710 "LZX decompression failed (%d)", r);
1711 *avail = ARCHIVE_FATAL;
1714 cfdata->unconsumed = cab->xstrm.total_in;
1715 cfdata->sum_ptr = d;
1716 if (cab_minimum_consume_cfdata(a, cfdata->unconsumed) < 0) {
1717 *avail = ARCHIVE_FATAL;
1722 uavail = cab->xstrm.total_out;
1724 * Make sure a read pointer advances to next CFDATA.
1726 if (cfdata->compressed_bytes_remaining > 0) {
1727 ssize_t bytes_avail;
1729 d = __archive_read_ahead(a, cfdata->compressed_bytes_remaining,
1731 if (bytes_avail <= 0) {
1732 *avail = truncated_error(a);
1735 cfdata->unconsumed = cfdata->compressed_bytes_remaining;
1736 cfdata->sum_ptr = d;
1737 if (cab_minimum_consume_cfdata(a, cfdata->unconsumed) < 0) {
1738 *avail = ARCHIVE_FATAL;
1744 * Translation reversal of x86 proccessor CALL byte sequence(E8).
1746 lzx_translation(&cab->xstrm, cab->uncompressed_buffer,
1747 cfdata->uncompressed_size,
1748 (cab->entry_cffolder->cfdata_index-1) * 0x8000);
1750 d = cab->uncompressed_buffer + cfdata->read_offset;
1751 *avail = uavail - cfdata->read_offset;
1752 cfdata->uncompressed_avail = uavail;
1759 * We always decompress CFDATA to consume CFDATA as much as we need
1760 * in uncompressed bytes because all CFDATA in a folder are related
1761 * so we do not skip any CFDATA without decompressing.
1762 * Note: If the folder of a CFFILE is iFoldCONTINUED_PREV_AND_NEXT or
1763 * iFoldCONTINUED_FROM_PREV, we won't decompress because a CFDATA for
1764 * the CFFILE is remaining bytes of previous Multivolume CAB file.
1767 cab_consume_cfdata(struct archive_read *a, int64_t consumed_bytes)
1769 struct cab *cab = (struct cab *)(a->format->data);
1770 struct cfdata *cfdata;
1771 int64_t cbytes, rbytes;
1774 rbytes = cab_minimum_consume_cfdata(a, consumed_bytes);
1776 return (ARCHIVE_FATAL);
1778 cfdata = cab->entry_cfdata;
1779 while (rbytes > 0) {
1782 if (cfdata->compressed_size == 0) {
1783 archive_set_error(&a->archive,
1784 ARCHIVE_ERRNO_FILE_FORMAT,
1786 return (ARCHIVE_FATAL);
1788 cbytes = cfdata->uncompressed_bytes_remaining;
1789 if (cbytes > rbytes)
1793 if (cfdata->uncompressed_avail == 0 &&
1794 (cab->entry_cffolder->comptype == COMPTYPE_NONE ||
1795 cab->entry_cffile->folder == iFoldCONTINUED_PREV_AND_NEXT ||
1796 cab->entry_cffile->folder == iFoldCONTINUED_FROM_PREV)) {
1797 /* We have not read any data yet. */
1798 if (cbytes == cfdata->uncompressed_bytes_remaining) {
1799 /* Skip whole current CFDATA. */
1800 __archive_read_consume(a,
1801 cfdata->compressed_size);
1802 cab->cab_offset += cfdata->compressed_size;
1803 cfdata->compressed_bytes_remaining = 0;
1804 cfdata->uncompressed_bytes_remaining = 0;
1805 err = cab_next_cfdata(a);
1808 cfdata = cab->entry_cfdata;
1809 if (cfdata->uncompressed_size == 0) {
1810 switch (cab->entry_cffile->folder) {
1811 case iFoldCONTINUED_PREV_AND_NEXT:
1812 case iFoldCONTINUED_TO_NEXT:
1813 case iFoldCONTINUED_FROM_PREV:
1822 cfdata->read_offset += cbytes;
1823 cfdata->uncompressed_bytes_remaining -= cbytes;
1825 } else if (cbytes == 0) {
1826 err = cab_next_cfdata(a);
1829 cfdata = cab->entry_cfdata;
1830 if (cfdata->uncompressed_size == 0) {
1831 switch (cab->entry_cffile->folder) {
1832 case iFoldCONTINUED_PREV_AND_NEXT:
1833 case iFoldCONTINUED_TO_NEXT:
1834 case iFoldCONTINUED_FROM_PREV:
1835 return (ARCHIVE_FATAL);
1842 while (cbytes > 0) {
1843 (void)cab_read_ahead_cfdata(a, &avail);
1845 return (ARCHIVE_FATAL);
1848 if (cab_minimum_consume_cfdata(a, avail) < 0)
1849 return (ARCHIVE_FATAL);
1853 return (consumed_bytes);
1857 * Consume CFDATA as much as we have already gotten and
1858 * compute the sum of CFDATA.
1861 cab_minimum_consume_cfdata(struct archive_read *a, int64_t consumed_bytes)
1863 struct cab *cab = (struct cab *)(a->format->data);
1864 struct cfdata *cfdata;
1865 int64_t cbytes, rbytes;
1868 cfdata = cab->entry_cfdata;
1869 rbytes = consumed_bytes;
1870 if (cab->entry_cffolder->comptype == COMPTYPE_NONE) {
1871 if (consumed_bytes < cfdata->unconsumed)
1872 cbytes = consumed_bytes;
1874 cbytes = cfdata->unconsumed;
1876 cfdata->read_offset += cbytes;
1877 cfdata->uncompressed_bytes_remaining -= cbytes;
1878 cfdata->unconsumed -= cbytes;
1880 cbytes = cfdata->uncompressed_avail - cfdata->read_offset;
1882 if (consumed_bytes < cbytes)
1883 cbytes = consumed_bytes;
1885 cfdata->read_offset += cbytes;
1886 cfdata->uncompressed_bytes_remaining -= cbytes;
1889 if (cfdata->unconsumed) {
1890 cbytes = cfdata->unconsumed;
1891 cfdata->unconsumed = 0;
1896 /* Compute the sum. */
1897 cab_checksum_update(a, cbytes);
1899 /* Consume as much as the compressor actually used. */
1900 __archive_read_consume(a, cbytes);
1901 cab->cab_offset += cbytes;
1902 cfdata->compressed_bytes_remaining -= cbytes;
1903 if (cfdata->compressed_bytes_remaining == 0) {
1904 err = cab_checksum_finish(a);
1913 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
1914 * cab->end_of_entry if it consumes all of the data.
1917 cab_read_data(struct archive_read *a, const void **buff,
1918 size_t *size, int64_t *offset)
1920 struct cab *cab = (struct cab *)(a->format->data);
1921 ssize_t bytes_avail;
1923 if (cab->entry_bytes_remaining == 0) {
1926 *offset = cab->entry_offset;
1927 cab->end_of_entry = 1;
1928 return (ARCHIVE_OK);
1931 *buff = cab_read_ahead_cfdata(a, &bytes_avail);
1932 if (bytes_avail <= 0) {
1936 if (bytes_avail == 0 &&
1937 cab->entry_cfdata->uncompressed_size == 0) {
1938 /* All of CFDATA in a folder has been handled. */
1939 archive_set_error(&a->archive,
1940 ARCHIVE_ERRNO_FILE_FORMAT, "Invalid CFDATA");
1941 return (ARCHIVE_FATAL);
1943 return (bytes_avail);
1945 if (bytes_avail > cab->entry_bytes_remaining)
1946 bytes_avail = cab->entry_bytes_remaining;
1948 *size = bytes_avail;
1949 *offset = cab->entry_offset;
1950 cab->entry_offset += bytes_avail;
1951 cab->entry_bytes_remaining -= bytes_avail;
1952 if (cab->entry_bytes_remaining == 0)
1953 cab->end_of_entry = 1;
1954 cab->entry_unconsumed = bytes_avail;
1955 return (ARCHIVE_OK);
1959 archive_read_format_cab_read_data_skip(struct archive_read *a)
1962 int64_t bytes_skipped;
1965 cab = (struct cab *)(a->format->data);
1967 if (cab->end_of_archive)
1968 return (ARCHIVE_EOF);
1970 if (cab->entry_unconsumed) {
1971 /* Consume as much as the compressor actually used. */
1972 r = cab_consume_cfdata(a, cab->entry_unconsumed);
1973 cab->entry_unconsumed = 0;
1976 } else if (cab->entry_cfdata == NULL) {
1977 r = cab_next_cfdata(a);
1982 /* if we've already read to end of data, we're done. */
1983 if (cab->end_of_entry_cleanup)
1984 return (ARCHIVE_OK);
1987 * If the length is at the beginning, we can skip the
1988 * compressed data much more quickly.
1990 bytes_skipped = cab_consume_cfdata(a, cab->entry_bytes_remaining);
1991 if (bytes_skipped < 0)
1992 return (ARCHIVE_FATAL);
1994 /* This entry is finished and done. */
1995 cab->end_of_entry_cleanup = cab->end_of_entry = 1;
1996 return (ARCHIVE_OK);
2000 archive_read_format_cab_cleanup(struct archive_read *a)
2002 struct cab *cab = (struct cab *)(a->format->data);
2003 struct cfheader *hd = &cab->cfheader;
2006 if (hd->folder_array != NULL) {
2007 for (i = 0; i < hd->folder_count; i++)
2008 free(hd->folder_array[i].cfdata.memimage);
2009 free(hd->folder_array);
2011 if (hd->file_array != NULL) {
2012 for (i = 0; i < cab->cfheader.file_count; i++)
2013 archive_string_free(&(hd->file_array[i].pathname));
2014 free(hd->file_array);
2017 if (cab->stream_valid)
2018 inflateEnd(&cab->stream);
2020 lzx_decode_free(&cab->xstrm);
2021 archive_wstring_free(&cab->ws);
2022 free(cab->uncompressed_buffer);
2024 (a->format->data) = NULL;
2025 return (ARCHIVE_OK);
2028 /* Convert an MSDOS-style date/time into Unix-style time. */
2030 cab_dos_time(const unsigned char *p)
2035 msDate = archive_le16dec(p);
2036 msTime = archive_le16dec(p+2);
2038 memset(&ts, 0, sizeof(ts));
2039 ts.tm_year = ((msDate >> 9) & 0x7f) + 80; /* Years since 1900. */
2040 ts.tm_mon = ((msDate >> 5) & 0x0f) - 1; /* Month number. */
2041 ts.tm_mday = msDate & 0x1f; /* Day of month. */
2042 ts.tm_hour = (msTime >> 11) & 0x1f;
2043 ts.tm_min = (msTime >> 5) & 0x3f;
2044 ts.tm_sec = (msTime << 1) & 0x3e;
2046 return (mktime(&ts));
2049 /*****************************************************************
2051 * LZX decompression code.
2053 *****************************************************************/
2056 * Initialize LZX decoder.
2058 * Returns ARCHIVE_OK if initialization was successful.
2059 * Returns ARCHIVE_FAILED if w_bits has unsupported value.
2060 * Returns ARCHIVE_FATAL if initialization failed; memory allocation
2064 lzx_decode_init(struct lzx_stream *strm, int w_bits)
2067 int slot, w_size, w_slot;
2070 if (strm->ds == NULL) {
2071 strm->ds = calloc(1, sizeof(*strm->ds));
2072 if (strm->ds == NULL)
2073 return (ARCHIVE_FATAL);
2076 ds->error = ARCHIVE_FAILED;
2078 /* Allow bits from 15(32KBi) up to 21(2MBi) */
2079 if (w_bits < SLOT_BASE || w_bits > SLOT_MAX)
2080 return (ARCHIVE_FAILED);
2082 ds->error = ARCHIVE_FATAL;
2087 w_size = ds->w_size;
2088 w_slot = slots[w_bits - SLOT_BASE];
2089 ds->w_size = 1U << w_bits;
2090 ds->w_mask = ds->w_size -1;
2091 if (ds->w_buff == NULL || w_size != ds->w_size) {
2093 ds->w_buff = malloc(ds->w_size);
2094 if (ds->w_buff == NULL)
2095 return (ARCHIVE_FATAL);
2097 ds->pos_tbl = malloc(sizeof(ds->pos_tbl[0]) * w_slot);
2098 if (ds->pos_tbl == NULL)
2099 return (ARCHIVE_FATAL);
2100 lzx_huffman_free(&(ds->mt));
2104 for (slot = 0; slot < w_slot; slot++) {
2109 base += 1 << footer;
2112 for (n = base; n; n >>= 1)
2117 ds->pos_tbl[slot].base = base;
2118 ds->pos_tbl[slot].footer_bits = footer;
2123 ds->br.cache_buffer = 0;
2124 ds->br.cache_avail = 0;
2125 ds->r0 = ds->r1 = ds->r2 = 1;
2127 /* Initialize aligned offset tree. */
2128 if (lzx_huffman_init(&(ds->at), 8, 8) != ARCHIVE_OK)
2129 return (ARCHIVE_FATAL);
2131 /* Initialize pre-tree. */
2132 if (lzx_huffman_init(&(ds->pt), 20, 10) != ARCHIVE_OK)
2133 return (ARCHIVE_FATAL);
2135 /* Initialize Main tree. */
2136 if (lzx_huffman_init(&(ds->mt), 256+(w_slot<<3), 16)
2138 return (ARCHIVE_FATAL);
2140 /* Initialize Length tree. */
2141 if (lzx_huffman_init(&(ds->lt), 249, 16) != ARCHIVE_OK)
2142 return (ARCHIVE_FATAL);
2146 return (ARCHIVE_OK);
2150 * Release LZX decoder.
2153 lzx_decode_free(struct lzx_stream *strm)
2156 if (strm->ds == NULL)
2158 free(strm->ds->w_buff);
2159 free(strm->ds->pos_tbl);
2160 lzx_huffman_free(&(strm->ds->at));
2161 lzx_huffman_free(&(strm->ds->pt));
2162 lzx_huffman_free(&(strm->ds->mt));
2163 lzx_huffman_free(&(strm->ds->lt));
2169 * E8 Call Translation reversal.
2172 lzx_translation(struct lzx_stream *strm, void *p, size_t size, uint32_t offset)
2174 struct lzx_dec *ds = strm->ds;
2175 unsigned char *b, *end;
2177 if (!ds->translation || size <= 10)
2180 end = b + size - 10;
2181 while (b < end && (b = memchr(b, 0xE8, end - b)) != NULL) {
2182 size_t i = b - (unsigned char *)p;
2183 long cp, displacement, value;
2186 value = archive_le32dec(&b[1]);
2187 if (value >= -cp && value < (long)ds->translation_size) {
2189 displacement = value - cp;
2191 displacement = value + ds->translation_size;
2192 archive_le32enc(&b[1], (uint32_t)displacement);
2199 * Bit stream reader.
2201 /* Check that the cache buffer has enough bits. */
2202 #define lzx_br_has(br, n) ((br)->cache_avail >= n)
2203 /* Get compressed data by bit. */
2204 #define lzx_br_bits(br, n) \
2205 (((uint32_t)((br)->cache_buffer >> \
2206 ((br)->cache_avail - (n)))) & cache_masks[n])
2207 #define lzx_br_bits_forced(br, n) \
2208 (((uint32_t)((br)->cache_buffer << \
2209 ((n) - (br)->cache_avail))) & cache_masks[n])
2210 /* Read ahead to make sure the cache buffer has enough compressed data we
2212 * True : completed, there is enough data in the cache buffer.
2213 * False : we met that strm->next_in is empty, we have to get following
2215 #define lzx_br_read_ahead_0(strm, br, n) \
2216 (lzx_br_has((br), (n)) || lzx_br_fillup(strm, br))
2217 /* True : the cache buffer has some bits as much as we need.
2218 * False : there are no enough bits in the cache buffer to be used,
2219 * we have to get following bytes if we could. */
2220 #define lzx_br_read_ahead(strm, br, n) \
2221 (lzx_br_read_ahead_0((strm), (br), (n)) || lzx_br_has((br), (n)))
2223 /* Notify how many bits we consumed. */
2224 #define lzx_br_consume(br, n) ((br)->cache_avail -= (n))
2225 #define lzx_br_consume_unalined_bits(br) ((br)->cache_avail &= ~0x0f)
2227 static const uint32_t cache_masks[] = {
2228 0x00000000, 0x00000001, 0x00000003, 0x00000007,
2229 0x0000000F, 0x0000001F, 0x0000003F, 0x0000007F,
2230 0x000000FF, 0x000001FF, 0x000003FF, 0x000007FF,
2231 0x00000FFF, 0x00001FFF, 0x00003FFF, 0x00007FFF,
2232 0x0000FFFF, 0x0001FFFF, 0x0003FFFF, 0x0007FFFF,
2233 0x000FFFFF, 0x001FFFFF, 0x003FFFFF, 0x007FFFFF,
2234 0x00FFFFFF, 0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF,
2235 0x0FFFFFFF, 0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF,
2236 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
2240 * Shift away used bits in the cache data and fill it up with following bits.
2241 * Call this when cache buffer does not have enough bits you need.
2243 * Returns 1 if the cache buffer is full.
2244 * Returns 0 if the cache buffer is not full; input buffer is empty.
2247 lzx_br_fillup(struct lzx_stream *strm, struct lzx_br *br)
2250 * x86 proccessor family can read misaligned data without an access error.
2252 int n = CACHE_BITS - br->cache_avail;
2257 if (strm->avail_in >= 8) {
2259 ((uint64_t)strm->next_in[1]) << 56 |
2260 ((uint64_t)strm->next_in[0]) << 48 |
2261 ((uint64_t)strm->next_in[3]) << 40 |
2262 ((uint64_t)strm->next_in[2]) << 32 |
2263 ((uint32_t)strm->next_in[5]) << 24 |
2264 ((uint32_t)strm->next_in[4]) << 16 |
2265 ((uint32_t)strm->next_in[7]) << 8 |
2266 (uint32_t)strm->next_in[6];
2268 strm->avail_in -= 8;
2269 br->cache_avail += 8 * 8;
2274 if (strm->avail_in >= 6) {
2276 (br->cache_buffer << 48) |
2277 ((uint64_t)strm->next_in[1]) << 40 |
2278 ((uint64_t)strm->next_in[0]) << 32 |
2279 ((uint32_t)strm->next_in[3]) << 24 |
2280 ((uint32_t)strm->next_in[2]) << 16 |
2281 ((uint32_t)strm->next_in[5]) << 8 |
2282 (uint32_t)strm->next_in[4];
2284 strm->avail_in -= 6;
2285 br->cache_avail += 6 * 8;
2290 /* We have enough compressed data in
2291 * the cache buffer.*/
2296 if (strm->avail_in < 2) {
2297 /* There is not enough compressed data to
2298 * fill up the cache buffer. */
2299 if (strm->avail_in == 1) {
2300 br->odd = *strm->next_in++;
2307 (br->cache_buffer << 16) |
2308 archive_le16dec(strm->next_in);
2310 strm->avail_in -= 2;
2311 br->cache_avail += 16;
2317 lzx_br_fixup(struct lzx_stream *strm, struct lzx_br *br)
2319 int n = CACHE_BITS - br->cache_avail;
2321 if (br->have_odd && n >= 16 && strm->avail_in > 0) {
2323 (br->cache_buffer << 16) |
2324 ((uint16_t)(*strm->next_in)) << 8 | br->odd;
2327 br->cache_avail += 16;
2333 lzx_cleanup_bitstream(struct lzx_stream *strm)
2335 strm->ds->br.cache_avail = 0;
2336 strm->ds->br.have_odd = 0;
2342 * 1. Returns ARCHIVE_OK if output buffer or input buffer are empty.
2343 * Please set available buffer and call this function again.
2344 * 2. Returns ARCHIVE_EOF if decompression has been completed.
2345 * 3. Returns ARCHIVE_FAILED if an error occurred; compressed data
2346 * is broken or you do not set 'last' flag properly.
2348 #define ST_RD_TRANSLATION 0
2349 #define ST_RD_TRANSLATION_SIZE 1
2350 #define ST_RD_BLOCK_TYPE 2
2351 #define ST_RD_BLOCK_SIZE 3
2355 #define ST_COPY_UNCOMP1 7
2356 #define ST_COPY_UNCOMP2 8
2357 #define ST_RD_ALIGNED_OFFSET 9
2358 #define ST_RD_VERBATIM 10
2359 #define ST_RD_PRE_MAIN_TREE_256 11
2360 #define ST_MAIN_TREE_256 12
2361 #define ST_RD_PRE_MAIN_TREE_REM 13
2362 #define ST_MAIN_TREE_REM 14
2363 #define ST_RD_PRE_LENGTH_TREE 15
2364 #define ST_LENGTH_TREE 16
2366 #define ST_LENGTH 18
2367 #define ST_OFFSET 19
2368 #define ST_REAL_POS 20
2372 lzx_decode(struct lzx_stream *strm, int last)
2374 struct lzx_dec *ds = strm->ds;
2381 avail_in = strm->avail_in;
2382 lzx_br_fixup(strm, &(ds->br));
2384 if (ds->state < ST_MAIN)
2385 r = lzx_read_blocks(strm, last);
2387 int64_t bytes_written = strm->avail_out;
2388 r = lzx_decode_blocks(strm, last);
2389 bytes_written -= strm->avail_out;
2390 strm->next_out += bytes_written;
2391 strm->total_out += bytes_written;
2394 strm->total_in += avail_in - strm->avail_in;
2399 lzx_read_blocks(struct lzx_stream *strm, int last)
2401 struct lzx_dec *ds = strm->ds;
2402 struct lzx_br *br = &(ds->br);
2406 switch (ds->state) {
2407 case ST_RD_TRANSLATION:
2408 if (!lzx_br_read_ahead(strm, br, 1)) {
2409 ds->state = ST_RD_TRANSLATION;
2412 return (ARCHIVE_OK);
2414 ds->translation = lzx_br_bits(br, 1);
2415 lzx_br_consume(br, 1);
2417 case ST_RD_TRANSLATION_SIZE:
2418 if (ds->translation) {
2419 if (!lzx_br_read_ahead(strm, br, 32)) {
2420 ds->state = ST_RD_TRANSLATION_SIZE;
2423 return (ARCHIVE_OK);
2425 ds->translation_size = lzx_br_bits(br, 16);
2426 lzx_br_consume(br, 16);
2427 ds->translation_size <<= 16;
2428 ds->translation_size |= lzx_br_bits(br, 16);
2429 lzx_br_consume(br, 16);
2432 case ST_RD_BLOCK_TYPE:
2433 if (!lzx_br_read_ahead(strm, br, 3)) {
2434 ds->state = ST_RD_BLOCK_TYPE;
2437 return (ARCHIVE_OK);
2439 ds->block_type = lzx_br_bits(br, 3);
2440 lzx_br_consume(br, 3);
2441 /* Check a block type. */
2442 switch (ds->block_type) {
2443 case VERBATIM_BLOCK:
2444 case ALIGNED_OFFSET_BLOCK:
2445 case UNCOMPRESSED_BLOCK:
2448 goto failed;/* Invalid */
2451 case ST_RD_BLOCK_SIZE:
2452 if (!lzx_br_read_ahead(strm, br, 24)) {
2453 ds->state = ST_RD_BLOCK_SIZE;
2456 return (ARCHIVE_OK);
2458 ds->block_size = lzx_br_bits(br, 8);
2459 lzx_br_consume(br, 8);
2460 ds->block_size <<= 16;
2461 ds->block_size |= lzx_br_bits(br, 16);
2462 lzx_br_consume(br, 16);
2463 if (ds->block_size == 0)
2465 ds->block_bytes_avail = ds->block_size;
2466 if (ds->block_type != UNCOMPRESSED_BLOCK) {
2467 if (ds->block_type == VERBATIM_BLOCK)
2468 ds->state = ST_RD_VERBATIM;
2470 ds->state = ST_RD_ALIGNED_OFFSET;
2474 * Handle an Uncompressed Block.
2476 /* Skip padding to align following field on
2477 * 16-bit boundary. */
2478 lzx_br_consume_unalined_bits(br);
2479 /* Preparation to read repeated offsets R0,R1 and R2. */
2480 ds->rbytes_avail = 0;
2481 ds->state = ST_RD_R0;
2488 /* Drain bits in the cache buffer of
2490 if (lzx_br_has(br, 32)) {
2491 u16 = lzx_br_bits(br, 16);
2492 lzx_br_consume(br, 16);
2493 archive_le16enc(ds->rbytes, u16);
2494 u16 = lzx_br_bits(br, 16);
2495 lzx_br_consume(br, 16);
2496 archive_le16enc(ds->rbytes+2, u16);
2497 ds->rbytes_avail = 4;
2498 } else if (lzx_br_has(br, 16)) {
2499 u16 = lzx_br_bits(br, 16);
2500 lzx_br_consume(br, 16);
2501 archive_le16enc(ds->rbytes, u16);
2502 ds->rbytes_avail = 2;
2504 ds->rbytes_avail = 0;
2505 if (ds->rbytes_avail < 4 && ds->br.have_odd) {
2506 ds->rbytes[ds->rbytes_avail++] =
2508 ds->br.have_odd = 0;
2510 while (ds->rbytes_avail < 4) {
2511 if (strm->avail_in <= 0) {
2514 return (ARCHIVE_OK);
2516 ds->rbytes[ds->rbytes_avail++] =
2520 if (ds->state == ST_RD_R0) {
2521 ds->r0 = archive_le32dec(ds->rbytes);
2524 ds->state = ST_RD_R1;
2525 } else if (ds->state == ST_RD_R1) {
2526 ds->r1 = archive_le32dec(ds->rbytes);
2529 ds->state = ST_RD_R2;
2530 } else if (ds->state == ST_RD_R2) {
2531 ds->r2 = archive_le32dec(ds->rbytes);
2534 /* We've gotten all repeated offsets. */
2535 ds->state = ST_COPY_UNCOMP1;
2537 } while (ds->state != ST_COPY_UNCOMP1);
2539 case ST_COPY_UNCOMP1:
2541 * Copy bytes form next_in to next_out directly.
2543 while (ds->block_bytes_avail) {
2547 if (strm->avail_out <= 0)
2548 /* Output buffer is empty. */
2549 return (ARCHIVE_OK);
2550 if (strm->avail_in <= 0) {
2551 /* Input buffer is empty. */
2554 return (ARCHIVE_OK);
2556 l = ds->block_bytes_avail;
2557 if (l > ds->w_size - ds->w_pos)
2558 l = ds->w_size - ds->w_pos;
2559 if (l > strm->avail_out)
2560 l = (int)strm->avail_out;
2561 if (l > strm->avail_in)
2562 l = (int)strm->avail_in;
2564 d = &(ds->w_buff[ds->w_pos]);
2566 *strm->next_out++ = *strm->next_in;
2567 *d++ = *strm->next_in++;
2569 strm->avail_out -= ll;
2570 strm->total_out += ll;
2571 strm->avail_in -= ll;
2572 ds->w_pos = (ds->w_pos + ll) & ds->w_mask;
2573 ds->block_bytes_avail -= ll;
2576 case ST_COPY_UNCOMP2:
2577 /* Re-align; skip padding byte. */
2578 if (ds->block_size & 1) {
2579 if (strm->avail_in <= 0) {
2580 /* Input buffer is empty. */
2581 ds->state = ST_COPY_UNCOMP2;
2584 return (ARCHIVE_OK);
2589 /* This block ended. */
2590 ds->state = ST_RD_BLOCK_TYPE;
2591 return (ARCHIVE_EOF);
2592 /********************/
2593 case ST_RD_ALIGNED_OFFSET:
2595 * Read Aligned offset tree.
2597 if (!lzx_br_read_ahead(strm, br, 3 * ds->at.len_size)) {
2598 ds->state = ST_RD_ALIGNED_OFFSET;
2601 return (ARCHIVE_OK);
2603 memset(ds->at.freq, 0, sizeof(ds->at.freq));
2604 for (i = 0; i < ds->at.len_size; i++) {
2605 ds->at.bitlen[i] = lzx_br_bits(br, 3);
2606 ds->at.freq[ds->at.bitlen[i]]++;
2607 lzx_br_consume(br, 3);
2609 if (!lzx_make_huffman_table(&ds->at))
2612 case ST_RD_VERBATIM:
2615 case ST_RD_PRE_MAIN_TREE_256:
2617 * Read Pre-tree for first 256 elements of main tree.
2619 if (!lzx_read_pre_tree(strm)) {
2620 ds->state = ST_RD_PRE_MAIN_TREE_256;
2623 return (ARCHIVE_OK);
2625 if (!lzx_make_huffman_table(&ds->pt))
2629 case ST_MAIN_TREE_256:
2631 * Get path lengths of first 256 elements of main tree.
2633 r = lzx_read_bitlen(strm, &ds->mt, 256);
2637 ds->state = ST_MAIN_TREE_256;
2640 return (ARCHIVE_OK);
2644 case ST_RD_PRE_MAIN_TREE_REM:
2646 * Read Pre-tree for remaining elements of main tree.
2648 if (!lzx_read_pre_tree(strm)) {
2649 ds->state = ST_RD_PRE_MAIN_TREE_REM;
2652 return (ARCHIVE_OK);
2654 if (!lzx_make_huffman_table(&ds->pt))
2658 case ST_MAIN_TREE_REM:
2660 * Get path lengths of remaining elements of main tree.
2662 r = lzx_read_bitlen(strm, &ds->mt, -1);
2666 ds->state = ST_MAIN_TREE_REM;
2669 return (ARCHIVE_OK);
2671 if (!lzx_make_huffman_table(&ds->mt))
2675 case ST_RD_PRE_LENGTH_TREE:
2677 * Read Pre-tree for remaining elements of main tree.
2679 if (!lzx_read_pre_tree(strm)) {
2680 ds->state = ST_RD_PRE_LENGTH_TREE;
2683 return (ARCHIVE_OK);
2685 if (!lzx_make_huffman_table(&ds->pt))
2689 case ST_LENGTH_TREE:
2691 * Get path lengths of remaining elements of main tree.
2693 r = lzx_read_bitlen(strm, &ds->lt, -1);
2697 ds->state = ST_LENGTH_TREE;
2700 return (ARCHIVE_OK);
2702 if (!lzx_make_huffman_table(&ds->lt))
2704 ds->state = ST_MAIN;
2709 return (ds->error = ARCHIVE_FAILED);
2713 lzx_decode_blocks(struct lzx_stream *strm, int last)
2715 struct lzx_dec *ds = strm->ds;
2716 struct lzx_br bre = ds->br;
2717 struct huffman *at = &(ds->at), *lt = &(ds->lt), *mt = &(ds->mt);
2718 const struct lzx_pos_tbl *pos_tbl = ds->pos_tbl;
2719 unsigned char *outp = strm->next_out;
2720 unsigned char *endp = outp + strm->avail_out;
2721 unsigned char *w_buff = ds->w_buff;
2722 unsigned char *at_bitlen = at->bitlen;
2723 unsigned char *lt_bitlen = lt->bitlen;
2724 unsigned char *mt_bitlen = mt->bitlen;
2725 size_t block_bytes_avail = ds->block_bytes_avail;
2726 int at_max_bits = at->max_bits;
2727 int lt_max_bits = lt->max_bits;
2728 int mt_max_bits = mt->max_bits;
2729 int c, copy_len = ds->copy_len, copy_pos = ds->copy_pos;
2730 int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size;
2731 int length_header = ds->length_header;
2732 int offset_bits = ds->offset_bits;
2733 int position_slot = ds->position_slot;
2734 int r0 = ds->r0, r1 = ds->r1, r2 = ds->r2;
2735 int state = ds->state;
2736 char block_type = ds->block_type;
2742 if (block_bytes_avail == 0) {
2743 /* This block ended. */
2744 ds->state = ST_RD_BLOCK_TYPE;
2746 ds->block_bytes_avail =
2748 ds->copy_len = copy_len;
2749 ds->copy_pos = copy_pos;
2750 ds->length_header = length_header;
2751 ds->position_slot = position_slot;
2752 ds->r0 = r0; ds->r1 = r1; ds->r2 = r2;
2754 strm->avail_out = endp - outp;
2755 return (ARCHIVE_EOF);
2758 /* Output buffer is empty. */
2761 if (!lzx_br_read_ahead(strm, &bre,
2765 /* Remaining bits are less than
2766 * maximum bits(mt.max_bits) but maybe
2767 * it still remains as much as we need,
2768 * so we should try to use it with
2770 c = lzx_decode_huffman(mt,
2772 &bre, mt_max_bits));
2773 lzx_br_consume(&bre, mt_bitlen[c]);
2774 if (!lzx_br_has(&bre, 0))
2775 goto failed;/* Over read. */
2777 c = lzx_decode_huffman(mt,
2778 lzx_br_bits(&bre, mt_max_bits));
2779 lzx_br_consume(&bre, mt_bitlen[c]);
2784 * 'c' is exactly literal code.
2786 /* Save a decoded code to reference it
2789 w_pos = (w_pos + 1) & w_mask;
2790 /* Store the decoded code to output buffer. */
2792 block_bytes_avail--;
2795 * Get a match code, its length and offset.
2798 length_header = c & 7;
2799 position_slot = c >> 3;
2805 if (length_header == 7) {
2806 if (!lzx_br_read_ahead(strm, &bre,
2812 c = lzx_decode_huffman(lt,
2814 &bre, lt_max_bits));
2815 lzx_br_consume(&bre, lt_bitlen[c]);
2816 if (!lzx_br_has(&bre, 0))
2817 goto failed;/* Over read. */
2819 c = lzx_decode_huffman(lt,
2820 lzx_br_bits(&bre, lt_max_bits));
2821 lzx_br_consume(&bre, lt_bitlen[c]);
2823 copy_len = c + 7 + 2;
2825 copy_len = length_header + 2;
2826 if ((size_t)copy_len > block_bytes_avail)
2831 switch (position_slot) {
2832 case 0: /* Use repeated offset 0. */
2834 state = ST_REAL_POS;
2836 case 1: /* Use repeated offset 1. */
2838 /* Swap repeated offset. */
2841 state = ST_REAL_POS;
2843 case 2: /* Use repeated offset 2. */
2845 /* Swap repeated offset. */
2848 state = ST_REAL_POS;
2852 pos_tbl[position_slot].footer_bits;
2858 * Get the offset, which is a distance from
2859 * current window position.
2861 if (block_type == ALIGNED_OFFSET_BLOCK &&
2863 int offbits = offset_bits - 3;
2865 if (!lzx_br_read_ahead(strm, &bre, offbits)) {
2871 copy_pos = lzx_br_bits(&bre, offbits) << 3;
2873 /* Get an aligned number. */
2874 if (!lzx_br_read_ahead(strm, &bre,
2875 offbits + at_max_bits)) {
2880 lzx_br_consume(&bre, offbits);
2881 c = lzx_decode_huffman(at,
2882 lzx_br_bits_forced(&bre,
2884 lzx_br_consume(&bre, at_bitlen[c]);
2885 if (!lzx_br_has(&bre, 0))
2886 goto failed;/* Over read. */
2888 lzx_br_consume(&bre, offbits);
2889 c = lzx_decode_huffman(at,
2890 lzx_br_bits(&bre, at_max_bits));
2891 lzx_br_consume(&bre, at_bitlen[c]);
2893 /* Add an aligned number. */
2896 if (!lzx_br_read_ahead(strm, &bre,
2903 copy_pos = lzx_br_bits(&bre, offset_bits);
2904 lzx_br_consume(&bre, offset_bits);
2906 copy_pos += pos_tbl[position_slot].base -2;
2908 /* Update repeated offset LRU queue. */
2915 * Compute a real position in window.
2917 copy_pos = (w_pos - copy_pos) & w_mask;
2921 * Copy several bytes as extracted data from the window
2922 * into the output buffer.
2925 const unsigned char *s;
2929 if (copy_pos > w_pos) {
2930 if (l > w_size - copy_pos)
2931 l = w_size - copy_pos;
2933 if (l > w_size - w_pos)
2936 if (outp + l >= endp)
2938 s = w_buff + copy_pos;
2939 if (l >= 8 && ((copy_pos + l < w_pos)
2940 || (w_pos + l < copy_pos))) {
2941 memcpy(w_buff + w_pos, s, l);
2948 for (li = 0; li < l; li++)
2949 outp[li] = d[li] = s[li];
2952 copy_pos = (copy_pos + l) & w_mask;
2953 w_pos = (w_pos + l) & w_mask;
2954 block_bytes_avail -= l;
2956 /* A copy of current pattern ended. */
2960 /* Output buffer is empty. */
2970 return (ds->error = ARCHIVE_FAILED);
2973 ds->block_bytes_avail = block_bytes_avail;
2974 ds->copy_len = copy_len;
2975 ds->copy_pos = copy_pos;
2976 ds->length_header = length_header;
2977 ds->offset_bits = offset_bits;
2978 ds->position_slot = position_slot;
2979 ds->r0 = r0; ds->r1 = r1; ds->r2 = r2;
2982 strm->avail_out = endp - outp;
2983 return (ARCHIVE_OK);
2987 lzx_read_pre_tree(struct lzx_stream *strm)
2989 struct lzx_dec *ds = strm->ds;
2990 struct lzx_br *br = &(ds->br);
2994 memset(ds->pt.freq, 0, sizeof(ds->pt.freq));
2995 for (i = ds->loop; i < ds->pt.len_size; i++) {
2996 if (!lzx_br_read_ahead(strm, br, 4)) {
3000 ds->pt.bitlen[i] = lzx_br_bits(br, 4);
3001 ds->pt.freq[ds->pt.bitlen[i]]++;
3002 lzx_br_consume(br, 4);
3009 * Read a bunch of bit-lengths from pre-tree.
3012 lzx_read_bitlen(struct lzx_stream *strm, struct huffman *d, int end)
3014 struct lzx_dec *ds = strm->ds;
3015 struct lzx_br *br = &(ds->br);
3016 int c, i, j, ret, same;
3021 memset(d->freq, 0, sizeof(d->freq));
3027 if (!lzx_br_read_ahead(strm, br, ds->pt.max_bits))
3029 rbits = lzx_br_bits(br, ds->pt.max_bits);
3030 c = lzx_decode_huffman(&(ds->pt), rbits);
3032 case 17:/* several zero lengths, from 4 to 19. */
3033 if (!lzx_br_read_ahead(strm, br, ds->pt.bitlen[c]+4))
3035 lzx_br_consume(br, ds->pt.bitlen[c]);
3036 same = lzx_br_bits(br, 4) + 4;
3038 return (-1);/* Invalid */
3039 lzx_br_consume(br, 4);
3040 for (j = 0; j < same; j++)
3043 case 18:/* many zero lengths, from 20 to 51. */
3044 if (!lzx_br_read_ahead(strm, br, ds->pt.bitlen[c]+5))
3046 lzx_br_consume(br, ds->pt.bitlen[c]);
3047 same = lzx_br_bits(br, 5) + 20;
3049 return (-1);/* Invalid */
3050 lzx_br_consume(br, 5);
3051 memset(d->bitlen + i, 0, same);
3054 case 19:/* a few same lengths. */
3055 if (!lzx_br_read_ahead(strm, br,
3056 ds->pt.bitlen[c]+1+ds->pt.max_bits))
3058 lzx_br_consume(br, ds->pt.bitlen[c]);
3059 same = lzx_br_bits(br, 1) + 4;
3062 lzx_br_consume(br, 1);
3063 rbits = lzx_br_bits(br, ds->pt.max_bits);
3064 c = lzx_decode_huffman(&(ds->pt), rbits);
3065 lzx_br_consume(br, ds->pt.bitlen[c]);
3066 c = (d->bitlen[i] - c + 17) % 17;
3068 return (-1);/* Invalid */
3069 for (j = 0; j < same; j++)
3074 lzx_br_consume(br, ds->pt.bitlen[c]);
3075 c = (d->bitlen[i] - c + 17) % 17;
3077 return (-1);/* Invalid */
3090 lzx_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits)
3094 if (hf->bitlen == NULL || hf->len_size != (int)len_size) {
3096 hf->bitlen = calloc(len_size, sizeof(hf->bitlen[0]));
3097 if (hf->bitlen == NULL)
3098 return (ARCHIVE_FATAL);
3099 hf->len_size = len_size;
3101 memset(hf->bitlen, 0, len_size * sizeof(hf->bitlen[0]));
3102 if (hf->tbl == NULL) {
3103 if (tbl_bits < HTBL_BITS)
3107 hf->tbl = malloc((1 << bits) * sizeof(hf->tbl[0]));
3108 if (hf->tbl == NULL)
3109 return (ARCHIVE_FATAL);
3110 hf->tbl_bits = tbl_bits;
3112 if (hf->tree == NULL && tbl_bits > HTBL_BITS) {
3113 hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4);
3114 hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0]));
3115 if (hf->tree == NULL)
3116 return (ARCHIVE_FATAL);
3118 return (ARCHIVE_OK);
3122 lzx_huffman_free(struct huffman *hf)
3130 * Make a huffman coding table.
3133 lzx_make_huffman_table(struct huffman *hf)
3136 const unsigned char *bitlen;
3137 int bitptn[17], weight[17];
3138 int i, maxbits = 0, ptn, tbl_size, w;
3139 int diffbits, len_avail;
3142 * Initialize bit patterns.
3145 for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) {
3149 ptn += hf->freq[i] * w;
3153 if ((ptn & 0xffff) != 0 || maxbits > hf->tbl_bits)
3154 return (0);/* Invalid */
3156 hf->max_bits = maxbits;
3159 * Cut out extra bits which we won't house in the table.
3160 * This preparation reduces the same calculation in the for-loop
3164 int ebits = 16 - maxbits;
3165 for (i = 1; i <= maxbits; i++) {
3166 bitptn[i] >>= ebits;
3167 weight[i] >>= ebits;
3170 if (maxbits > HTBL_BITS) {
3174 diffbits = maxbits - HTBL_BITS;
3175 for (i = 1; i <= HTBL_BITS; i++) {
3176 bitptn[i] >>= diffbits;
3177 weight[i] >>= diffbits;
3179 htbl_max = bitptn[HTBL_BITS] +
3180 weight[HTBL_BITS] * hf->freq[HTBL_BITS];
3181 p = &(hf->tbl[htbl_max]);
3182 while (p < &hf->tbl[1U<<HTBL_BITS])
3186 hf->shift_bits = diffbits;
3191 tbl_size = 1 << HTBL_BITS;
3193 bitlen = hf->bitlen;
3194 len_avail = hf->len_size;
3196 for (i = 0; i < len_avail; i++) {
3205 /* Get a bit pattern */
3209 if (len <= HTBL_BITS) {
3210 /* Calculate next bit pattern */
3211 if ((bitptn[len] = ptn + cnt) > tbl_size)
3212 return (0);/* Invalid */
3213 /* Update the table */
3216 p[cnt] = (uint16_t)i;
3221 * A bit length is too big to be housed to a direct table,
3222 * so we use a tree model for its extra bits.
3224 bitptn[len] = ptn + cnt;
3225 bit = 1U << (diffbits -1);
3226 extlen = len - HTBL_BITS;
3228 p = &(tbl[ptn >> diffbits]);
3230 *p = len_avail + hf->tree_used;
3231 ht = &(hf->tree[hf->tree_used++]);
3232 if (hf->tree_used > hf->tree_avail)
3233 return (0);/* Invalid */
3237 if (*p < len_avail ||
3238 *p >= (len_avail + hf->tree_used))
3239 return (0);/* Invalid */
3240 ht = &(hf->tree[*p - len_avail]);
3242 while (--extlen > 0) {
3244 if (ht->left < len_avail) {
3245 ht->left = len_avail + hf->tree_used;
3246 ht = &(hf->tree[hf->tree_used++]);
3247 if (hf->tree_used > hf->tree_avail)
3248 return (0);/* Invalid */
3252 ht = &(hf->tree[ht->left - len_avail]);
3255 if (ht->right < len_avail) {
3256 ht->right = len_avail + hf->tree_used;
3257 ht = &(hf->tree[hf->tree_used++]);
3258 if (hf->tree_used > hf->tree_avail)
3259 return (0);/* Invalid */
3263 ht = &(hf->tree[ht->right - len_avail]);
3270 return (0);/* Invalid */
3271 ht->left = (uint16_t)i;
3274 return (0);/* Invalid */
3275 ht->right = (uint16_t)i;
3282 lzx_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c)
3288 extlen = hf->shift_bits;
3289 while (c >= hf->len_size) {
3291 if (extlen-- <= 0 || c >= hf->tree_used)
3293 if (rbits & (1U << extlen))
3302 lzx_decode_huffman(struct huffman *hf, unsigned rbits)
3306 * At first search an index table for a bit pattern.
3307 * If it fails, search a huffman tree for.
3309 c = hf->tbl[rbits >> hf->shift_bits];
3310 if (c < hf->len_size)
3312 /* This bit pattern needs to be found out at a huffman tree. */
3313 return (lzx_decode_huffman_tree(hf, rbits, c));