2 * Copyright (c) 2008-2014 Michihiro NAKAJIMA
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "archive_platform.h"
42 #include "archive_entry.h"
43 #include "archive_entry_locale.h"
44 #include "archive_private.h"
45 #include "archive_read_private.h"
46 #include "archive_endian.h"
49 #define MAXMATCH 256 /* Maximum match length. */
50 #define MINMATCH 3 /* Minimum match length. */
52 * Literal table format:
54 * +---------------+-------------------------+
55 * | literal code | match length |
56 * | 0 ... 255 | MINMATCH ... MAXMATCH |
57 * +---------------+-------------------------+
58 * <--- LT_BITLEN_SIZE --->
60 /* Literal table size. */
61 #define LT_BITLEN_SIZE (UCHAR_MAX + 1 + MAXMATCH - MINMATCH + 1)
62 /* Position table size.
63 * Note: this is used for both the position table and the pre-literal table. */
64 #define PT_BITLEN_SIZE (3 + 16)
67 /* Decoding status. */
71 * Window to see last 8Ki(lh5),32Ki(lh6),64Ki(lh7) bytes of decoded
76 /* Window buffer, which is a loop buffer. */
77 unsigned char *w_buff;
78 /* The insert position to the window. */
80 /* The position where we can copy decoded code from the window. */
82 /* The length how many bytes we can copy decoded code from
90 #define CACHE_TYPE uint64_t
91 #define CACHE_BITS (8 * sizeof(CACHE_TYPE))
93 CACHE_TYPE cache_buffer;
94 /* Indicates how many bits avail in cache_buffer. */
106 unsigned char *bitlen;
109 * Use an index table. It's faster than searching a Huffman
110 * coding tree, which is a binary tree. But using a large
111 * index table causes many L1 cache read misses.
119 /* Direct access table. */
121 /* Binary tree table for extra bits over the direct access. */
131 int literal_pt_len_size;
132 int literal_pt_len_bits;
133 int reading_position;
139 const unsigned char *next_in;
142 const unsigned char *ref_ptr;
149 /* entry_bytes_remaining is the number of bytes we expect. */
150 int64_t entry_offset;
151 int64_t entry_bytes_remaining;
152 int64_t entry_unconsumed;
153 uint16_t entry_crc_calculated;
155 size_t header_size; /* header size */
156 unsigned char level; /* header level */
157 char method[3]; /* compress type */
158 int64_t compsize; /* compressed data size */
159 int64_t origsize; /* original file size */
161 #define BIRTHTIME_IS_SET 1
162 #define ATIME_IS_SET 2
163 #define UNIX_MODE_IS_SET 4
166 long birthtime_tv_nsec;
174 struct archive_string uname;
175 struct archive_string gname;
178 /* dirname and filename could be in different codepages */
179 struct archive_string_conv *sconv_dir;
180 struct archive_string_conv *sconv_fname;
181 struct archive_string_conv *opt_sconv;
183 struct archive_string dirname;
184 struct archive_string filename;
185 struct archive_wstring ws;
187 unsigned char dos_attr;
189 /* Flag marking that the first header of the archive has been read. */
190 char found_first_header;
191 /* Flag that indicates an empty directory. */
194 /* Flags to mark progress of decompression. */
195 char decompress_init;
197 char end_of_entry_cleanup;
198 char entry_is_compressed;
200 char format_name[64];
202 struct lzh_stream strm;
206 * LHA header common member offset.
208 #define H_METHOD_OFFSET 2 /* Compress type. */
209 #define H_ATTR_OFFSET 19 /* DOS attribute. */
210 #define H_LEVEL_OFFSET 20 /* Header Level. */
211 #define H_SIZE 22 /* Minimum header size. */
213 static int archive_read_format_lha_bid(struct archive_read *, int);
214 static int archive_read_format_lha_options(struct archive_read *,
215 const char *, const char *);
216 static int archive_read_format_lha_read_header(struct archive_read *,
217 struct archive_entry *);
218 static int archive_read_format_lha_read_data(struct archive_read *,
219 const void **, size_t *, int64_t *);
220 static int archive_read_format_lha_read_data_skip(struct archive_read *);
221 static int archive_read_format_lha_cleanup(struct archive_read *);
223 static void lha_replace_path_separator(struct lha *,
224 struct archive_entry *);
225 static int lha_read_file_header_0(struct archive_read *, struct lha *);
226 static int lha_read_file_header_1(struct archive_read *, struct lha *);
227 static int lha_read_file_header_2(struct archive_read *, struct lha *);
228 static int lha_read_file_header_3(struct archive_read *, struct lha *);
229 static int lha_read_file_extended_header(struct archive_read *,
230 struct lha *, uint16_t *, int, size_t, size_t *);
231 static size_t lha_check_header_format(const void *);
232 static int lha_skip_sfx(struct archive_read *);
233 static time_t lha_dos_time(const unsigned char *);
234 static time_t lha_win_time(uint64_t, long *);
235 static unsigned char lha_calcsum(unsigned char, const void *,
237 static int lha_parse_linkname(struct archive_wstring *,
238 struct archive_wstring *);
239 static int lha_read_data_none(struct archive_read *, const void **,
240 size_t *, int64_t *);
241 static int lha_read_data_lzh(struct archive_read *, const void **,
242 size_t *, int64_t *);
243 static void lha_crc16_init(void);
244 static uint16_t lha_crc16(uint16_t, const void *, size_t);
245 static int lzh_decode_init(struct lzh_stream *, const char *);
246 static void lzh_decode_free(struct lzh_stream *);
247 static int lzh_decode(struct lzh_stream *, int);
248 static int lzh_br_fillup(struct lzh_stream *, struct lzh_br *);
249 static int lzh_huffman_init(struct huffman *, size_t, int);
250 static void lzh_huffman_free(struct huffman *);
251 static int lzh_read_pt_bitlen(struct lzh_stream *, int start, int end);
252 static int lzh_make_fake_table(struct huffman *, uint16_t);
253 static int lzh_make_huffman_table(struct huffman *);
254 static inline int lzh_decode_huffman(struct huffman *, unsigned);
255 static int lzh_decode_huffman_tree(struct huffman *, unsigned, int);
/*
 * Enable LHA read support on an archive handle.
 *
 * Checks that _a is a read handle in the NEW state, allocates a
 * zero-filled struct lha (on failure the error is set to ENOMEM and
 * ARCHIVE_FATAL is returned), initializes the lha->ws string, and
 * registers the LHA bid / options / read_header / read_data /
 * read_data_skip / cleanup callbacks through
 * __archive_read_register_format().
 */
259 archive_read_support_format_lha(struct archive *_a)
261 struct archive_read *a = (struct archive_read *)_a;
265 archive_check_magic(_a, ARCHIVE_READ_MAGIC,
266 ARCHIVE_STATE_NEW, "archive_read_support_format_lha");
268 lha = (struct lha *)calloc(1, sizeof(*lha));
270 archive_set_error(&a->archive, ENOMEM,
271 "Can't allocate lha data");
272 return (ARCHIVE_FATAL);
274 archive_string_init(&lha->ws);
276 r = __archive_read_register_format(a,
279 archive_read_format_lha_bid,
280 archive_read_format_lha_options,
281 archive_read_format_lha_read_header,
282 archive_read_format_lha_read_data,
283 archive_read_format_lha_read_data_skip,
285 archive_read_format_lha_cleanup,
/*
 * Inspect the bytes at h for the "-l??-" method signature of an LHA
 * header.
 *
 * The byte at H_METHOD_OFFSET+3 selects the path.  For a method digit
 * the surrounding '-', 'l' and '-' bytes, the header level and the DOS
 * attribute byte are examined.  For 'h', 'z', 'l' or '-' only a skip
 * distance (1, 1, 2 or 3) is chosen, and 4 for any other byte.
 *
 * Returns next_skip_bytes.  Callers in this file compare the result
 * with 0 to decide whether a header was recognized.
 * NOTE(review): h is indexed up to H_LEVEL_OFFSET, so callers are
 * presumably expected to supply at least H_SIZE readable bytes --
 * confirm.
 */
295 lha_check_header_format(const void *h)
297 const unsigned char *p = h;
298 size_t next_skip_bytes;
300 switch (p[H_METHOD_OFFSET+3]) {
302 * "-lh0-" ... "-lh7-" "-lhd-"
305 case '0': case '1': case '2': case '3':
306 case '4': case '5': case '6': case '7':
311 /* b0 == 0 means the end of an LHa archive file. */
314 if (p[H_METHOD_OFFSET] != '-' || p[H_METHOD_OFFSET+1] != 'l'
315 || p[H_METHOD_OFFSET+4] != '-')
318 if (p[H_METHOD_OFFSET+2] == 'h') {
320 if (p[H_METHOD_OFFSET+3] == 's')
322 if (p[H_LEVEL_OFFSET] == 0)
324 if (p[H_LEVEL_OFFSET] <= 3 && p[H_ATTR_OFFSET] == 0x20)
327 if (p[H_METHOD_OFFSET+2] == 'z') {
328 /* LArc extensions: -lzs-,-lz4- and -lz5- */
329 if (p[H_LEVEL_OFFSET] != 0)
331 if (p[H_METHOD_OFFSET+3] == 's'
332 || p[H_METHOD_OFFSET+3] == '4'
333 || p[H_METHOD_OFFSET+3] == '5')
337 case 'h': next_skip_bytes = 1; break;
338 case 'z': next_skip_bytes = 1; break;
339 case 'l': next_skip_bytes = 2; break;
340 case '-': next_skip_bytes = 3; break;
341 default : next_skip_bytes = 4; break;
344 return (next_skip_bytes);
/*
 * Bid callback for the LHA reader.
 *
 * Peeks at the first H_SIZE bytes and tests them with
 * lha_check_header_format().  When the stream instead begins with the
 * "MZ" signature, up to 20 KiB are scanned through a read-ahead window
 * for an embedded LHA header (presumably a self-extracting archive).
 */
348 archive_read_format_lha_bid(struct archive_read *a, int best_bid)
352 ssize_t bytes_avail, offset, window;
355 /* If there's already a better bid than we can ever
356 make, don't bother testing. */
360 if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL)
363 if (lha_check_header_format(p) == 0)
366 if (p[0] == 'M' && p[1] == 'Z') {
370 while (offset < (1024 * 20)) {
371 buff = __archive_read_ahead(a, offset + window,
374 /* Remaining bytes are less than window. */
376 if (window < (H_SIZE + 3))
380 p = (const char *)buff + offset;
381 while (p + H_SIZE < (const char *)buff + bytes_avail) {
382 if ((next = lha_check_header_format(p)) == 0)
386 offset = p - (const char *)buff;
/*
 * Option callback for the LHA reader.
 *
 * Handles the "hdrcharset" key: a missing or empty value is reported
 * as an error; otherwise a conversion object for the named character
 * set is requested from archive_string_conversion_from_charset() and
 * lha->opt_sconv is checked for success.  A key that is not handled
 * returns ARCHIVE_WARN so that the options supervisor can report it.
 */
393 archive_read_format_lha_options(struct archive_read *a,
394 const char *key, const char *val)
397 int ret = ARCHIVE_FAILED;
399 lha = (struct lha *)(a->format->data);
400 if (strcmp(key, "hdrcharset") == 0) {
401 if (val == NULL || val[0] == 0)
402 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
403 "lha: hdrcharset option needs a character-set name");
406 archive_string_conversion_from_charset(
407 &a->archive, val, 0);
408 if (lha->opt_sconv != NULL)
416 /* Note: The "warn" return is just to inform the options
417 * supervisor that we didn't handle it. It will generate
418 * a suitable error if no one used this option. */
419 return (ARCHIVE_WARN);
/*
 * Skip the leading part of the stream (presumably the executable stub
 * of a self-extracting archive) until an LHA header is found.
 *
 * Reads ahead in windows and scans for bytes that
 * lha_check_header_format() accepts; on a hit, everything before the
 * header is consumed.  If no header is found, the archive error is set
 * to "Couldn't find out LHa header" and ARCHIVE_FATAL is returned.
 */
423 lha_skip_sfx(struct archive_read *a)
428 ssize_t bytes, window;
432 h = __archive_read_ahead(a, window, &bytes);
434 /* Remaining bytes are less than window. */
436 if (window < (H_SIZE + 3))
446 * Scan ahead until we find something that looks
447 * like the lha header.
449 while (p + H_SIZE < q) {
450 if ((next = lha_check_header_format(p)) == 0) {
451 skip = p - (const char *)h;
452 __archive_read_consume(a, skip);
457 skip = p - (const char *)h;
458 __archive_read_consume(a, skip);
461 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
462 "Couldn't find out LHa header");
463 return (ARCHIVE_FATAL);
/*
 * Record a "Truncated LHa header" file-format error on the archive and
 * return ARCHIVE_FATAL, so that callers can simply write
 * `return (truncated_error(a));`.
 */
467 truncated_error(struct archive_read *a)
469 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
470 "Truncated LHa header");
471 return (ARCHIVE_FATAL);
/*
 * Read-header callback: parse the next LHA entry header and fill in
 * `entry`.
 *
 * Steps visible in the body:
 *  - reset the per-entry decompression and end-of-entry flags;
 *  - return ARCHIVE_EOF when no more input is available or the next
 *    byte is 0 (the end-of-archive mark); before the first header, a
 *    leading "MZ" signature triggers lha_skip_sfx();
 *  - validate the method signature with lha_check_header_format(),
 *    record the level and method, and dispatch on the header level to
 *    lha_read_file_header_0() .. lha_read_file_header_3();
 *  - convert dirname and filename to wide characters separately (their
 *    codepages may differ) and join them into the pathname;
 *  - for a mode of AE_IFLNK, split the symlink name out of the
 *    pathname; also make sure a file type (directory or regular file)
 *    is set in the mode;
 *  - copy mode, uid/gid, user/group names, timestamps and size into
 *    `entry`, and prepare entry_bytes_remaining, entry_offset and the
 *    running CRC for the data reader.
 *
 * Returns ARCHIVE_FATAL for truncated or malformed headers and for a
 * negative entry size, and ARCHIVE_FAILED when the symlink name cannot
 * be extracted.
 */
475 archive_read_format_lha_read_header(struct archive_read *a,
476 struct archive_entry *entry)
478 struct archive_wstring linkname;
479 struct archive_wstring pathname;
481 const unsigned char *p;
482 const char *signature;
484 struct archive_mstring conv_buffer;
485 const wchar_t *conv_buffer_p;
489 a->archive.archive_format = ARCHIVE_FORMAT_LHA;
490 if (a->archive.archive_format_name == NULL)
491 a->archive.archive_format_name = "lha";
493 lha = (struct lha *)(a->format->data);
494 lha->decompress_init = 0;
495 lha->end_of_entry = 0;
496 lha->end_of_entry_cleanup = 0;
497 lha->entry_unconsumed = 0;
499 if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) {
501 * LHa archiver added 0 to the tail of its archive file as
502 * the mark of the end of the archive.
504 signature = __archive_read_ahead(a, sizeof(signature[0]), NULL);
505 if (signature == NULL || signature[0] == 0)
506 return (ARCHIVE_EOF);
507 return (truncated_error(a));
510 signature = (const char *)p;
511 if (lha->found_first_header == 0 &&
512 signature[0] == 'M' && signature[1] == 'Z') {
513 /* This is an executable? Must be self-extracting... */
514 err = lha_skip_sfx(a);
515 if (err < ARCHIVE_WARN)
518 if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL)
519 return (truncated_error(a));
520 signature = (const char *)p;
522 /* signature[0] == 0 means the end of an LHa archive file. */
523 if (signature[0] == 0)
524 return (ARCHIVE_EOF);
527 * Check the header format and method type.
529 if (lha_check_header_format(p) != 0) {
530 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
532 return (ARCHIVE_FATAL);
535 /* We've found the first header. */
536 lha->found_first_header = 1;
537 /* Set a default value and common data */
538 lha->header_size = 0;
539 lha->level = p[H_LEVEL_OFFSET];
540 lha->method[0] = p[H_METHOD_OFFSET+1];
541 lha->method[1] = p[H_METHOD_OFFSET+2];
542 lha->method[2] = p[H_METHOD_OFFSET+3];
543 if (memcmp(lha->method, "lhd", 3) == 0)
547 if (memcmp(lha->method, "lh0", 3) == 0 ||
548 memcmp(lha->method, "lz4", 3) == 0)
549 lha->entry_is_compressed = 0;
551 lha->entry_is_compressed = 1;
557 lha->birthtime_tv_nsec = 0;
559 lha->mtime_tv_nsec = 0;
561 lha->atime_tv_nsec = 0;
562 lha->mode = (lha->directory)? 0777 : 0666;
565 archive_string_empty(&lha->dirname);
566 archive_string_empty(&lha->filename);
568 if (lha->opt_sconv != NULL) {
569 lha->sconv_dir = lha->opt_sconv;
570 lha->sconv_fname = lha->opt_sconv;
572 lha->sconv_dir = NULL;
573 lha->sconv_fname = NULL;
576 switch (p[H_LEVEL_OFFSET]) {
578 err = lha_read_file_header_0(a, lha);
581 err = lha_read_file_header_1(a, lha);
584 err = lha_read_file_header_2(a, lha);
587 err = lha_read_file_header_3(a, lha);
590 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
591 "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]);
595 if (err < ARCHIVE_WARN)
599 if (!lha->directory && archive_strlen(&lha->filename) == 0)
600 /* The filename has not been set */
601 return (truncated_error(a));
604 * Make a pathname from a dirname and a filename, after converting to Unicode.
605 * This is because codepages might differ between dirname and filename.
607 archive_string_init(&pathname);
608 archive_string_init(&linkname);
609 archive_string_init(&conv_buffer.aes_mbs);
610 archive_string_init(&conv_buffer.aes_mbs_in_locale);
611 archive_string_init(&conv_buffer.aes_utf8);
612 archive_string_init(&conv_buffer.aes_wcs);
613 if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->dirname.s, lha->dirname.length, lha->sconv_dir)) {
614 archive_set_error(&a->archive,
615 ARCHIVE_ERRNO_FILE_FORMAT,
616 "Pathname cannot be converted "
617 "from %s to Unicode.",
618 archive_string_conversion_charset_name(lha->sconv_dir));
620 } else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p))
622 if (err == ARCHIVE_FATAL) {
623 archive_mstring_clean(&conv_buffer);
624 archive_wstring_free(&pathname);
625 archive_wstring_free(&linkname);
628 archive_wstring_copy(&pathname, &conv_buffer.aes_wcs);
630 archive_string_empty(&conv_buffer.aes_mbs);
631 archive_string_empty(&conv_buffer.aes_mbs_in_locale);
632 archive_string_empty(&conv_buffer.aes_utf8);
633 archive_wstring_empty(&conv_buffer.aes_wcs);
634 if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->filename.s, lha->filename.length, lha->sconv_fname)) {
635 archive_set_error(&a->archive,
636 ARCHIVE_ERRNO_FILE_FORMAT,
637 "Pathname cannot be converted "
638 "from %s to Unicode.",
639 archive_string_conversion_charset_name(lha->sconv_fname));
642 else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p))
644 if (err == ARCHIVE_FATAL) {
645 archive_mstring_clean(&conv_buffer);
646 archive_wstring_free(&pathname);
647 archive_wstring_free(&linkname);
650 archive_wstring_concat(&pathname, &conv_buffer.aes_wcs);
651 archive_mstring_clean(&conv_buffer);
653 if ((lha->mode & AE_IFMT) == AE_IFLNK) {
655 * Extract the symlink-name if it's included in the pathname.
657 if (!lha_parse_linkname(&linkname, &pathname)) {
658 /* We couldn't get the symlink-name. */
659 archive_set_error(&a->archive,
660 ARCHIVE_ERRNO_FILE_FORMAT,
661 "Unknown symlink-name");
662 archive_wstring_free(&pathname);
663 archive_wstring_free(&linkname);
664 return (ARCHIVE_FAILED);
668 * Make sure a file-type is set.
669 * The mode has been overridden if it is in the extended data.
671 lha->mode = (lha->mode & ~AE_IFMT) |
672 ((lha->directory)? AE_IFDIR: AE_IFREG);
674 if ((lha->setflag & UNIX_MODE_IS_SET) == 0 &&
675 (lha->dos_attr & 1) != 0)
676 lha->mode &= ~(0222);/* read only. */
679 * Set basic file parameters.
681 archive_entry_copy_pathname_w(entry, pathname.s);
682 archive_wstring_free(&pathname);
683 if (archive_strlen(&linkname) > 0) {
684 archive_entry_copy_symlink_w(entry, linkname.s);
686 archive_entry_set_symlink(entry, NULL);
687 archive_wstring_free(&linkname);
689 * When a header level is 0, there is a possibility that
690 * a pathname and a symlink has '\' character, a directory
691 * separator in DOS/Windows. So we should convert it to '/'.
693 if (p[H_LEVEL_OFFSET] == 0)
694 lha_replace_path_separator(lha, entry);
696 archive_entry_set_mode(entry, lha->mode);
697 archive_entry_set_uid(entry, lha->uid);
698 archive_entry_set_gid(entry, lha->gid);
699 if (archive_strlen(&lha->uname) > 0)
700 archive_entry_set_uname(entry, lha->uname.s);
701 if (archive_strlen(&lha->gname) > 0)
702 archive_entry_set_gname(entry, lha->gname.s);
703 if (lha->setflag & BIRTHTIME_IS_SET) {
704 archive_entry_set_birthtime(entry, lha->birthtime,
705 lha->birthtime_tv_nsec);
706 archive_entry_set_ctime(entry, lha->birthtime,
707 lha->birthtime_tv_nsec);
709 archive_entry_unset_birthtime(entry);
710 archive_entry_unset_ctime(entry);
712 archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec);
713 if (lha->setflag & ATIME_IS_SET)
714 archive_entry_set_atime(entry, lha->atime,
717 archive_entry_unset_atime(entry);
718 if (lha->directory || archive_entry_symlink(entry) != NULL)
719 archive_entry_unset_size(entry);
721 archive_entry_set_size(entry, lha->origsize);
724 * Prepare variables used to read a file content.
726 lha->entry_bytes_remaining = lha->compsize;
727 if (lha->entry_bytes_remaining < 0) {
728 archive_set_error(&a->archive,
729 ARCHIVE_ERRNO_FILE_FORMAT,
730 "Invalid LHa entry size");
731 return (ARCHIVE_FATAL);
733 lha->entry_offset = 0;
734 lha->entry_crc_calculated = 0;
737 * This file does not have a content.
739 if (lha->directory || lha->compsize == 0)
740 lha->end_of_entry = 1;
742 sprintf(lha->format_name, "lha -%c%c%c-",
743 lha->method[0], lha->method[1], lha->method[2]);
744 a->archive.archive_format_name = lha->format_name;
750 * Replace a DOS path separator '\' by a character '/'.
751 * Some multi-byte character sets have a character '\' in their second byte.
/*
 * Process the DOS separator '\' in the entry's wide-character pathname
 * and symlink target.  Each string that is present is copied into
 * lha->ws, scanned character by character for L'\\', and then copied
 * back into the entry.
 */
754 lha_replace_path_separator(struct lha *lha, struct archive_entry *entry)
759 if ((wp = archive_entry_pathname_w(entry)) != NULL) {
760 archive_wstrcpy(&(lha->ws), wp);
761 for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
762 if (lha->ws.s[i] == L'\\')
765 archive_entry_copy_pathname_w(entry, lha->ws.s);
768 if ((wp = archive_entry_symlink_w(entry)) != NULL) {
769 archive_wstrcpy(&(lha->ws), wp);
770 for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
771 if (lha->ws.s[i] == L'\\')
774 archive_entry_copy_symlink_w(entry, lha->ws.s);
782 * +---------------+----------+----------------+-------------------+
783 * |header size(*1)|header sum|compression type|compressed size(*2)|
784 * +---------------+----------+----------------+-------------------+
785 * <---------------------(*1)----------*
787 * +11 +15 +17 +19 +20 +21
788 * +-----------------+---------+---------+--------------+----------------+
789 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)|
790 * +-----------------+---------+---------+--------------+----------------+
791 * *--------------------------------(*1)---------------------------------*
793 * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+2+(*4)
794 * +---------------+---------+----------+----------------+------------------+
795 * |name length(*3)|file name|file CRC16|extra header(*4)| compressed data |
796 * +---------------+---------+----------+----------------+------------------+
797 * <--(*3)-> <------(*2)------>
798 * *----------------------(*1)-------------------------->
801 #define H0_HEADER_SIZE_OFFSET 0
802 #define H0_HEADER_SUM_OFFSET 1
803 #define H0_COMP_SIZE_OFFSET 7
804 #define H0_ORIG_SIZE_OFFSET 11
805 #define H0_DOS_TIME_OFFSET 15
806 #define H0_NAME_LEN_OFFSET 21
807 #define H0_FILE_NAME_OFFSET 22
808 #define H0_FIXED_SIZE 24
/*
 * Parse a level-0 header.
 *
 * Reads the fixed fields (header size, header sum, compressed and
 * original sizes, DOS timestamp, name length) and validates the name
 * length against the header size.  It then copies the file name and
 * the CRC16 that follows it.  If the extra data starts with 'U' and is
 * 12 bytes long, mtime, mode, uid and gid are taken from it.  The
 * header is consumed before the one-byte header sum is compared.
 *
 * Returns ARCHIVE_FATAL for a truncated header, an invalid header or a
 * header sum mismatch.
 */
810 lha_read_file_header_0(struct archive_read *a, struct lha *lha)
812 const unsigned char *p;
813 int extdsize, namelen;
814 unsigned char headersum, sum_calculated;
816 if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL)
817 return (truncated_error(a));
818 lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2;
819 headersum = p[H0_HEADER_SUM_OFFSET];
820 lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET);
821 lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET);
822 lha->mtime = lha_dos_time(p + H0_DOS_TIME_OFFSET);
823 namelen = p[H0_NAME_LEN_OFFSET];
824 extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen;
825 if ((namelen > 221 || extdsize < 0) && extdsize != -2) {
826 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
827 "Invalid LHa header");
828 return (ARCHIVE_FATAL);
830 if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
831 return (truncated_error(a));
833 archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen);
834 /* When extdsize == -2, A CRC16 value is not present in the header. */
836 lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen);
837 lha->setflag |= CRC_IS_SET;
839 sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
841 /* Read an extended header */
843 /* This extended data is set by 'LHa for UNIX' only.
846 p += H0_FILE_NAME_OFFSET + namelen + 2;
847 if (p[0] == 'U' && extdsize == 12) {
848 /* p[1] is a minor version. */
849 lha->mtime = archive_le32dec(&p[2]);
850 lha->mode = archive_le16dec(&p[6]);
851 lha->uid = archive_le16dec(&p[8]);
852 lha->gid = archive_le16dec(&p[10]);
853 lha->setflag |= UNIX_MODE_IS_SET;
856 __archive_read_consume(a, lha->header_size);
858 if (sum_calculated != headersum) {
859 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
860 "LHa header sum error");
861 return (ARCHIVE_FATAL);
871 * +---------------+----------+----------------+-------------+
872 * |header size(*1)|header sum|compression type|skip size(*2)|
873 * +---------------+----------+----------------+-------------+
874 * <---------------(*1)----------*
876 * +11 +15 +17 +19 +20 +21
877 * +-----------------+---------+---------+--------------+----------------+
878 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)|
879 * +-----------------+---------+---------+--------------+----------------+
880 * *-------------------------------(*1)----------------------------------*
882 * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+3 +22+(*3)+3+(*4)
883 * +---------------+---------+----------+-----------+-----------+
884 * |name length(*3)|file name|file CRC16| creator |padding(*4)|
885 * +---------------+---------+----------+-----------+-----------+
887 * *----------------------------(*1)----------------------------*
889 * +22+(*3)+3+(*4) +22+(*3)+3+(*4)+2 +22+(*3)+3+(*4)+2+(*5)
890 * +----------------+---------------------+------------------------+
891 * |next header size| extended header(*5) | compressed data |
892 * +----------------+---------------------+------------------------+
893 * *------(*1)-----> <--------------------(*2)-------------------->
895 #define H1_HEADER_SIZE_OFFSET 0
896 #define H1_HEADER_SUM_OFFSET 1
897 #define H1_COMP_SIZE_OFFSET 7
898 #define H1_ORIG_SIZE_OFFSET 11
899 #define H1_DOS_TIME_OFFSET 15
900 #define H1_NAME_LEN_OFFSET 21
901 #define H1_FILE_NAME_OFFSET 22
902 #define H1_FIXED_SIZE 27
/*
 * Parse a level-1 header.
 *
 * The fixed fields sit at the same offsets as in a level-0 header, but
 * compsize here also counts the extended headers that follow.  A file
 * name containing a 0xff byte is rejected.  The base header, minus its
 * trailing "next header size" field, is consumed.  The extended
 * headers are then read with a 2-byte size field, and extdsize - 2 is
 * subtracted from compsize to obtain the real compressed size.
 *
 * Returns ARCHIVE_FATAL for a truncated header, an invalid header
 * (including a negative resulting compsize) or a header sum mismatch.
 */
904 lha_read_file_header_1(struct archive_read *a, struct lha *lha)
906 const unsigned char *p;
909 int namelen, padding;
910 unsigned char headersum, sum_calculated;
914 if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL)
915 return (truncated_error(a));
917 lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2;
918 headersum = p[H1_HEADER_SUM_OFFSET];
919 /* Note: An extended header size is included in a compsize. */
920 lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET);
921 lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET);
922 lha->mtime = lha_dos_time(p + H1_DOS_TIME_OFFSET);
923 namelen = p[H1_NAME_LEN_OFFSET];
924 /* Calculate a padding size. The result will be normally 0 only(?) */
925 padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen;
927 if (namelen > 230 || padding < 0)
930 if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
931 return (truncated_error(a));
933 for (i = 0; i < namelen; i++) {
934 if (p[i + H1_FILE_NAME_OFFSET] == 0xff)
935 goto invalid;/* Invalid filename. */
937 archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen);
938 lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen);
939 lha->setflag |= CRC_IS_SET;
941 sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
942 /* Consume used bytes but not include `next header size' data
943 * since it will be consumed in lha_read_file_extended_header(). */
944 __archive_read_consume(a, lha->header_size - 2);
946 /* Read extended headers */
947 err2 = lha_read_file_extended_header(a, lha, NULL, 2,
948 (size_t)(lha->compsize + 2), &extdsize);
949 if (err2 < ARCHIVE_WARN)
953 /* Get a real compressed file size. */
954 lha->compsize -= extdsize - 2;
956 if (lha->compsize < 0)
957 goto invalid; /* Invalid compressed file size */
959 if (sum_calculated != headersum) {
960 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
961 "LHa header sum error");
962 return (ARCHIVE_FATAL);
966 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
967 "Invalid LHa header");
968 return (ARCHIVE_FATAL);
975 * +---------------+----------------+-------------------+-----------------+
976 * |header size(*1)|compression type|compressed size(*2)|uncompressed size|
977 * +---------------+----------------+-------------------+-----------------+
978 * <--------------------------------(*1)---------------------------------*
980 * +15 +19 +20 +21 +23 +24
981 * +-----------------+------------+----------------+----------+-----------+
982 * |date/time(time_t)| 0x20 fixed |header level(=2)|file CRC16| creator |
983 * +-----------------+------------+----------------+----------+-----------+
984 * *---------------------------------(*1)---------------------------------*
986 * +24 +26 +26+(*3) +26+(*3)+(*4)
987 * +----------------+-------------------+-------------+-------------------+
988 * |next header size|extended header(*3)| padding(*4) | compressed data |
989 * +----------------+-------------------+-------------+-------------------+
990 * *--------------------------(*1)-------------------> <------(*2)------->
993 #define H2_HEADER_SIZE_OFFSET 0
994 #define H2_COMP_SIZE_OFFSET 7
995 #define H2_ORIG_SIZE_OFFSET 11
996 #define H2_TIME_OFFSET 15
997 #define H2_CRC_OFFSET 21
998 #define H2_FIXED_SIZE 24
/*
 * Parse a level-2 header.
 *
 * header_size is read as a 16-bit field and must be at least
 * H2_FIXED_SIZE.  mtime is decoded as a 32-bit little-endian value
 * rather than through lha_dos_time().  A CRC16 is accumulated over the
 * fixed part, the extended headers (2-byte size fields) and the
 * padding bytes, and is finally compared with lha->header_crc.
 *
 * Returns ARCHIVE_FATAL for a truncated header, an invalid header size
 * or a header CRC mismatch.
 */
1000 lha_read_file_header_2(struct archive_read *a, struct lha *lha)
1002 const unsigned char *p;
1005 uint16_t header_crc;
1007 if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL)
1008 return (truncated_error(a));
1010 lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET);
1011 lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET);
1012 lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET);
1013 lha->mtime = archive_le32dec(p + H2_TIME_OFFSET);
1014 lha->crc = archive_le16dec(p + H2_CRC_OFFSET);
1015 lha->setflag |= CRC_IS_SET;
1017 if (lha->header_size < H2_FIXED_SIZE) {
1018 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1019 "Invalid LHa header size");
1020 return (ARCHIVE_FATAL);
1023 header_crc = lha_crc16(0, p, H2_FIXED_SIZE);
1024 __archive_read_consume(a, H2_FIXED_SIZE);
1026 /* Read extended headers */
1027 err = lha_read_file_extended_header(a, lha, &header_crc, 2,
1028 lha->header_size - H2_FIXED_SIZE, &extdsize);
1029 if (err < ARCHIVE_WARN)
1032 /* Calculate a padding size. The result will be normally 0 or 1. */
1033 padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize);
1035 if ((p = __archive_read_ahead(a, padding, NULL)) == NULL)
1036 return (truncated_error(a));
1037 header_crc = lha_crc16(header_crc, p, padding);
1038 __archive_read_consume(a, padding);
1041 if (header_crc != lha->header_crc) {
1042 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1043 "LHa header CRC error");
1044 return (ARCHIVE_FATAL);
1053 * +------------+----------------+-------------------+-----------------+
1054 * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size|
1055 * +------------+----------------+-------------------+-----------------+
1056 * <-------------------------------(*1)-------------------------------*
1058 * +15 +19 +20 +21 +23 +24
1059 * +-----------------+------------+----------------+----------+-----------+
1060 * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16| creator |
1061 * +-----------------+------------+----------------+----------+-----------+
1062 * *--------------------------------(*1)----------------------------------*
1064 * +24 +28 +32 +32+(*3)
1065 * +---------------+----------------+-------------------+-----------------+
1066 * |header size(*1)|next header size|extended header(*3)| compressed data |
1067 * +---------------+----------------+-------------------+-----------------+
1068 * *------------------------(*1)-----------------------> <------(*2)----->
1071 #define H3_FIELD_LEN_OFFSET 0
1072 #define H3_COMP_SIZE_OFFSET 7
1073 #define H3_ORIG_SIZE_OFFSET 11
1074 #define H3_TIME_OFFSET 15
1075 #define H3_CRC_OFFSET 21
1076 #define H3_HEADER_SIZE_OFFSET 24
1077 #define H3_FIXED_SIZE 28
/*
 * Parse a level-3 header.
 *
 * The leading 16-bit field is checked against 4.  header_size is read
 * as a 32-bit field and checked against H3_FIXED_SIZE + 4.  Extended
 * headers are read with 4-byte size fields.  The CRC16 accumulated
 * over the fixed part and the extended headers is compared with
 * lha->header_crc.
 *
 * Returns ARCHIVE_FATAL for a truncated header, an invalid header or a
 * header CRC mismatch.
 */
1079 lha_read_file_header_3(struct archive_read *a, struct lha *lha)
1081 const unsigned char *p;
1084 uint16_t header_crc;
1086 if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL)
1087 return (truncated_error(a));
1089 if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4)
1091 lha->header_size =archive_le32dec(p + H3_HEADER_SIZE_OFFSET);
1092 lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET);
1093 lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET);
1094 lha->mtime = archive_le32dec(p + H3_TIME_OFFSET);
1095 lha->crc = archive_le16dec(p + H3_CRC_OFFSET);
1096 lha->setflag |= CRC_IS_SET;
1098 if (lha->header_size < H3_FIXED_SIZE + 4)
1100 header_crc = lha_crc16(0, p, H3_FIXED_SIZE);
1101 __archive_read_consume(a, H3_FIXED_SIZE);
1103 /* Read extended headers */
1104 err = lha_read_file_extended_header(a, lha, &header_crc, 4,
1105 lha->header_size - H3_FIXED_SIZE, &extdsize);
1106 if (err < ARCHIVE_WARN)
1109 if (header_crc != lha->header_crc) {
1110 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1111 "LHa header CRC error");
1112 return (ARCHIVE_FATAL);
1116 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1117 "Invalid LHa header");
1118 return (ARCHIVE_FATAL);
1122 * Extended header format
1124 * +0 +2 +3 -- used in header 1 and 2
1125 * +0 +4 +5 -- used in header 3
1126 * +--------------+---------+-------------------+--------------+--
1127 * |ex-header size|header id| data |ex-header size| .......
1128 * +--------------+---------+-------------------+--------------+--
1129 * <-------------( ex-header size)------------> <-- next extended header --*
1131 * If the ex-header size is zero, it is the mark of the end of extended
/*
 * Parse the chain of extended headers that follows a fixed-size LHa header.
 *
 * Each extended header starts with a size field of sizefield_length bytes
 * (decoded as 16-bit little-endian when that length equals sizeof(uint16_t),
 * as 32-bit little-endian otherwise), followed by a one-byte type and the
 * type-specific data. A size of zero ends the chain.
 *
 * Parameters:
 *   a                 reader that supplies the header bytes.
 *   lha               receives the decoded fields (names, times, sizes,
 *                     mode, uid/gid, header_crc, string converters).
 *   crc               running header CRC, updated through lha_crc16();
 *                     it is tested against NULL before the per-header update.
 *   sizefield_length  width in bytes of every size field.
 *   limitsize         upper bound for the accumulated size in *total_size.
 *   total_size        out: initialised to sizefield_length, then increased
 *                     by the size of each extended header that is read.
 *
 * Returns ARCHIVE_OK once the zero size field has been consumed, the result
 * of truncated_error() when the input ends early, and ARCHIVE_FATAL when a
 * charset converter cannot be created or through the error exit at the end
 * of the function, which reports an invalid extended header.
 */
1136 lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
1137 uint16_t *crc, int sizefield_length, size_t limitsize, size_t *total_size)
1140 const unsigned char *extdheader;
1144 unsigned char extdtype;
/* Extended header type identifiers (the byte that follows the size field). */
1146 #define EXT_HEADER_CRC 0x00 /* Header CRC and information*/
1147 #define EXT_FILENAME 0x01 /* Filename */
1148 #define EXT_DIRECTORY 0x02 /* Directory name */
1149 #define EXT_DOS_ATTR 0x40 /* MS-DOS attribute */
1150 #define EXT_TIMESTAMP 0x41 /* Windows time stamp */
1151 #define EXT_FILESIZE 0x42 /* Large file size */
1152 #define EXT_TIMEZONE 0x43 /* Time zone */
1153 #define EXT_UTF16_FILENAME 0x44 /* UTF-16 filename */
1154 #define EXT_UTF16_DIRECTORY 0x45 /* UTF-16 directory name */
1155 #define EXT_CODEPAGE 0x46 /* Codepage */
1156 #define EXT_UNIX_MODE 0x50 /* File permission */
1157 #define EXT_UNIX_GID_UID 0x51 /* gid,uid */
1158 #define EXT_UNIX_GNAME 0x52 /* Group name */
1159 #define EXT_UNIX_UNAME 0x53 /* User name */
1160 #define EXT_UNIX_MTIME 0x54 /* Modified time */
1161 #define EXT_OS2_NEW_ATTR 0x7f /* new attribute(OS/2 only) */
1162 #define EXT_NEW_ATTR 0xff /* new attribute */
/* The size field that terminates the chain is counted up front. */
1164 *total_size = sizefield_length;
1167 /* Read an extended header size. */
1169 __archive_read_ahead(a, sizefield_length, NULL)) == NULL)
1170 return (truncated_error(a));
1171 /* Check if the size is the zero indicates the end of the
1172 * extended header. */
1173 if (sizefield_length == sizeof(uint16_t))
1174 extdsize = archive_le16dec(h);
1176 extdsize = archive_le32dec(h);
1177 if (extdsize == 0) {
1178 /* End of extended header */
1180 *crc = lha_crc16(*crc, h, sizefield_length);
1181 __archive_read_consume(a, sizefield_length);
1182 return (ARCHIVE_OK);
1185 /* Sanity check to the extended header size. */
/* The sum is formed in 64 bits so it cannot wrap; a header must also be
 * larger than its own size field to pass. */
1186 if (((uint64_t)*total_size + extdsize) >
1187 (uint64_t)limitsize ||
1188 extdsize <= (size_t)sizefield_length)
1191 /* Read the extended header. */
1192 if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL)
1193 return (truncated_error(a));
1194 *total_size += extdsize;
1196 extdheader = (const unsigned char *)h;
1197 /* Get the extended header type. */
1198 extdtype = extdheader[sizefield_length];
1199 /* Calculate an extended data size. */
1200 datasize = extdsize - (1 + sizefield_length);
1201 /* Skip an extended header size field and type field. */
1202 extdheader += sizefield_length + 1;
/* Every header except the CRC header is fed to the running CRC unchanged.
 * The CRC header is summed in its own case, with the two stored CRC bytes
 * replaced by zeros. */
1204 if (crc != NULL && extdtype != EXT_HEADER_CRC)
1205 *crc = lha_crc16(*crc, h, extdsize);
1207 case EXT_HEADER_CRC:
1208 /* We only use a header CRC. Following data will not
1210 if (datasize >= 2) {
1211 lha->header_crc = archive_le16dec(extdheader);
1213 static const char zeros[2] = {0, 0};
1214 *crc = lha_crc16(*crc, h,
1215 extdsize - datasize);
1216 /* CRC value itself as zero */
1217 *crc = lha_crc16(*crc, zeros, 2);
1218 *crc = lha_crc16(*crc,
1219 extdheader+2, datasize - 2);
1224 if (datasize == 0) {
1225 /* maybe directory header */
1226 archive_string_empty(&lha->filename);
1229 if (extdheader[0] == '\0')
1231 archive_strncpy(&lha->filename,
1232 (const char *)extdheader, datasize);
1234 case EXT_UTF16_FILENAME:
1235 if (datasize == 0) {
1236 /* maybe directory header */
1237 archive_string_empty(&lha->filename);
1239 } else if (datasize & 1) {
1240 /* UTF-16 characters take always 2 or 4 bytes */
1243 if (extdheader[0] == '\0')
1245 archive_string_empty(&lha->filename);
1246 archive_array_append(&lha->filename,
1247 (const char *)extdheader, datasize);
1248 /* Setup a string conversion for a filename. */
1250 archive_string_conversion_from_charset(&a->archive,
1252 if (lha->sconv_fname == NULL)
1253 return (ARCHIVE_FATAL);
1256 if (datasize == 0 || extdheader[0] == '\0')
1257 /* no directory name data. exit this case. */
1260 archive_strncpy(&lha->dirname,
1261 (const char *)extdheader, datasize);
1263 * Convert directory delimiter from 0xFF
1264 * to '/' for local system.
1266 for (i = 0; i < lha->dirname.length; i++) {
1267 if ((unsigned char)lha->dirname.s[i] == 0xFF)
1268 lha->dirname.s[i] = '/';
1270 /* Is last character directory separator? */
1271 if (lha->dirname.s[lha->dirname.length-1] != '/')
1272 /* invalid directory data */
1275 case EXT_UTF16_DIRECTORY:
1276 /* UTF-16 characters take always 2 or 4 bytes */
1277 if (datasize == 0 || (datasize & 1) ||
1278 extdheader[0] == '\0') {
1279 /* no directory name data. exit this case. */
1283 archive_string_empty(&lha->dirname);
1284 archive_array_append(&lha->dirname,
1285 (const char *)extdheader, datasize);
1287 archive_string_conversion_from_charset(&a->archive,
1289 if (lha->sconv_dir == NULL)
1290 return (ARCHIVE_FATAL);
1293 * Convert directory delimiter from 0xFFFF
1294 * to '/' for local system.
1298 if (archive_be16dec(&d) == 1)
1303 /* UTF-16LE character */
1304 uint16_t *utf16name =
1305 (uint16_t *)lha->dirname.s;
1306 for (i = 0; i < lha->dirname.length / 2; i++) {
1307 if (utf16name[i] == 0xFFFF) {
1308 utf16name[i] = dirSep;
1311 /* Is last character directory separator? */
1312 if (utf16name[lha->dirname.length / 2 - 1] !=
1314 /* invalid directory data */
1321 lha->dos_attr = (unsigned char)
1322 (archive_le16dec(extdheader) & 0xff);
/* Three consecutive 64-bit Windows timestamps: birth, modification and
 * access time, each converted with lha_win_time(). */
1325 if (datasize == (sizeof(uint64_t) * 3)) {
1326 lha->birthtime = lha_win_time(
1327 archive_le64dec(extdheader),
1328 &lha->birthtime_tv_nsec);
1329 extdheader += sizeof(uint64_t);
1330 lha->mtime = lha_win_time(
1331 archive_le64dec(extdheader),
1332 &lha->mtime_tv_nsec);
1333 extdheader += sizeof(uint64_t);
1334 lha->atime = lha_win_time(
1335 archive_le64dec(extdheader),
1336 &lha->atime_tv_nsec);
1337 lha->setflag |= BIRTHTIME_IS_SET |
/* Two 64-bit little-endian values: compressed size, then original size. */
1342 if (datasize == sizeof(uint64_t) * 2) {
1343 lha->compsize = archive_le64dec(extdheader);
1344 extdheader += sizeof(uint64_t);
1345 lha->origsize = archive_le64dec(extdheader);
1349 /* Get an archived filename charset from codepage.
1350 * This overwrites the charset specified by
1351 * hdrcharset option. */
1352 if (datasize == sizeof(uint32_t)) {
1353 struct archive_string cp;
1354 const char *charset;
1356 archive_string_init(&cp);
1357 switch (archive_le32dec(extdheader)) {
1358 case 65001: /* UTF-8 */
1362 archive_string_sprintf(&cp, "CP%d",
1363 (int)archive_le32dec(extdheader));
1368 archive_string_conversion_from_charset(
1369 &(a->archive), charset, 1);
1371 archive_string_conversion_from_charset(
1372 &(a->archive), charset, 1);
1373 archive_string_free(&cp);
1374 if (lha->sconv_dir == NULL)
1375 return (ARCHIVE_FATAL);
1376 if (lha->sconv_fname == NULL)
1377 return (ARCHIVE_FATAL);
1381 if (datasize == sizeof(uint16_t)) {
1382 lha->mode = archive_le16dec(extdheader);
1383 lha->setflag |= UNIX_MODE_IS_SET;
1386 case EXT_UNIX_GID_UID:
1387 if (datasize == (sizeof(uint16_t) * 2)) {
1388 lha->gid = archive_le16dec(extdheader);
1389 lha->uid = archive_le16dec(extdheader+2);
1392 case EXT_UNIX_GNAME:
1394 archive_strncpy(&lha->gname,
1395 (const char *)extdheader, datasize);
1397 case EXT_UNIX_UNAME:
1399 archive_strncpy(&lha->uname,
1400 (const char *)extdheader, datasize);
1402 case EXT_UNIX_MTIME:
1403 if (datasize == sizeof(uint32_t))
1404 lha->mtime = archive_le32dec(extdheader);
1406 case EXT_OS2_NEW_ATTR:
1407 /* This extended header is OS/2 depend. */
1408 if (datasize == 16) {
1409 lha->dos_attr = (unsigned char)
1410 (archive_le16dec(extdheader) & 0xff);
1411 lha->mode = archive_le16dec(extdheader+2);
1412 lha->gid = archive_le16dec(extdheader+4);
1413 lha->uid = archive_le16dec(extdheader+6);
1414 lha->birthtime = archive_le32dec(extdheader+8);
1415 lha->atime = archive_le32dec(extdheader+12);
1416 lha->setflag |= UNIX_MODE_IS_SET
1417 | BIRTHTIME_IS_SET | ATIME_IS_SET;
/* 20-byte layout: mode, gid, uid, birth time and access time, each a
 * 32-bit little-endian value. */
1421 if (datasize == 20) {
1422 lha->mode = (mode_t)archive_le32dec(extdheader);
1423 lha->gid = archive_le32dec(extdheader+4);
1424 lha->uid = archive_le32dec(extdheader+8);
1425 lha->birthtime = archive_le32dec(extdheader+12);
1426 lha->atime = archive_le32dec(extdheader+16);
1427 lha->setflag |= UNIX_MODE_IS_SET
1428 | BIRTHTIME_IS_SET | ATIME_IS_SET;
1431 case EXT_TIMEZONE: /* Not supported */
/* This extended header has been handled; drop its bytes from the input. */
1437 __archive_read_consume(a, extdsize);
1440 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1441 "Invalid extended LHa header");
1442 return (ARCHIVE_FATAL);
/*
 * Finish the current entry.
 *
 * The first time this runs for an entry (end_of_entry_cleanup still zero) it
 * compares the CRC stored in the header, when CRC_IS_SET is flagged, with the
 * CRC accumulated over the entry data and records a data CRC error on the
 * archive if they differ. It then marks the cleanup as done so the check is
 * not repeated. The result variable r starts out as ARCHIVE_EOF.
 */
1446 lha_end_of_entry(struct archive_read *a)
1448 struct lha *lha = (struct lha *)(a->format->data);
1449 int r = ARCHIVE_EOF;
1451 if (!lha->end_of_entry_cleanup) {
1452 if ((lha->setflag & CRC_IS_SET) &&
1453 lha->crc != lha->entry_crc_calculated) {
1454 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1455 "LHa data CRC error");
1459 /* End-of-entry cleanup done. */
1460 lha->end_of_entry_cleanup = 1;
/*
 * Produce the next chunk of data of the current entry.
 *
 * Input handed out by the previous call is consumed first (entry_unconsumed).
 * When the entry has already ended, *offset is set to the current entry
 * offset and the call finishes through lha_end_of_entry(). Otherwise the
 * request is forwarded to lha_read_data_lzh() for compressed entries or to
 * lha_read_data_none() for entries stored without compression.
 */
1466 archive_read_format_lha_read_data(struct archive_read *a,
1467 const void **buff, size_t *size, int64_t *offset)
1469 struct lha *lha = (struct lha *)(a->format->data);
1472 if (lha->entry_unconsumed) {
1473 /* Consume as much as the decompressor actually used. */
1474 __archive_read_consume(a, lha->entry_unconsumed);
1475 lha->entry_unconsumed = 0;
1477 if (lha->end_of_entry) {
1478 *offset = lha->entry_offset;
1481 return (lha_end_of_entry(a));
1484 if (lha->entry_is_compressed)
1485 r = lha_read_data_lzh(a, buff, size, offset);
1487 /* No compression. */
1488 r = lha_read_data_none(a, buff, size, offset);
1493 * Read file content that is stored without compression.
1495 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
1496 * lha->end_of_entry if it consumes all of the data.
/*
 * Hand out the next run of bytes of an entry that is stored uncompressed.
 *
 *   a       reader positioned inside the entry data.
 *   buff    out: pointer returned by __archive_read_ahead().
 *   size    out: number of bytes made available, capped at
 *           lha->entry_bytes_remaining.
 *   offset  out: lha->entry_offset before this chunk is added.
 *
 * The entry CRC is updated over the returned bytes and end_of_entry is set
 * once entry_bytes_remaining reaches zero. Returns ARCHIVE_OK, or
 * ARCHIVE_FATAL with a truncation error when no input is available.
 */
1499 lha_read_data_none(struct archive_read *a, const void **buff,
1500 size_t *size, int64_t *offset)
1502 struct lha *lha = (struct lha *)(a->format->data);
1503 ssize_t bytes_avail;
1505 if (lha->entry_bytes_remaining == 0) {
1508 *offset = lha->entry_offset;
1509 lha->end_of_entry = 1;
1510 return (ARCHIVE_OK);
1513 * Note: '1' here is a performance optimization.
1514 * Recall that the decompression layer returns a count of
1515 * available bytes; asking for more than that forces the
1516 * decompressor to combine reads by copying data.
1518 *buff = __archive_read_ahead(a, 1, &bytes_avail);
1519 if (bytes_avail <= 0) {
1520 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1521 "Truncated LHa file data");
1522 return (ARCHIVE_FATAL);
/* Never hand out bytes that belong to whatever follows this entry. */
1524 if (bytes_avail > lha->entry_bytes_remaining)
1525 bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1526 lha->entry_crc_calculated =
1527 lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail);
1528 *size = bytes_avail;
1529 *offset = lha->entry_offset;
1530 lha->entry_offset += bytes_avail;
1531 lha->entry_bytes_remaining -= bytes_avail;
1532 if (lha->entry_bytes_remaining == 0)
1533 lha->end_of_entry = 1;
/* The bytes are only recorded here; __archive_read_consume() is called for
 * them at the start of the next read_data or read_data_skip call. */
1534 lha->entry_unconsumed = bytes_avail;
1535 return (ARCHIVE_OK);
1539 * Read a file content in LZHUFF encoding.
1541 * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is
1542 * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes
/*
 * Decode the next chunk of an LZHUFF-compressed entry.
 *
 * On the first call for an entry the decoder is set up with
 * lzh_decode_init(). An unsupported method is reported, the entry data is
 * skipped and ARCHIVE_WARN is returned, while an allocation failure yields
 * ARCHIVE_FATAL. Each call then feeds the bytes currently available from the
 * reader (capped at entry_bytes_remaining) to lzh_decode(), records how many
 * input bytes were used and, when the decoder emitted output, returns it in
 * buff, size and offset and folds it into the entry CRC.
 */
1546 lha_read_data_lzh(struct archive_read *a, const void **buff,
1547 size_t *size, int64_t *offset)
1549 struct lha *lha = (struct lha *)(a->format->data);
1550 ssize_t bytes_avail;
1553 /* If we haven't yet read any data, initialize the decompressor. */
1554 if (!lha->decompress_init) {
1555 r = lzh_decode_init(&(lha->strm), lha->method);
1559 case ARCHIVE_FAILED:
1560 /* Unsupported compression. */
1564 archive_set_error(&a->archive,
1565 ARCHIVE_ERRNO_FILE_FORMAT,
1566 "Unsupported lzh compression method -%c%c%c-",
1567 lha->method[0], lha->method[1], lha->method[2]);
1568 /* We know compressed size; just skip it. */
1569 archive_read_format_lha_read_data_skip(a);
1570 return (ARCHIVE_WARN);
1572 archive_set_error(&a->archive, ENOMEM,
1573 "Couldn't allocate memory "
1574 "for lzh decompression");
1575 return (ARCHIVE_FATAL);
1577 /* We've initialized decompression for this stream. */
1578 lha->decompress_init = 1;
1579 lha->strm.avail_out = 0;
1580 lha->strm.total_out = 0;
1584 * Note: '1' here is a performance optimization.
1585 * Recall that the decompression layer returns a count of
1586 * available bytes; asking for more than that forces the
1587 * decompressor to combine reads by copying data.
1589 lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail);
1590 if (bytes_avail <= 0) {
1591 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1592 "Truncated LHa file body");
1593 return (ARCHIVE_FATAL);
1595 if (bytes_avail > lha->entry_bytes_remaining)
1596 bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1598 lha->strm.avail_in = (int)bytes_avail;
1599 lha->strm.total_in = 0;
1600 lha->strm.avail_out = 0;
/* The second argument is the last flag of lzh_decode(): it is true when this
 * chunk holds all of the compressed bytes that remain for the entry. */
1602 r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining);
1607 lha->end_of_entry = 1;
1610 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1612 return (ARCHIVE_FAILED);
/* Account for the input the decoder actually used during this call. */
1614 lha->entry_unconsumed = lha->strm.total_in;
1615 lha->entry_bytes_remaining -= lha->strm.total_in;
/* avail_out and ref_ptr are filled in by lzh_emit_window() when a window of
 * decoded data is ready. */
1617 if (lha->strm.avail_out) {
1618 *offset = lha->entry_offset;
1619 *size = lha->strm.avail_out;
1620 *buff = lha->strm.ref_ptr;
1621 lha->entry_crc_calculated =
1622 lha_crc16(lha->entry_crc_calculated, *buff, *size);
1623 lha->entry_offset += *size;
1625 *offset = lha->entry_offset;
1628 if (lha->end_of_entry)
1629 return (lha_end_of_entry(a));
1631 return (ARCHIVE_OK);
1635 * Skip a file content.
/*
 * Skip whatever is left of the current entry.
 *
 * Pending input from the last read is consumed first. If the end-of-entry
 * cleanup already ran there is nothing more to do; otherwise the remaining
 * entry_bytes_remaining bytes are consumed in one step and both end-of-entry
 * flags are set. Returns ARCHIVE_OK, or ARCHIVE_FATAL when the consume call
 * reports a negative count.
 */
1638 archive_read_format_lha_read_data_skip(struct archive_read *a)
1641 int64_t bytes_skipped;
1643 lha = (struct lha *)(a->format->data);
1645 if (lha->entry_unconsumed) {
1646 /* Consume as much as the decompressor actually used. */
1647 __archive_read_consume(a, lha->entry_unconsumed);
1648 lha->entry_unconsumed = 0;
1651 /* if we've already read to end of data, we're done. */
1652 if (lha->end_of_entry_cleanup)
1653 return (ARCHIVE_OK);
1656 * If the length is at the beginning, we can skip the
1657 * compressed data much more quickly.
1659 bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining);
1660 if (bytes_skipped < 0)
1661 return (ARCHIVE_FATAL);
1663 /* This entry is finished and done. */
1664 lha->end_of_entry_cleanup = lha->end_of_entry = 1;
1665 return (ARCHIVE_OK);
/*
 * Release the resources held by the per-format state: the LZH decoder, the
 * directory, file, user and group name strings and the wide string buffer.
 * The data pointer of the format is then cleared. Returns ARCHIVE_OK.
 */
1669 archive_read_format_lha_cleanup(struct archive_read *a)
1671 struct lha *lha = (struct lha *)(a->format->data);
1673 lzh_decode_free(&(lha->strm));
1674 archive_string_free(&(lha->dirname));
1675 archive_string_free(&(lha->filename));
1676 archive_string_free(&(lha->uname));
1677 archive_string_free(&(lha->gname));
1678 archive_wstring_free(&(lha->ws));
1680 (a->format->data) = NULL;
1681 return (ARCHIVE_OK);
1685 * 'LHa for UNIX' utility has archived a symbolic-link name after
1686 * a pathname with '|' character.
1687 * This function extracts the symbolic-link name from the pathname.
1690 * 1. a symbolic-name is 'aaa/bb/cc'
1691 * 2. a filename is 'xxx/bbb'
1692 * then the archived pathname is 'xxx/bbb|aaa/bb/cc'
/*
 * Extract a symbolic-link target that is stored after a bar character in
 * pathname.
 *
 *   linkname  out: receives the text that follows the first bar, copied
 *             with archive_wstrncpy().
 *   pathname  in/out: searched with wcschr(); its length field is refreshed
 *             with wcslen() afterwards.
 */
1695 lha_parse_linkname(struct archive_wstring *linkname,
1696 struct archive_wstring *pathname)
1701 linkptr = wcschr(pathname->s, L'|');
1702 if (linkptr != NULL) {
1703 symlen = wcslen(linkptr + 1);
1704 archive_wstrncpy(linkname, linkptr+1, symlen);
1707 pathname->length = wcslen(pathname->s);
1714 /* Convert an MSDOS-style date/time into Unix-style time. */
/*
 * Convert the 4-byte MS-DOS timestamp at p to a time value via mktime().
 *
 * The first 16-bit little-endian word is the time, the second the date:
 *   date: bits 9-15 year counted from 1980, bits 5-8 month (1-based),
 *         bits 0-4 day of month
 *   time: bits 11-15 hour, bits 5-10 minute, bits 0-4 second divided by 2
 */
1716 lha_dos_time(const unsigned char *p)
1721 msTime = archive_le16dec(p);
1722 msDate = archive_le16dec(p+2);
1724 memset(&ts, 0, sizeof(ts));
1725 ts.tm_year = ((msDate >> 9) & 0x7f) + 80; /* Years since 1900. */
1726 ts.tm_mon = ((msDate >> 5) & 0x0f) - 1; /* Month number. */
1727 ts.tm_mday = msDate & 0x1f; /* Day of month. */
1728 ts.tm_hour = (msTime >> 11) & 0x1f;
1729 ts.tm_min = (msTime >> 5) & 0x3f;
/* The stored value has a resolution of two seconds, hence the shift. */
1730 ts.tm_sec = (msTime << 1) & 0x3e;
1732 return (mktime(&ts));
1735 /* Convert an MS-Windows-style date/time into Unix-style time. */
/*
 * Convert a Windows timestamp, counted in 100-nanosecond units, to seconds
 * relative to 1970-01-01 00:00:00 UTC. EPOC_TIME is the Windows tick value
 * of that instant. For inputs at or after it, the whole seconds are returned
 * and the fractional part, scaled to nanoseconds, is stored through ns.
 */
1737 lha_win_time(uint64_t wintime, long *ns)
1739 #define EPOC_TIME ARCHIVE_LITERAL_ULL(116444736000000000)
1741 if (wintime >= EPOC_TIME) {
1742 wintime -= EPOC_TIME; /* 1970-01-01 00:00:00 (UTC) */
1744 *ns = (long)(wintime % 10000000) * 100;
1745 return (wintime / 10000000);
/*
 * Running 8-bit checksum helper: starts from sum and iterates size times
 * over the bytes reachable through pp.
 * NOTE(review): the loop body and the use of offset are not shown here;
 * presumably each byte from pp + offset onward is added to sum -- confirm.
 */
1753 static unsigned char
1754 lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size)
1756 unsigned char const *p = (unsigned char const *)pp;
1759 for (;size > 0; --size)
/*
 * Lookup tables for the CRC-16 used here (reflected polynomial 0xA001).
 * crc16tbl[0][b] is the register value obtained by shifting byte value b
 * through the CRC bit by bit; crc16tbl[1][b] is that value advanced by one
 * further zero byte, which lets lha_crc16() fold in two input bytes with one
 * pair of lookups.
 */
1764 static uint16_t crc16tbl[2][256];
/*
 * Fill both halves of crc16tbl.
 * NOTE(review): the static flag crc16init presumably limits the work to the
 * first call; the test itself is not shown here -- confirm.
 */
1766 lha_crc16_init(void)
1769 static int crc16init = 0;
1775 for (i = 0; i < 256; i++) {
1777 uint16_t crc = (uint16_t)i;
1779 crc = (crc >> 1) ^ ((crc & 1) * 0xA001);
1780 crc16tbl[0][i] = crc;
1783 for (i = 0; i < 256; i++) {
1784 crc16tbl[1][i] = (crc16tbl[0][i] >> 8)
1785 ^ crc16tbl[0][crc16tbl[0][i] & 0xff];
/*
 * Update the CRC-16 value crc with len bytes starting at pp; callers store
 * the result back into their running CRC. The tables in crc16tbl are used
 * for the lookups.
 *
 * The union u, initialised with 0x01020304, reveals the host byte order:
 * u.c[0] == 1 means big endian, in which case each 16-bit word is byte
 * swapped (bswap16) before it is folded in. A byte at an odd address is
 * processed on its own first so that the main loop can read 16-bit words
 * from an even address; that loop consumes eight bytes per iteration through
 * CRC16W, and what is left afterwards is handled one byte at a time.
 */
1790 lha_crc16(uint16_t crc, const void *pp, size_t len)
1792 const unsigned char *p = (const unsigned char *)pp;
1793 const uint16_t *buff;
1797 } u = { 0x01020304 };
1802 /* Process unaligned address. */
1803 if (((uintptr_t)p) & (uintptr_t)0x1) {
1804 crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1807 buff = (const uint16_t *)p;
1809 * Modern C compiler such as GCC does not unroll automatically yet
1810 * without unrolling pragma, and Clang is so. So we should
1811 * unroll this loop for its performance.
1813 for (;len >= 8; len -= 8) {
1814 /* This if statement expects compiler optimization will
1815 * remove the statement which will not be executed. */
1817 #if defined(_MSC_VER) && _MSC_VER >= 1400 /* Visual Studio */
1818 # define bswap16(x) _byteswap_ushort(x)
1819 #elif defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4)
1820 /* GCC 4.8 and later has __builtin_bswap16() */
1821 # define bswap16(x) __builtin_bswap16(x)
1822 #elif defined(__clang__)
1823 /* All clang versions have __builtin_bswap16() */
1824 # define bswap16(x) __builtin_bswap16(x)
1826 # define bswap16(x) ((((x) >> 8) & 0xff) | ((x) << 8))
1828 #define CRC16W do { \
1829 if(u.c[0] == 1) { /* Big endian */ \
1830 crc ^= bswap16(*buff); buff++; \
1833 crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];\
1843 p = (const unsigned char *)buff;
1845 crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1851 * Initialize LZHUF decoder.
1853 * Returns ARCHIVE_OK if initialization was successful.
1854 * Returns ARCHIVE_FAILED if method is unsupported.
1855 * Returns ARCHIVE_FATAL if initialization failed; memory allocation
/*
 * Set up (or reset) the decoder state in strm for the given method string.
 *
 * - strm->ds is allocated zeroed on first use.
 * - ds->error is ARCHIVE_FAILED while the method is being validated: the
 *   method must start with the letters l and h, and its third character
 *   selects the window size (13, 15 or 16 bits in the cases shown below);
 *   anything else returns ARCHIVE_FAILED.
 * - The window buffer is always 128 KiB (1 << 17) and is allocated only
 *   when none exists yet. Its last 1 << w_bits bytes are filled with 0x20.
 * - The bit reader cache is cleared and the literal (lt) and position /
 *   pre-literal (pt) Huffman tables are initialised; a failure there or in
 *   an allocation returns ARCHIVE_FATAL.
 */
1859 lzh_decode_init(struct lzh_stream *strm, const char *method)
1864 if (strm->ds == NULL) {
1865 strm->ds = calloc(1, sizeof(*strm->ds));
1866 if (strm->ds == NULL)
1867 return (ARCHIVE_FATAL);
1870 ds->error = ARCHIVE_FAILED;
1871 if (method == NULL || method[0] != 'l' || method[1] != 'h')
1872 return (ARCHIVE_FAILED);
1873 switch (method[2]) {
1875 w_bits = 13;/* 8KiB for window */
1878 w_bits = 15;/* 32KiB for window */
1881 w_bits = 16;/* 64KiB for window */
1884 return (ARCHIVE_FAILED);/* Not supported. */
1886 ds->error = ARCHIVE_FATAL;
1887 /* Expand a window size up to 128 KiB for decompressing process
1888 * performance whatever its original window size is. */
1889 ds->w_size = 1U << 17;
1890 ds->w_mask = ds->w_size -1;
1891 if (ds->w_buff == NULL) {
1892 ds->w_buff = malloc(ds->w_size);
1893 if (ds->w_buff == NULL)
1894 return (ARCHIVE_FATAL);
/* From here on w_size is the window size of the method itself; only that
 * many bytes at the end of the 128 KiB buffer are filled with spaces. */
1896 w_size = 1U << w_bits;
1897 memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size);
/* The position table has w_bits + 1 entries; its entry count is read from a
 * 5-bit field for 15- and 16-bit windows and from a 4-bit field otherwise. */
1900 ds->pos_pt_len_size = w_bits + 1;
1901 ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4;
1902 ds->literal_pt_len_size = PT_BITLEN_SIZE;
1903 ds->literal_pt_len_bits = 5;
1904 ds->br.cache_buffer = 0;
1905 ds->br.cache_avail = 0;
1907 if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16)
1909 return (ARCHIVE_FATAL);
1910 ds->lt.len_bits = 9;
1911 if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16)
1913 return (ARCHIVE_FATAL);
1916 return (ARCHIVE_OK);
1920 * Release LZHUF decoder.
/*
 * Release what the decoder state of strm owns: the window buffer and the
 * literal and position Huffman tables. A stream whose ds pointer is NULL is
 * tested for first.
 */
1923 lzh_decode_free(struct lzh_stream *strm)
1926 if (strm->ds == NULL)
1928 free(strm->ds->w_buff);
1929 lzh_huffman_free(&(strm->ds->lt));
1930 lzh_huffman_free(&(strm->ds->pt));
1936 * Bit stream reader.
/*
 * Bit reader helpers. br->cache_buffer holds the unread bits and
 * br->cache_avail counts how many of them are valid.
 *
 *   lzh_br_has(br, n)          at least n bits are cached.
 *   lzh_br_bits(br, n)         peek at the next n bits (does not consume).
 *   lzh_br_bits_forced(br, n)  peek when fewer than n bits are cached: the
 *                              available bits are shifted up and the missing
 *                              low bits read as zero.
 *   lzh_br_read_ahead_0        true if n bits are cached or lzh_br_fillup()
 *                              reports a full cache.
 *   lzh_br_read_ahead          as above, but also true when the refill left
 *                              the cache partly filled with at least n bits.
 *   lzh_br_consume/unconsume   decrease / increase cache_avail by n bits.
 *
 * Peeked values are truncated to 16 bits and masked with cache_masks[n].
 */
1938 /* Check that the cache buffer has enough bits. */
1939 #define lzh_br_has(br, n) ((br)->cache_avail >= n)
1940 /* Get compressed data by bit. */
1941 #define lzh_br_bits(br, n) \
1942 (((uint16_t)((br)->cache_buffer >> \
1943 ((br)->cache_avail - (n)))) & cache_masks[n])
1944 #define lzh_br_bits_forced(br, n) \
1945 (((uint16_t)((br)->cache_buffer << \
1946 ((n) - (br)->cache_avail))) & cache_masks[n])
1947 /* Read ahead to make sure the cache buffer has enough compressed data we
1949 * True : completed, there is enough data in the cache buffer.
1950 * False : we met that strm->next_in is empty, we have to get following
1952 #define lzh_br_read_ahead_0(strm, br, n) \
1953 (lzh_br_has(br, (n)) || lzh_br_fillup(strm, br))
1954 /* True : the cache buffer has some bits as much as we need.
1955 * False : there are no enough bits in the cache buffer to be used,
1956 * we have to get following bytes if we could. */
1957 #define lzh_br_read_ahead(strm, br, n) \
1958 (lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n)))
1960 /* Notify how many bits we consumed. */
1961 #define lzh_br_consume(br, n) ((br)->cache_avail -= (n))
1962 #define lzh_br_unconsume(br, n) ((br)->cache_avail += (n))
/*
 * cache_masks[n] has the low n bits set for n from 0 to 16. The entries for
 * 17 to 19 repeat 0xFFFF, so those indexes are also valid for the 16-bit
 * values produced by lzh_br_bits() and lzh_br_bits_forced().
 */
1964 static const uint16_t cache_masks[] = {
1965 0x0000, 0x0001, 0x0003, 0x0007,
1966 0x000F, 0x001F, 0x003F, 0x007F,
1967 0x00FF, 0x01FF, 0x03FF, 0x07FF,
1968 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF,
1969 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
1973 * Shift away used bits in the cache data and fill it up with following bits.
1974 * Call this when cache buffer does not have enough bits you need.
1976 * Returns 1 if the cache buffer is full.
1977 * Returns 0 if the cache buffer is not full; input buffer is empty.
/*
 * Refill the bit cache of br from strm->next_in.
 *
 * n is the number of free bits in the cache (CACHE_BITS minus the bits still
 * unread) and x the number of whole bytes that fit. When the input holds at
 * least x bytes, the cases shown below merge 8, 7 or 6 bytes in one step,
 * shifting the unread bits up to make room and adjusting avail_in and
 * cache_avail accordingly. Otherwise bytes are appended one at a time, and
 * an empty input ends the refill.
 */
1980 lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br)
1982 int n = CACHE_BITS - br->cache_avail;
/* Number of whole bytes that fit into the free part of the cache. */
1985 const int x = n >> 3;
1986 if (strm->avail_in >= x) {
1990 ((uint64_t)strm->next_in[0]) << 56 |
1991 ((uint64_t)strm->next_in[1]) << 48 |
1992 ((uint64_t)strm->next_in[2]) << 40 |
1993 ((uint64_t)strm->next_in[3]) << 32 |
1994 ((uint32_t)strm->next_in[4]) << 24 |
1995 ((uint32_t)strm->next_in[5]) << 16 |
1996 ((uint32_t)strm->next_in[6]) << 8 |
1997 (uint32_t)strm->next_in[7];
1999 strm->avail_in -= 8;
2000 br->cache_avail += 8 * 8;
2004 (br->cache_buffer << 56) |
2005 ((uint64_t)strm->next_in[0]) << 48 |
2006 ((uint64_t)strm->next_in[1]) << 40 |
2007 ((uint64_t)strm->next_in[2]) << 32 |
2008 ((uint32_t)strm->next_in[3]) << 24 |
2009 ((uint32_t)strm->next_in[4]) << 16 |
2010 ((uint32_t)strm->next_in[5]) << 8 |
2011 (uint32_t)strm->next_in[6];
2013 strm->avail_in -= 7;
2014 br->cache_avail += 7 * 8;
2018 (br->cache_buffer << 48) |
2019 ((uint64_t)strm->next_in[0]) << 40 |
2020 ((uint64_t)strm->next_in[1]) << 32 |
2021 ((uint32_t)strm->next_in[2]) << 24 |
2022 ((uint32_t)strm->next_in[3]) << 16 |
2023 ((uint32_t)strm->next_in[4]) << 8 |
2024 (uint32_t)strm->next_in[5];
2026 strm->avail_in -= 6;
2027 br->cache_avail += 6 * 8;
2030 /* We have enough compressed data in
2031 * the cache buffer.*/
/* Byte-wise path: append one input byte per step. */
2037 if (strm->avail_in == 0) {
2038 /* There is not enough compressed data to fill up the
2043 (br->cache_buffer << 8) | *strm->next_in++;
2045 br->cache_avail += 8;
2053 * 1. Returns ARCHIVE_OK if output buffer or input buffer are empty.
2054 * Please set available buffer and call this function again.
2055 * 2. Returns ARCHIVE_EOF if decompression has been completed.
2056 * 3. Returns ARCHIVE_FAILED if an error occurred; compressed data
2057 * is broken or you do not set 'last' flag properly.
2058 * 4. 'last' flag is very important, you must set 1 to the flag if there
2059 * is no input data. The lha compressed data format does not provide how
2060 * to know the compressed data is really finished.
2061 * Note: the lha command utility checks whether the total size of output
2062 * bytes has reached the uncompressed size recorded in its header; it does
2063 * not care whether the decoding process finished properly.
2064 * GNU ZIP can decompress another compressed file made by SCO LZH compress.
2065 * it handles EOF as null to fill read buffer with zero until the decoding
2066 * process meet 2 bytes of zeros at reading a size of a next chunk, so the
2067 * zeros are treated as the mark of the end of the data although the zeros
2068 * is dummy, not the file data.
/*
 * Forward declarations of the two halves of the decoder and the values of
 * ds->state. lzh_decode() sends states below ST_GET_LITERAL to
 * lzh_read_blocks() and the remaining ones to lzh_decode_blocks().
 */
2070 static int lzh_read_blocks(struct lzh_stream *, int);
2071 static int lzh_decode_blocks(struct lzh_stream *, int);
2072 #define ST_RD_BLOCK 0
2073 #define ST_RD_PT_1 1
2074 #define ST_RD_PT_2 2
2075 #define ST_RD_PT_3 3
2076 #define ST_RD_PT_4 4
2077 #define ST_RD_LITERAL_1 5
2078 #define ST_RD_LITERAL_2 6
2079 #define ST_RD_LITERAL_3 7
2080 #define ST_RD_POS_DATA_1 8
2081 #define ST_GET_LITERAL 9
2082 #define ST_GET_POS_1 10
2083 #define ST_GET_POS_2 11
2084 #define ST_COPY_DATA 12
/*
 * Run the decoder on the input currently attached to strm.
 *
 * States below ST_GET_LITERAL are handled by lzh_read_blocks(), the others
 * by lzh_decode_blocks(); last is passed through to both. The number of
 * input bytes used, measured as the drop in strm->avail_in, is added to
 * strm->total_in.
 */
2087 lzh_decode(struct lzh_stream *strm, int last)
2089 struct lzh_dec *ds = strm->ds;
2096 avail_in = strm->avail_in;
2098 if (ds->state < ST_GET_LITERAL)
2099 r = lzh_read_blocks(strm, last);
2101 r = lzh_decode_blocks(strm, last);
2103 strm->total_in += avail_in - strm->avail_in;
/*
 * Publish the first s bytes of the window buffer as decoder output:
 * ref_ptr is pointed at the window, avail_out is set to s and total_out
 * grows by s.
 */
2108 lzh_emit_window(struct lzh_stream *strm, size_t s)
2110 strm->ref_ptr = strm->ds->w_buff;
2111 strm->avail_out = (int)s;
2112 strm->total_out += s;
/*
 * Table-reading half of the decoder state machine.
 *
 * Starting from ds->state, it reads the 16-bit block count, then the
 * bit lengths of the pre-literal table (pt) that is used to decode the
 * literal table, the literal table (lt) itself, and finally the position
 * table, which reuses pt with reading_position set. When more input is
 * needed, the current state is stored in ds->state and ARCHIVE_OK is
 * returned so that the caller can supply more data and resume; with last set
 * a shortage of input is treated as a failure instead. ARCHIVE_EOF is
 * returned when the compressed data has been handled completely. The failure
 * exit at the end stores ARCHIVE_FAILED in ds->error and returns it.
 */
2116 lzh_read_blocks(struct lzh_stream *strm, int last)
2118 struct lzh_dec *ds = strm->ds;
2119 struct lzh_br *br = &(ds->br);
2124 switch (ds->state) {
2127 * Read a block number indicates how many blocks
2128 * we will handle. The block is composed of a
2129 * literal and a match, sometimes a literal only
2130 * in particular, there are no reference data at
2131 * the beginning of the decompression.
2133 if (!lzh_br_read_ahead_0(strm, br, 16)) {
2135 /* We need following data. */
2136 return (ARCHIVE_OK);
2137 if (lzh_br_has(br, 8)) {
2139 * It seems there are extra bits.
2140 * 1. Compressed data is broken.
2141 * 2. `last' flag does not properly
2146 if (ds->w_pos > 0) {
2147 lzh_emit_window(strm, ds->w_pos);
2149 return (ARCHIVE_OK);
2151 /* End of compressed data; we have completely
2152 * handled all compressed data. */
2153 return (ARCHIVE_EOF);
2155 ds->blocks_avail = lzh_br_bits(br, 16);
2156 if (ds->blocks_avail == 0)
2158 lzh_br_consume(br, 16);
2160 * Read a literal table compressed in huffman
2163 ds->pt.len_size = ds->literal_pt_len_size;
2164 ds->pt.len_bits = ds->literal_pt_len_bits;
2165 ds->reading_position = 0;
2168 /* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are
2169 * used in reading both a literal table and a
2170 * position table. */
2171 if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) {
2173 goto failed;/* Truncated data. */
2174 ds->state = ST_RD_PT_1;
2175 return (ARCHIVE_OK);
2177 ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits);
2178 lzh_br_consume(br, ds->pt.len_bits);
2181 if (ds->pt.len_avail == 0) {
2182 /* There is no bitlen. */
2183 if (!lzh_br_read_ahead(strm, br,
2186 goto failed;/* Truncated data.*/
2187 ds->state = ST_RD_PT_2;
2188 return (ARCHIVE_OK);
2190 if (!lzh_make_fake_table(&(ds->pt),
2191 lzh_br_bits(br, ds->pt.len_bits)))
2192 goto failed;/* Invalid data. */
2193 lzh_br_consume(br, ds->pt.len_bits);
2194 if (ds->reading_position)
2195 ds->state = ST_GET_LITERAL;
2197 ds->state = ST_RD_LITERAL_1;
2199 } else if (ds->pt.len_avail > ds->pt.len_size)
2200 goto failed;/* Invalid data. */
2202 memset(ds->pt.freq, 0, sizeof(ds->pt.freq));
2203 if (ds->pt.len_avail < 3 ||
2204 ds->pt.len_size == ds->pos_pt_len_size) {
2205 ds->state = ST_RD_PT_4;
2210 ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3);
2212 if (ds->loop < 0 || last)
2213 goto failed;/* Invalid data. */
2214 /* Not completed, get following data. */
2215 ds->state = ST_RD_PT_3;
2216 return (ARCHIVE_OK);
2218 /* There are some null in bitlen of the literal. */
2219 if (!lzh_br_read_ahead(strm, br, 2)) {
2221 goto failed;/* Truncated data. */
2222 ds->state = ST_RD_PT_3;
2223 return (ARCHIVE_OK);
2225 c = lzh_br_bits(br, 2);
2226 lzh_br_consume(br, 2);
2227 if (c > ds->pt.len_avail - 3)
2228 goto failed;/* Invalid data. */
2229 for (i = 3; c-- > 0 ;)
2230 ds->pt.bitlen[i++] = 0;
2234 ds->loop = lzh_read_pt_bitlen(strm, ds->loop,
2236 if (ds->loop < ds->pt.len_avail) {
2237 if (ds->loop < 0 || last)
2238 goto failed;/* Invalid data. */
2239 /* Not completed, get following data. */
2240 ds->state = ST_RD_PT_4;
2241 return (ARCHIVE_OK);
2243 if (!lzh_make_huffman_table(&(ds->pt)))
2244 goto failed;/* Invalid data */
2245 if (ds->reading_position) {
2246 ds->state = ST_GET_LITERAL;
2250 case ST_RD_LITERAL_1:
2251 if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) {
2253 goto failed;/* Truncated data. */
2254 ds->state = ST_RD_LITERAL_1;
2255 return (ARCHIVE_OK);
2257 ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits);
2258 lzh_br_consume(br, ds->lt.len_bits);
2260 case ST_RD_LITERAL_2:
2261 if (ds->lt.len_avail == 0) {
2262 /* There is no bitlen. */
2263 if (!lzh_br_read_ahead(strm, br,
2266 goto failed;/* Truncated data.*/
2267 ds->state = ST_RD_LITERAL_2;
2268 return (ARCHIVE_OK);
2270 if (!lzh_make_fake_table(&(ds->lt),
2271 lzh_br_bits(br, ds->lt.len_bits)))
2272 goto failed;/* Invalid data */
2273 lzh_br_consume(br, ds->lt.len_bits);
2274 ds->state = ST_RD_POS_DATA_1;
2276 } else if (ds->lt.len_avail > ds->lt.len_size)
2277 goto failed;/* Invalid data */
2279 memset(ds->lt.freq, 0, sizeof(ds->lt.freq));
2281 case ST_RD_LITERAL_3:
2283 while (i < ds->lt.len_avail) {
2284 if (!lzh_br_read_ahead(strm, br,
2287 goto failed;/* Truncated data.*/
2289 ds->state = ST_RD_LITERAL_3;
2290 return (ARCHIVE_OK);
2292 rbits = lzh_br_bits(br, ds->pt.max_bits);
2293 c = lzh_decode_huffman(&(ds->pt), rbits);
2295 /* Note: 'c' will never be more than
2296 * eighteen since it's limited by
2297 * PT_BITLEN_SIZE, which is being set
2298 * to ds->pt.len_size through
2299 * ds->literal_pt_len_size. */
2300 lzh_br_consume(br, ds->pt.bitlen[c]);
2303 ds->lt.bitlen[i++] = c;
2304 } else if (c == 0) {
2305 lzh_br_consume(br, ds->pt.bitlen[c]);
2306 ds->lt.bitlen[i++] = 0;
2308 /* c == 1 or c == 2 */
/* Code 1 is followed by a 4-bit field and code 2 by a 9-bit field; the
 * value read from it is used as the count of a run of zero bit lengths. */
2309 int n = (c == 1)?4:9;
2310 if (!lzh_br_read_ahead(strm, br,
2311 ds->pt.bitlen[c] + n)) {
2312 if (last) /* Truncated data. */
2315 ds->state = ST_RD_LITERAL_3;
2316 return (ARCHIVE_OK);
2318 lzh_br_consume(br, ds->pt.bitlen[c]);
2319 c = lzh_br_bits(br, n);
2320 lzh_br_consume(br, n);
2322 if (i + c > ds->lt.len_avail)
2323 goto failed;/* Invalid data */
2324 memset(&(ds->lt.bitlen[i]), 0, c);
2328 if (i > ds->lt.len_avail ||
2329 !lzh_make_huffman_table(&(ds->lt)))
2330 goto failed;/* Invalid data */
/* From here on pt is set up for the position table and the ST_RD_PT_1 state
 * is entered again, this time with reading_position set. */
2332 case ST_RD_POS_DATA_1:
2334 * Read a position table compressed in huffman
2337 ds->pt.len_size = ds->pos_pt_len_size;
2338 ds->pt.len_bits = ds->pos_pt_len_bits;
2339 ds->reading_position = 1;
2340 ds->state = ST_RD_PT_1;
2342 case ST_GET_LITERAL:
2347 return (ds->error = ARCHIVE_FAILED);
/*
 * Decoding half of the state machine: turns Huffman codes into window data.
 *
 * Frequently used decoder fields are copied into locals on entry, including
 * a copy of the bit reader (bre). A code decoded with the literal table (lt)
 * is either a literal byte, which is stored in the window, or a match
 * length, computed as c - (UCHAR_MAX + 1) + MINMATCH. A match is followed by
 * a position code decoded with the position table (pt), which can be
 * extended by copy_pos - 1 further bits, and is then turned from a distance
 * into a window index with (w_pos - copy_pos - 1) & w_mask. The match is
 * copied inside the window: with memcpy() when source and destination do not
 * overlap, byte by byte otherwise. Whenever the window position reaches
 * w_size the window is handed to the caller through lzh_emit_window().
 *
 * When a step cannot be completed, the state and the values needed to resume
 * (copy_len, copy_pos) are written back to ds. The failure exit stores
 * ARCHIVE_FAILED in ds->error and returns it; the regular exit returns
 * ARCHIVE_OK.
 */
2351 lzh_decode_blocks(struct lzh_stream *strm, int last)
2353 struct lzh_dec *ds = strm->ds;
2354 struct lzh_br bre = ds->br;
2355 struct huffman *lt = &(ds->lt);
2356 struct huffman *pt = &(ds->pt);
2357 unsigned char *w_buff = ds->w_buff;
2358 unsigned char *lt_bitlen = lt->bitlen;
2359 unsigned char *pt_bitlen = pt->bitlen;
2360 int blocks_avail = ds->blocks_avail, c = 0;
2361 int copy_len = ds->copy_len, copy_pos = ds->copy_pos;
2362 int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size;
2363 int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits;
2364 int state = ds->state;
2368 case ST_GET_LITERAL:
2370 if (blocks_avail == 0) {
2371 /* We have decoded all blocks.
2372 * Let's handle next blocks. */
2373 ds->state = ST_RD_BLOCK;
2375 ds->blocks_avail = 0;
2381 /* lzh_br_read_ahead() always try to fill the
2382 * cache buffer up. In specific situation we
2383 * are close to the end of the data, the cache
2384 * buffer will not be full and thus we have to
2385 * determine if the cache buffer has some bits
2386 * as much as we need after lzh_br_read_ahead()
2388 if (!lzh_br_read_ahead(strm, &bre,
2392 /* Remaining bits are less than
2393 * maximum bits(lt.max_bits) but maybe
2394 * it still remains as much as we need,
2395 * so we should try to use it with
2397 c = lzh_decode_huffman(lt,
2398 lzh_br_bits_forced(&bre,
2400 lzh_br_consume(&bre, lt_bitlen[c]);
2401 if (!lzh_br_has(&bre, 0))
2402 goto failed;/* Over read. */
2404 c = lzh_decode_huffman(lt,
2405 lzh_br_bits(&bre, lt_max_bits));
2406 lzh_br_consume(&bre, lt_bitlen[c]);
2410 /* Current block is a match data. */
2413 * 'c' is exactly a literal code.
2415 /* Save a decoded code to reference it
2418 if (++w_pos >= w_size) {
2420 lzh_emit_window(strm, w_size);
2424 /* 'c' is the length of a match pattern we have
2425 * already extracted, which has be stored in
2426 * window(ds->w_buff). */
/* Length codes start right after the literal codes 0 to UCHAR_MAX; the
 * smallest one stands for a match of MINMATCH bytes. */
2427 copy_len = c - (UCHAR_MAX + 1) + MINMATCH;
2431 * Get a reference position.
2433 if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) {
2435 state = ST_GET_POS_1;
2436 ds->copy_len = copy_len;
2439 copy_pos = lzh_decode_huffman(pt,
2440 lzh_br_bits_forced(&bre, pt_max_bits));
2441 lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2442 if (!lzh_br_has(&bre, 0))
2443 goto failed;/* Over read. */
2445 copy_pos = lzh_decode_huffman(pt,
2446 lzh_br_bits(&bre, pt_max_bits));
2447 lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2452 /* We need an additional adjustment number to
2454 int p = copy_pos - 1;
2455 if (!lzh_br_read_ahead(strm, &bre, p)) {
2457 goto failed;/* Truncated data.*/
2458 state = ST_GET_POS_2;
2459 ds->copy_len = copy_len;
2460 ds->copy_pos = copy_pos;
2463 copy_pos = (1 << p) + lzh_br_bits(&bre, p);
2464 lzh_br_consume(&bre, p);
2466 /* The position is actually a distance from the last
2467 * code we had extracted and thus we have to convert
2468 * it to a position of the window. */
2469 copy_pos = (w_pos - copy_pos - 1) & w_mask;
2473 * Copy `copy_len' bytes as extracted data from
2474 * the window into the output buffer.
2480 if (copy_pos > w_pos) {
2481 if (l > w_size - copy_pos)
2482 l = w_size - copy_pos;
2484 if (l > w_size - w_pos)
2487 if ((copy_pos + l < w_pos)
2488 || (w_pos + l < copy_pos)) {
2490 memcpy(w_buff + w_pos,
2491 w_buff + copy_pos, l);
2493 const unsigned char *s;
2498 s = w_buff + copy_pos;
2499 for (li = 0; li < l-1;) {
2507 if (w_pos == w_size) {
2509 lzh_emit_window(strm, w_size);
2511 state = ST_GET_LITERAL;
2513 state = ST_COPY_DATA;
2514 ds->copy_len = copy_len - l;
2516 (copy_pos + l) & w_mask;
2521 /* A copy of current pattern ended. */
2524 copy_pos = (copy_pos + l) & w_mask;
2526 state = ST_GET_LITERAL;
2531 return (ds->error = ARCHIVE_FAILED);
2534 ds->blocks_avail = blocks_avail;
2537 return (ARCHIVE_OK);
/*
 * Allocate the buffers of a Huffman table that are still missing and
 * record its dimensions.
 *
 * hf:       table to prepare.  Buffers that are already non-NULL are
 *           kept as they are.
 * len_size: number of entries allocated for hf->bitlen; also stored
 *           in hf->len_size (narrowed to int).
 * tbl_bits: stored in hf->tbl_bits.  hf->tbl gets (1 << bits) entries;
 *           NOTE(review): the assignment of bits is not part of this
 *           listing -- the visible test against HTBL_BITS suggests the
 *           smaller of tbl_bits and HTBL_BITS; confirm.
 *           hf->tree is allocated only when tbl_bits > HTBL_BITS, with
 *           1 << (tbl_bits - HTBL_BITS + 4) entries (hf->tree_avail).
 *
 * Returns ARCHIVE_FATAL when an allocation fails, ARCHIVE_OK otherwise.
 */
2541 lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits)
2545 if (hf->bitlen == NULL) {
2546 hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0]));
2547 if (hf->bitlen == NULL)
2548 return (ARCHIVE_FATAL);
2550 if (hf->tbl == NULL) {
2551 if (tbl_bits < HTBL_BITS)
2555 hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0]));
2556 if (hf->tbl == NULL)
2557 return (ARCHIVE_FATAL);
2559 if (hf->tree == NULL && tbl_bits > HTBL_BITS) {
2560 hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4);
2561 hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0]));
2562 if (hf->tree == NULL)
2563 return (ARCHIVE_FATAL);
2565 hf->len_size = (int)len_size;
2566 hf->tbl_bits = tbl_bits;
2567 return (ARCHIVE_OK);
/*
 * NOTE(review): only the signature is part of this listing.
 * Presumably releases the buffers of hf that lzh_huffman_init()
 * allocates (bitlen, tbl, tree) -- confirm against the full source.
 */
2571 lzh_huffman_free(struct huffman *hf)
/*
 * Lookup table with 0x400 entries, indexed by a 10-bit value;
 * lzh_read_pt_bitlen() indexes it with the low 10 bits of a 13-bit
 * peek.
 *
 * Layout: 512 entries of 7, 256 of 8, 128 of 9, 64 of 10, 32 of 11,
 * 16 of 12, 8 of 13, 4 of 14, 2 of 15, one 16, and a final 0.
 * In other words, an entry is 7 plus the number of leading 1 bits of
 * its 10-bit index, except for the all-ones index (0x3FF), which
 * holds 0.
 */
2578 static const char bitlen_tbl[0x400] = {
2579 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2580 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2581 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2582 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2583 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2584 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2585 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2586 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2587 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2588 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2589 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2590 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2591 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2592 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2593 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2594 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2595 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2596 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2597 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2598 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2599 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2600 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2601 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2602 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2603 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2604 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2605 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2606 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2607 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2608 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2609 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2610 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2611 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2612 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2613 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2614 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2615 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2616 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2617 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2618 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2619 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2620 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2621 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2622 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2623 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2624 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2625 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2626 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2627 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2628 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2629 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2630 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2631 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2632 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2633 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2634 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2635 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2636 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2637 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2638 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2639 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2640 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2641 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
2642 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 16, 0
/*
 * Read code bit lengths from the bit stream into ds->pt.bitlen[i] for
 * i in [start, end).
 *
 * Each length starts as a 3-bit field.  The field value 7 is an
 * escape: 13 bits are peeked, their low 10 bits are looked up in
 * bitlen_tbl to get the real length c, and c - 3 extra bits are
 * consumed.  The 3-bit field itself is consumed afterwards and c is
 * stored.
 *
 * Returns -1 for invalid data.  NOTE(review): the condition guarding
 * that return, the handling of a failed lzh_br_read_ahead() and the
 * normal return value are on lines missing from this listing.
 */
2645 lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end)
2647 struct lzh_dec *ds = strm->ds;
2648 struct lzh_br *br = &(ds->br);
2651 for (i = start; i < end; ) {
2653 * bit pattern the number we need
2662 * 1111111111110 -> 16
2664 if (!lzh_br_read_ahead(strm, br, 3))
2666 if ((c = lzh_br_bits(br, 3)) == 7) {
2667 if (!lzh_br_read_ahead(strm, br, 13))
2669 c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF];
2671 lzh_br_consume(br, c - 3);
2673 return (-1);/* Invalid data. */
2675 lzh_br_consume(br, 3);
2676 ds->pt.bitlen[i++] = c;
/*
 * Set up hf for the code value c without building a real table.
 *
 * The visible statements check c against hf->len_size and set the bit
 * length of the code stored in hf->tbl[0] to 0.
 * NOTE(review): the remaining assignments and the return values are
 * on lines missing from this listing.
 */
2683 lzh_make_fake_table(struct huffman *hf, uint16_t c)
2685 if (c >= hf->len_size)
2690 hf->bitlen[hf->tbl[0]] = 0;
2695 * Make a Huffman coding table.
/*
 * Build the decoding structures of hf (direct table hf->tbl and, for
 * long codes, the nodes in hf->tree) from its code bit lengths.
 *
 * NOTE(review): this listing omits lines of the function, so the notes
 * below describe only the statements that are visible.
 *
 *  - The loop over the lengths 1..16 adds hf->freq[i] * w to ptn, with
 *    w halving each step from 1 << 15.  The table is rejected unless
 *    ptn ends up as exactly 0x10000 and maxbits does not exceed
 *    hf->tbl_bits; hf->max_bits is then recorded.
 *  - bitptn[] and weight[] are scaled down by 16 - maxbits bits, and
 *    for the lengths up to HTBL_BITS again by
 *    diffbits = maxbits - HTBL_BITS when maxbits > HTBL_BITS.  In that
 *    case hf->shift_bits is set to diffbits.
 *  - For each symbol i below hf->len_avail: a code of at most
 *    HTBL_BITS bits fills cnt direct-table slots with i; a longer
 *    code is linked through hf->tree nodes.  A reference to a tree
 *    node is stored as len_avail + node index, which keeps it apart
 *    from symbol values (those are below len_avail).
 *
 * Returns 0 when the code lengths are invalid.  The success value is
 * returned on a line missing from this listing -- presumably non-zero;
 * TODO confirm.
 */
2698 lzh_make_huffman_table(struct huffman *hf)
2701 const unsigned char *bitlen;
2702 int bitptn[17], weight[17];
2703 int i, maxbits = 0, ptn, tbl_size, w;
2704 int diffbits, len_avail;
2707 * Initialize bit patterns.
2710 for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) {
2714 ptn += hf->freq[i] * w;
2718 if (ptn != 0x10000 || maxbits > hf->tbl_bits)
2719 return (0);/* Invalid */
2721 hf->max_bits = maxbits;
2724 * Cut out extra bits which we won't house in the table.
2725 * This preparation reduces the same calculation in the for-loop
2729 int ebits = 16 - maxbits;
2730 for (i = 1; i <= maxbits; i++) {
2731 bitptn[i] >>= ebits;
2732 weight[i] >>= ebits;
2735 if (maxbits > HTBL_BITS) {
2739 diffbits = maxbits - HTBL_BITS;
2740 for (i = 1; i <= HTBL_BITS; i++) {
2741 bitptn[i] >>= diffbits;
2742 weight[i] >>= diffbits;
2744 htbl_max = bitptn[HTBL_BITS] +
2745 weight[HTBL_BITS] * hf->freq[HTBL_BITS];
2746 p = &(hf->tbl[htbl_max]);
2747 while (p < &hf->tbl[1U<<HTBL_BITS])
2751 hf->shift_bits = diffbits;
2756 tbl_size = 1 << HTBL_BITS;
2758 bitlen = hf->bitlen;
2759 len_avail = hf->len_avail;
2761 for (i = 0; i < len_avail; i++) {
2770 /* Get a bit pattern */
2774 if (len <= HTBL_BITS) {
2775 /* Calculate next bit pattern */
2776 if ((bitptn[len] = ptn + cnt) > tbl_size)
2777 return (0);/* Invalid */
2778 /* Update the table */
2785 pc[0] = (uint16_t)i;
2786 pc[1] = (uint16_t)i;
2787 pc[2] = (uint16_t)i;
2788 pc[3] = (uint16_t)i;
2789 pc[4] = (uint16_t)i;
2790 pc[5] = (uint16_t)i;
2791 pc[6] = (uint16_t)i;
2792 pc[7] = (uint16_t)i;
2796 8 * sizeof(uint16_t));
2801 16 * sizeof(uint16_t));
2805 memcpy(p, pc, cnt * sizeof(uint16_t));
2808 p[--cnt] = (uint16_t)i;
2809 p[--cnt] = (uint16_t)i;
2812 p[--cnt] = (uint16_t)i;
2818 * A bit length is too big to be housed to a direct table,
2819 * so we use a tree model for its extra bits.
2821 bitptn[len] = ptn + cnt;
2822 bit = 1U << (diffbits -1);
2823 extlen = len - HTBL_BITS;
2825 p = &(tbl[ptn >> diffbits]);
2827 *p = len_avail + hf->tree_used;
2828 ht = &(hf->tree[hf->tree_used++]);
2829 if (hf->tree_used > hf->tree_avail)
2830 return (0);/* Invalid */
2834 if (*p < len_avail ||
2835 *p >= (len_avail + hf->tree_used))
2836 return (0);/* Invalid */
2837 ht = &(hf->tree[*p - len_avail]);
2839 while (--extlen > 0) {
2841 if (ht->left < len_avail) {
2842 ht->left = len_avail + hf->tree_used;
2843 ht = &(hf->tree[hf->tree_used++]);
2844 if (hf->tree_used > hf->tree_avail)
2845 return (0);/* Invalid */
2849 ht = &(hf->tree[ht->left - len_avail]);
2852 if (ht->right < len_avail) {
2853 ht->right = len_avail + hf->tree_used;
2854 ht = &(hf->tree[hf->tree_used++]);
2855 if (hf->tree_used > hf->tree_avail)
2856 return (0);/* Invalid */
2860 ht = &(hf->tree[ht->right - len_avail]);
2867 return (0);/* Invalid */
2868 ht->left = (uint16_t)i;
2871 return (0);/* Invalid */
2872 ht->right = (uint16_t)i;
/*
 * Resolve a code that the direct table could not decode on its own by
 * walking hf->tree.
 *
 * hf:    Huffman table used for decoding.
 * rbits: bits peeked from the stream.  One bit is tested per loop
 *        iteration, starting at bit (hf->shift_bits - 1) and moving
 *        down.
 * c:     value taken from the direct table; the walk continues while
 *        c >= hf->len_avail.
 *
 * The walk is abandoned when the extra bits run out or when c is not
 * below hf->tree_used.
 * NOTE(review): the statements that adjust c, pick the child node and
 * return are on lines missing from this listing.
 */
2879 lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c)
2885 extlen = hf->shift_bits;
2886 while (c >= hf->len_avail) {
2888 if (extlen-- <= 0 || c >= hf->tree_used)
2890 if (rbits & (1U << extlen))
/*
 * Decode one code from the bits in rbits.
 *
 * The direct table hf->tbl is indexed with rbits >> hf->shift_bits.
 * When the entry is below hf->len_avail, or hf->len_avail is 0, no
 * tree lookup is done (NOTE(review): the statement for that case is
 * missing from this listing -- presumably it returns the entry;
 * confirm).  Otherwise the entry is resolved with
 * lzh_decode_huffman_tree().
 */
2899 lzh_decode_huffman(struct huffman *hf, unsigned rbits)
2903 * At first search an index table for a bit pattern.
2904 * If it fails, search a huffman tree for.
2906 c = hf->tbl[rbits >> hf->shift_bits];
2907 if (c < hf->len_avail || hf->len_avail == 0)
2909 /* This bit pattern needs to be found out at a huffman tree. */
2910 return (lzh_decode_huffman_tree(hf, rbits, c));