2 * Copyright (c) 2009-2011 Michihiro NAKAJIMA
3 * Copyright (c) 2003-2008 Tim Kientzle and Miklos Vajna
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include "archive_platform.h"
29 __FBSDID("$FreeBSD$");
49 #include "archive_endian.h"
50 #include "archive_private.h"
51 #include "archive_read_private.h"
53 #if HAVE_LZMA_H && HAVE_LIBLZMA
/* Output staging area: decompressed bytes are written into out_block,
 * whose capacity in bytes is out_block_size. */
57 unsigned char *out_block;
58 size_t out_block_size;
60 char eof; /* True = found end of compressed data. */
63 /* Following variables are used for lzip only. */
/* LZMA_MEMLIMIT is the memory limit handed to lzma_stream_decoder() and
 * lzma_alone_decoder() when the decoder is created. */
70 #if LZMA_VERSION_MAJOR >= 5
71 /* Effectively disable the limiter. */
72 #define LZMA_MEMLIMIT UINT64_MAX
74 /* NOTE: This limit should be derived from the amount of memory the
 * running system actually has; 1U << 30 (1 GiB) is a fixed stand-in. */
75 #define LZMA_MEMLIMIT (1U << 30)
78 /* Combined lzip/lzma/xz filter */
/* Forward declarations: xz_filter_read and xz_filter_close are the
 * callbacks placed in xz_lzma_reader_vtable; xz_lzma_bidder_init is the
 * setup routine shared by the xz, lzma and lzip init hooks. */
79 static ssize_t xz_filter_read(struct archive_read_filter *, const void **);
80 static int xz_filter_close(struct archive_read_filter *);
81 static int xz_lzma_bidder_init(struct archive_read_filter *);
86 * Note that we can detect xz and lzma compressed files even if we
87 * can't decompress them. (In fact, we like detecting them because we
88 * can give better error messages.) So the bid framework here gets
89 * compiled even if no lzma library is available.
/* Forward declarations of the bid/init hooks referenced by the xz, lzma
 * and lzip bidder vtables, plus lzip_has_member(), the lzip header probe
 * used by lzip_bidder_bid() and lzip_tail(). */
91 static int xz_bidder_bid(struct archive_read_filter_bidder *,
92 struct archive_read_filter *);
93 static int xz_bidder_init(struct archive_read_filter *);
94 static int lzma_bidder_bid(struct archive_read_filter_bidder *,
95 struct archive_read_filter *);
96 static int lzma_bidder_init(struct archive_read_filter *);
97 static int lzip_has_member(struct archive_read_filter *);
98 static int lzip_bidder_bid(struct archive_read_filter_bidder *,
99 struct archive_read_filter *);
100 static int lzip_bidder_init(struct archive_read_filter *);
102 #if ARCHIVE_VERSION_NUMBER < 4000000
103 /* Deprecated; remove in libarchive 4.0 */
/* Legacy entry point, compiled only for library versions below 4.0.
 * Forwards to archive_read_support_filter_xz() and returns its result
 * unchanged. */
105 archive_read_support_compression_xz(struct archive *a)
107 return archive_read_support_filter_xz(a);
/* Bidder callbacks for the xz format: .bid scores the stream header
 * (xz_bidder_bid), .init installs the decompressor (xz_bidder_init,
 * which is either the liblzma or the external-program variant depending
 * on the build). */
111 static const struct archive_read_filter_bidder_vtable
113 .bid = xz_bidder_bid,
114 .init = xz_bidder_init,
/* Register the xz bidder (name xz, callbacks from xz_bidder_vtable) on
 * the read archive _a.
 *
 * Returns ARCHIVE_FATAL when registration fails.  The tail visible here
 * records a notice that the external xz program will be used and returns
 * ARCHIVE_WARN.
 * NOTE(review): the return taken when liblzma is compiled in is not
 * visible in this excerpt; presumably ARCHIVE_OK, confirm. */
118 archive_read_support_filter_xz(struct archive *_a)
120 struct archive_read *a = (struct archive_read *)_a;
122 if (__archive_read_register_bidder(a, NULL, "xz",
123 &xz_bidder_vtable) != ARCHIVE_OK)
124 return (ARCHIVE_FATAL);
126 #if HAVE_LZMA_H && HAVE_LIBLZMA
129 archive_set_error(_a, ARCHIVE_ERRNO_MISC,
130 "Using external xz program for xz decompression");
131 return (ARCHIVE_WARN);
135 #if ARCHIVE_VERSION_NUMBER < 4000000
/* Legacy entry point, compiled only for library versions below 4.0.
 * Forwards to archive_read_support_filter_lzma() and returns its result
 * unchanged. */
137 archive_read_support_compression_lzma(struct archive *a)
139 return archive_read_support_filter_lzma(a);
/* Bidder callbacks for the legacy lzma format: .bid scores the stream
 * header (lzma_bidder_bid), .init installs the decompressor
 * (lzma_bidder_init). */
143 static const struct archive_read_filter_bidder_vtable
144 lzma_bidder_vtable = {
145 .bid = lzma_bidder_bid,
146 .init = lzma_bidder_init,
/* Register the lzma bidder (name lzma, callbacks from lzma_bidder_vtable)
 * on the read archive _a.
 *
 * Returns ARCHIVE_FATAL when registration fails.  The tail visible here
 * records a notice that the external lzma program will be used and
 * returns ARCHIVE_WARN.
 * NOTE(review): the return taken when liblzma is compiled in is not
 * visible in this excerpt; presumably ARCHIVE_OK, confirm. */
150 archive_read_support_filter_lzma(struct archive *_a)
152 struct archive_read *a = (struct archive_read *)_a;
154 if (__archive_read_register_bidder(a, NULL, "lzma",
155 &lzma_bidder_vtable) != ARCHIVE_OK)
156 return (ARCHIVE_FATAL);
158 #if HAVE_LZMA_H && HAVE_LIBLZMA
161 archive_set_error(_a, ARCHIVE_ERRNO_MISC,
162 "Using external lzma program for lzma decompression");
163 return (ARCHIVE_WARN);
168 #if ARCHIVE_VERSION_NUMBER < 4000000
/* Legacy entry point, compiled only for library versions below 4.0.
 * Forwards to archive_read_support_filter_lzip() and returns its result
 * unchanged. */
170 archive_read_support_compression_lzip(struct archive *a)
172 return archive_read_support_filter_lzip(a);
/* Bidder callbacks for the lzip format: .bid scores the stream header
 * (lzip_bidder_bid), .init installs the decompressor
 * (lzip_bidder_init). */
176 static const struct archive_read_filter_bidder_vtable
177 lzip_bidder_vtable = {
178 .bid = lzip_bidder_bid,
179 .init = lzip_bidder_init,
/* Register the lzip bidder (name lzip, callbacks from lzip_bidder_vtable)
 * on the read archive _a.
 *
 * Returns ARCHIVE_FATAL when registration fails.  The tail visible here
 * records a notice that the external lzip program will be used and
 * returns ARCHIVE_WARN.
 * NOTE(review): the return taken when liblzma is compiled in is not
 * visible in this excerpt; presumably ARCHIVE_OK, confirm. */
183 archive_read_support_filter_lzip(struct archive *_a)
185 struct archive_read *a = (struct archive_read *)_a;
187 if (__archive_read_register_bidder(a, NULL, "lzip",
188 &lzip_bidder_vtable) != ARCHIVE_OK)
189 return (ARCHIVE_FATAL);
191 #if HAVE_LZMA_H && HAVE_LIBLZMA
194 archive_set_error(_a, ARCHIVE_ERRNO_MISC,
195 "Using external lzip program for lzip decompression");
196 return (ARCHIVE_WARN);
201 * Test whether we can handle this data.
/* Bid for the xz format: peek at the first 6 bytes of the stream without
 * consuming them and compare them with the xz magic FD 37 7A 58 5A 00.
 * The self argument is unused.
 * NOTE(review): the NULL check on buffer and the bid values returned on
 * match / mismatch are not visible in this excerpt. */
204 xz_bidder_bid(struct archive_read_filter_bidder *self,
205 struct archive_read_filter *filter)
207 const unsigned char *buffer;
210 (void)self; /* UNUSED */
212 buffer = __archive_read_filter_ahead(filter, 6, &avail);
217 * Verify Header Magic Bytes : FD 37 7A 58 5A 00
219 if (memcmp(buffer, "\xFD\x37\x7A\x58\x5A\x00", 6) != 0)
226 * Test whether we can handle this data.
228 * <sigh> LZMA has a rather poor file signature. Zeros do not
229 * make good signature bytes as a rule, and the only non-zero byte
230 * here is an ASCII character. For example, an uncompressed tar
231 * archive whose first file is ']' would satisfy this check. It may
232 * be necessary to exclude LZMA from compression_all() because of
233 * this. Clients of libarchive would then have to explicitly enable
234 * LZMA checking instead of (or in addition to) compression_all() when
235 * they have other evidence (file name, command-line option) to go on.
/* Bid for the legacy lzma format by inspecting the stream header.
 *
 * Peeks at 14 bytes and examines three header fields:
 *   byte 0       - packed lc/lp/pb properties, rejected above 224;
 *   bytes 1..4   - dictionary size, little-endian 32-bit (buffer+1);
 *   bytes 5..12  - uncompressed size, little-endian 64-bit (buffer+5),
 *                  where all-ones means the size is unknown.
 * The result returned is bits_checked, a confidence score.
 * NOTE(review): the statements that accumulate bits_checked and the
 * early-return values are not visible in this excerpt.
 * The self argument is unused. */
238 lzma_bidder_bid(struct archive_read_filter_bidder *self,
239 struct archive_read_filter *filter)
241 const unsigned char *buffer;
244 uint64_t uncompressed_size;
247 (void)self; /* UNUSED */
249 buffer = __archive_read_filter_ahead(filter, 14, &avail);
253 /* First byte of raw LZMA stream is commonly 0x5d.
254 * The first byte is a special number, which consists of
255 * three parameters of LZMA compression, a number of literal
256 * context bits(which is from 0 to 8, default is 3), a number
257 * of literal pos bits(which is from 0 to 4, default is 0),
258 * a number of pos bits(which is from 0 to 4, default is 2).
259 * The first byte is made by
260 * (pos bits * 5 + literal pos bits) * 9 + literal context bits,
261 * and so the default value in this field is
262 * (2 * 5 + 0) * 9 + 3 = 0x5d.
263 * lzma of LZMA SDK has options to change those parameters.
264 * It means a range of this field is from 0 to 224. And lzma of
265 * XZ Utils with option -e records 0x5e in this field. */
266 /* NOTE: If this checking of the first byte increases false
267 * recognition, we should allow only 0x5d and 0x5e for the first
268 * byte of LZMA stream. */
270 if (buffer[0] > (4 * 5 + 4) * 9 + 8)
272 /* Most likely value in the first byte of LZMA stream. */
273 if (buffer[0] == 0x5d || buffer[0] == 0x5e)
276 /* Sixth through thirteenth bytes are uncompressed size,
277 * stored in little-endian order. `-1' means uncompressed
278 * size is unknown and lzma of XZ Utils always records `-1'
280 uncompressed_size = archive_le64dec(buffer+5);
281 if (uncompressed_size == (uint64_t)ARCHIVE_LITERAL_LL(-1))
284 /* Second through fifth bytes are dictionary size, stored in
285 * little-endian order. The minimum dictionary size is
286 * 1 << 12(4KiB) which the lzma of LZMA SDK uses with option
287 * -d12 and the maximum dictionary size is 1 << 29(512MiB)
288 * which the one uses with option -d29.
289 * NOTE: A comment of LZMA SDK source code says this dictionary
290 * range is from 1 << 12 to 1 << 30. */
291 dicsize = archive_le32dec(buffer+1);
293 case 0x00001000:/* lzma of LZMA SDK option -d12. */
294 case 0x00002000:/* lzma of LZMA SDK option -d13. */
295 case 0x00004000:/* lzma of LZMA SDK option -d14. */
296 case 0x00008000:/* lzma of LZMA SDK option -d15. */
297 case 0x00010000:/* lzma of XZ Utils option -0 and -1.
298 * lzma of LZMA SDK option -d16. */
299 case 0x00020000:/* lzma of LZMA SDK option -d17. */
300 case 0x00040000:/* lzma of LZMA SDK option -d18. */
301 case 0x00080000:/* lzma of XZ Utils option -2.
302 * lzma of LZMA SDK option -d19. */
303 case 0x00100000:/* lzma of XZ Utils option -3.
304 * lzma of LZMA SDK option -d20. */
305 case 0x00200000:/* lzma of XZ Utils option -4.
306 * lzma of LZMA SDK option -d21. */
307 case 0x00400000:/* lzma of XZ Utils option -5.
308 * lzma of LZMA SDK option -d22. */
309 case 0x00800000:/* lzma of XZ Utils option -6.
310 * lzma of LZMA SDK option -d23. */
311 case 0x01000000:/* lzma of XZ Utils option -7.
312 * lzma of LZMA SDK option -d24. */
313 case 0x02000000:/* lzma of XZ Utils option -8.
314 * lzma of LZMA SDK option -d25. */
315 case 0x04000000:/* lzma of XZ Utils option -9.
316 * lzma of LZMA SDK option -d26. */
317 case 0x08000000:/* lzma of LZMA SDK option -d27. */
321 /* If a memory usage for encoding was not enough on
322 * the platform where LZMA stream was made, lzma of
323 * XZ Utils automatically decreased the dictionary
324 * size to enough memory for encoding by 1Mi bytes
326 if (dicsize <= 0x03F00000 && dicsize >= 0x00300000 &&
327 (dicsize & ((1 << 20)-1)) == 0 &&
328 bits_checked == 8 + 64) {
332 /* Otherwise dictionary size is unlikely. But it is
333 * possible that someone makes lzma stream with
334 * liblzma/LZMA SDK in one's dictionary size. */
338 /* TODO: The above test is still very weak. It would be
339 * good to do better. */
341 return (bits_checked);
/* Probe the next 6 bytes of filter (without consuming them) for an lzip
 * member header: the 4-byte magic LZIP, a version byte that must be 0 or
 * 1, and a dictionary-size byte whose low 5 bits (log2 of the size) must
 * lie in 12..29.  Returns bits_checked; lzip_tail() treats a nonzero
 * result as meaning another member follows.
 * NOTE(review): the values returned on a failed check and the statements
 * that accumulate bits_checked are not visible in this excerpt. */
345 lzip_has_member(struct archive_read_filter *filter)
347 const unsigned char *buffer;
352 buffer = __archive_read_filter_ahead(filter, 6, &avail);
357 * Verify Header Magic Bytes : 4C 5A 49 50 (`LZIP')
360 if (memcmp(buffer, "LZIP", 4) != 0)
364 /* A version number must be 0 or 1 */
365 if (buffer[4] != 0 && buffer[4] != 1)
369 /* Dictionary size. */
370 log2dic = buffer[5] & 0x1f;
371 if (log2dic < 12 || log2dic > 29)
375 return (bits_checked);
/* Bid for the lzip format.  The bid is exactly the result of
 * lzip_has_member() on the stream; the self argument is unused. */
379 lzip_bidder_bid(struct archive_read_filter_bidder *self,
380 struct archive_read_filter *filter)
383 (void)self; /* UNUSED */
384 return (lzip_has_member(filter));
387 #if HAVE_LZMA_H && HAVE_LIBLZMA
390 * liblzma 4.999.7 and later support both lzma and xz streams.
/* liblzma build: tag the filter as ARCHIVE_FILTER_XZ, then run the shared
 * setup in xz_lzma_bidder_init() and return its result. */
393 xz_bidder_init(struct archive_read_filter *self)
395 self->code = ARCHIVE_FILTER_XZ;
397 return (xz_lzma_bidder_init(self));
/* liblzma build: tag the filter as ARCHIVE_FILTER_LZMA, then run the
 * shared setup in xz_lzma_bidder_init() and return its result. */
401 lzma_bidder_init(struct archive_read_filter *self)
403 self->code = ARCHIVE_FILTER_LZMA;
405 return (xz_lzma_bidder_init(self));
/* liblzma build: tag the filter as ARCHIVE_FILTER_LZIP, then run the
 * shared setup in xz_lzma_bidder_init() and return its result. */
409 lzip_bidder_init(struct archive_read_filter *self)
411 self->code = ARCHIVE_FILTER_LZIP;
413 return (xz_lzma_bidder_init(self));
417 * Set an error code and choose an error message
/* Translate a liblzma return code (ret) into an error message recorded on
 * the archive that owns self.
 *
 * LZMA_STREAM_END and LZMA_OK are listed as the non-error cases.  The two
 * memory-related messages are recorded with ENOMEM.  Format, options and
 * data errors, and the no-progress condition, each get a dedicated
 * message; any other code gets the generic unknown-error message.
 * NOTE(review): several case labels and the error numbers used for the
 * non-memory messages are not visible in this excerpt. */
420 set_error(struct archive_read_filter *self, int ret)
424 case LZMA_STREAM_END: /* Found end of stream. */
425 case LZMA_OK: /* Decompressor made some progress. */
428 archive_set_error(&self->archive->archive, ENOMEM,
429 "Lzma library error: Cannot allocate memory");
431 case LZMA_MEMLIMIT_ERROR:
432 archive_set_error(&self->archive->archive, ENOMEM,
433 "Lzma library error: Out of memory");
435 case LZMA_FORMAT_ERROR:
436 archive_set_error(&self->archive->archive,
438 "Lzma library error: format not recognized");
440 case LZMA_OPTIONS_ERROR:
441 archive_set_error(&self->archive->archive,
443 "Lzma library error: Invalid options");
445 case LZMA_DATA_ERROR:
446 archive_set_error(&self->archive->archive,
448 "Lzma library error: Corrupted input data");
451 archive_set_error(&self->archive->archive,
453 "Lzma library error: No progress is possible");
456 /* Return an error. */
457 archive_set_error(&self->archive->archive,
459 "Lzma decompression failed: Unknown error");
/* Reader callbacks installed on the filter by xz_lzma_bidder_init():
 * .read produces decompressed blocks, .close releases decoder state. */
464 static const struct archive_read_filter_vtable
465 xz_lzma_reader_vtable = {
466 .read = xz_filter_read,
467 .close = xz_filter_close,
471 * Setup the callbacks.
/* Shared liblzma setup for the xz, lzma and lzip filters.
 *
 * Allocates the zeroed private state and a 64 KiB output block, installs
 * xz_lzma_reader_vtable on the filter, and points the stream output at
 * the (empty) output block with no input pending.
 *
 * For lzip, in_stream is set to 0 and the decoder is not created here.
 * Otherwise in_stream is set to 1 and the decoder is created right away:
 * lzma_stream_decoder() for xz, lzma_alone_decoder() for lzma, both with
 * LZMA_MEMLIMIT.
 *
 * Returns ARCHIVE_FATAL when an allocation fails (ENOMEM is recorded) or
 * when decoder creation fails (message chosen by set_error(), output
 * block freed).
 * NOTE(review): the success return and the remaining cleanup statements
 * are not visible in this excerpt. */
474 xz_lzma_bidder_init(struct archive_read_filter *self)
476 static const size_t out_block_size = 64 * 1024;
478 struct private_data *state;
/* NOTE(review): calloc is called as (size, count) rather than the
 * conventional (count, size); the number of bytes allocated is the
 * same either way. */
481 state = (struct private_data *)calloc(sizeof(*state), 1);
482 out_block = (unsigned char *)malloc(out_block_size);
483 if (state == NULL || out_block == NULL) {
484 archive_set_error(&self->archive->archive, ENOMEM,
485 "Can't allocate data for xz decompression");
488 return (ARCHIVE_FATAL);
492 state->out_block_size = out_block_size;
493 state->out_block = out_block;
494 self->vtable = &xz_lzma_reader_vtable;
496 state->stream.avail_in = 0;
498 state->stream.next_out = state->out_block;
499 state->stream.avail_out = state->out_block_size;
502 if (self->code == ARCHIVE_FILTER_LZIP) {
504 * We have to read a lzip header and use it to initialize
505 * compression library, thus we cannot initialize the
506 * library for lzip here.
508 state->in_stream = 0;
511 state->in_stream = 1;
513 /* Initialize compression library. */
514 if (self->code == ARCHIVE_FILTER_XZ)
515 ret = lzma_stream_decoder(&(state->stream),
516 LZMA_MEMLIMIT,/* memlimit */
519 ret = lzma_alone_decoder(&(state->stream),
520 LZMA_MEMLIMIT);/* memlimit */
525 /* Library setup failed: Choose an error message and clean up. */
526 set_error(self, ret);
528 free(state->out_block);
531 return (ARCHIVE_FATAL);
/* Read one lzip member header from upstream and create a raw LZMA1
 * decoder configured from it.
 *
 * Steps visible here:
 *  - peek at the 6 header bytes and remember the version byte (h[4]);
 *  - derive the dictionary size from h[5]: the low 5 bits are log2 of the
 *    base size (must be 12..29) and the top 3 bits select how many
 *    sixteenths of the base size are subtracted;
 *  - store that size little-endian at props+1;
 *  - consume the 6 header bytes and start member_in at 6;
 *  - decode props into filters[0] with lzma_properties_decode(), create
 *    the decoder with lzma_raw_decoder(), then free the decoded options.
 *
 * Returns ARCHIVE_FATAL on a short read, an out-of-range dictionary size
 * or a liblzma failure (message chosen by set_error()).
 * NOTE(review): the assignment of props[0], any condition guarding the
 * dictionary-size adjustment, and the success return are not visible in
 * this excerpt. */
535 lzip_init(struct archive_read_filter *self)
537 struct private_data *state;
538 const unsigned char *h;
539 lzma_filter filters[2];
540 unsigned char props[5];
545 state = (struct private_data *)self->data;
546 h = __archive_read_filter_ahead(self->upstream, 6, &avail_in);
548 return (ARCHIVE_FATAL);
550 /* Get a version number. */
551 state->lzip_ver = h[4];
554 * Setup lzma property.
558 /* Get dictionary size. */
559 log2dic = h[5] & 0x1f;
560 if (log2dic < 12 || log2dic > 29)
561 return (ARCHIVE_FATAL);
562 dicsize = 1U << log2dic;
564 dicsize -= (dicsize / 16) * (h[5] >> 5);
565 archive_le32enc(props+1, dicsize);
567 /* Consume lzip header. */
568 __archive_read_filter_consume(self->upstream, 6);
569 state->member_in = 6;
571 filters[0].id = LZMA_FILTER_LZMA1;
572 filters[0].options = NULL;
573 filters[1].id = LZMA_VLI_UNKNOWN;
574 filters[1].options = NULL;
576 ret = lzma_properties_decode(&filters[0], NULL, props, sizeof(props));
577 if (ret != LZMA_OK) {
578 set_error(self, ret);
579 return (ARCHIVE_FATAL);
581 ret = lzma_raw_decoder(&(state->stream), filters);
582 free(filters[0].options);
583 if (ret != LZMA_OK) {
584 set_error(self, ret);
585 return (ARCHIVE_FATAL);
/* Validate the trailer that follows a finished lzip member, then check
 * whether another member follows.
 *
 * The trailer is tail bytes long; tail depends on lzip_ver.
 * NOTE(review): the assignments to tail are not visible in this excerpt.
 * Fields as read here:
 *   offset 0  - CRC32 of the uncompressed data (little-endian 32-bit),
 *               compared with the running state crc32;
 *   offset 4  - uncompressed size (little-endian 64-bit), compared with
 *               member_out;
 *   offset 12 - version 1 only: total member size (little-endian 64-bit),
 *               compared with member_in + tail.
 * On success the trailer is consumed.  If lzip_has_member() then reports
 * a further member, in_stream and the per-member counters are reset so
 * that decoding starts over for the next member.
 *
 * Returns ARCHIVE_FATAL when the upstream read fails, ARCHIVE_FAILED when
 * the trailer is short or any check does not match. */
591 lzip_tail(struct archive_read_filter *self)
593 struct private_data *state;
594 const unsigned char *f;
598 state = (struct private_data *)self->data;
599 if (state->lzip_ver == 0)
603 f = __archive_read_filter_ahead(self->upstream, tail, &avail_in);
604 if (f == NULL && avail_in < 0)
605 return (ARCHIVE_FATAL);
606 if (f == NULL || avail_in < tail) {
607 archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
608 "Lzip: Remaining data is less bytes");
609 return (ARCHIVE_FAILED);
612 /* Check the crc32 value of the uncompressed data of the current
614 if (state->crc32 != archive_le32dec(f)) {
615 archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
616 "Lzip: CRC32 error");
617 return (ARCHIVE_FAILED);
620 /* Check the uncompressed size of the current member */
621 if ((uint64_t)state->member_out != archive_le64dec(f + 4)) {
622 archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
623 "Lzip: Uncompressed size error");
624 return (ARCHIVE_FAILED);
627 /* Check the total size of the current member */
628 if (state->lzip_ver == 1 &&
629 (uint64_t)state->member_in + tail != archive_le64dec(f + 12)) {
630 archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
631 "Lzip: Member size error");
632 return (ARCHIVE_FAILED);
634 __archive_read_filter_consume(self->upstream, tail);
636 /* If current lzip data consists of multi member, try decompressing
638 if (lzip_has_member(self->upstream) != 0) {
639 state->in_stream = 0;
641 state->member_out = 0;
642 state->member_in = 0;
649 * Return the next block of decompressed data.
/* Read callback: produce the next block of decompressed data.
 *
 * On return *p points at the internal output block and the return value
 * is the number of decompressed bytes placed there.
 *
 * The output block is reset on every call and then filled in a loop that
 * runs until it is full or end of stream has been flagged:
 *  - when not inside a stream (lzip only), lzip_init() is run first;
 *  - upstream input is peeked, handed to lzma_code(), and the bytes that
 *    liblzma used are consumed from upstream.
 * After the loop the output counters are updated.  For lzip the running
 * CRC32 is extended over the new output and lzip_tail() is called.
 * NOTE(review): the condition guarding the lzip_tail() call, the
 * handling of a zero-byte result, and the statement that sets eof are
 * not visible in this excerpt.
 *
 * Returns ARCHIVE_FATAL on an upstream read failure or a liblzma error;
 * errors from lzip_init() / lzip_tail() are checked against ARCHIVE_OK. */
652 xz_filter_read(struct archive_read_filter *self, const void **p)
654 struct private_data *state;
659 state = (struct private_data *)self->data;
661 /* Empty our output buffer. */
662 state->stream.next_out = state->out_block;
663 state->stream.avail_out = state->out_block_size;
665 /* Try to fill the output buffer. */
666 while (state->stream.avail_out > 0 && !state->eof) {
667 if (!state->in_stream) {
669 * Initialize liblzma for lzip
671 ret = lzip_init(self);
672 if (ret != ARCHIVE_OK)
674 state->in_stream = 1;
676 state->stream.next_in =
677 __archive_read_filter_ahead(self->upstream, 1, &avail_in);
678 if (state->stream.next_in == NULL && avail_in < 0) {
679 archive_set_error(&self->archive->archive,
682 return (ARCHIVE_FATAL);
684 state->stream.avail_in = avail_in;
686 /* Decompress as much as we can in one pass.  When no input is
 * available, LZMA_FINISH is passed instead of LZMA_RUN. */
687 ret = lzma_code(&(state->stream),
688 (state->stream.avail_in == 0)? LZMA_FINISH: LZMA_RUN);
690 case LZMA_STREAM_END: /* Found end of stream. */
693 case LZMA_OK: /* Decompressor made some progress. */
694 __archive_read_filter_consume(self->upstream,
695 avail_in - state->stream.avail_in);
697 avail_in - state->stream.avail_in;
700 set_error(self, ret);
701 return (ARCHIVE_FATAL);
/* Bytes produced by this call: how far next_out has advanced from the
 * start of the output block. */
705 decompressed = state->stream.next_out - state->out_block;
706 state->total_out += decompressed;
707 state->member_out += decompressed;
708 if (decompressed == 0)
711 *p = state->out_block;
712 if (self->code == ARCHIVE_FILTER_LZIP) {
713 state->crc32 = lzma_crc32(state->out_block,
714 decompressed, state->crc32);
716 ret = lzip_tail(self);
717 if (ret != ARCHIVE_OK)
722 return (decompressed);
726 * Clean up the decompressor.
/* Close callback: shut down the liblzma stream with lzma_end() and free
 * the output block.
 * NOTE(review): release of the state structure itself and the return
 * value are not visible in this excerpt. */
729 xz_filter_close(struct archive_read_filter *self)
731 struct private_data *state;
733 state = (struct private_data *)self->data;
734 lzma_end(&(state->stream));
735 free(state->out_block);
744 * If we have no suitable library on this system, we can't actually do
745 * the decompression. We can, however, still detect compressed
746 * archives and emit a useful message.
/* Build without liblzma: delegate decompression to
 * __archive_read_program() with the command line lzma -d -qq, keeping its
 * result in r.  The filter code is set to ARCHIVE_FILTER_LZMA afterwards
 * regardless of that result.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably r is returned, confirm. */
750 lzma_bidder_init(struct archive_read_filter *self)
754 r = __archive_read_program(self, "lzma -d -qq");
755 /* Note: We set the format here even if __archive_read_program()
756 * above fails. We do, after all, know what the format is
757 * even if we weren't able to read it. */
758 self->code = ARCHIVE_FILTER_LZMA;
/* Build without liblzma: delegate decompression to
 * __archive_read_program() with the command line xz -d -qq, keeping its
 * result in r.  The filter code is set to ARCHIVE_FILTER_XZ afterwards
 * regardless of that result.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably r is returned, confirm. */
764 xz_bidder_init(struct archive_read_filter *self)
768 r = __archive_read_program(self, "xz -d -qq");
769 /* Note: We set the format here even if __archive_read_program()
770 * above fails. We do, after all, know what the format is
771 * even if we weren't able to read it. */
772 self->code = ARCHIVE_FILTER_XZ;
/* Build without liblzma: delegate decompression to
 * __archive_read_program() with the command line lzip -d -q, keeping its
 * result in r.  The filter code is set to ARCHIVE_FILTER_LZIP afterwards
 * regardless of that result.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably r is returned, confirm. */
778 lzip_bidder_init(struct archive_read_filter *self)
782 r = __archive_read_program(self, "lzip -d -q");
783 /* Note: We set the format here even if __archive_read_program()
784 * above fails. We do, after all, know what the format is
785 * even if we weren't able to read it. */
786 self->code = ARCHIVE_FILTER_LZIP;
791 #endif /* HAVE_LZMA_H && HAVE_LIBLZMA */