2 * Copyright (c) 2009-2011 Michihiro NAKAJIMA
3 * Copyright (c) 2003-2008 Tim Kientzle and Miklos Vajna
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include "archive_platform.h"
29 __FBSDID("$FreeBSD$");
49 #include "archive_endian.h"
50 #include "archive_private.h"
51 #include "archive_read_private.h"
53 #if HAVE_LZMA_H && HAVE_LIBLZMA
/*
 * Visible members of the per-filter decoder state (struct private_data).
 * NOTE(review): the struct header and several other members are missing
 * from this listing; only the members shown here are documented.
 */
/* Buffer that receives decompressed bytes; allocated in
 * xz_lzma_bidder_init() and freed in xz_filter_close(). */
57 unsigned char *out_block;
/* Capacity of out_block in bytes. */
58 size_t out_block_size;
60 char eof; /* True = found end of compressed data. */
63 /* Following variables are used for lzip only. */
/*
 * Memory limit passed to lzma_stream_decoder() / lzma_alone_decoder().
 * NOTE(review): the #else / #endif lines that separate the two definitions
 * are missing from this listing.
 */
70 #if LZMA_VERSION_MAJOR >= 5
71 /* Effectively disable the limiter. */
72 #define LZMA_MEMLIMIT UINT64_MAX
74 /* NOTE: This should check how much memory the running system has;
 * for now a fixed limit of 1 << 30 bytes is used. */
75 #define LZMA_MEMLIMIT (1U << 30)
78 /* Combined lzip/lzma/xz filter */
/* Read and close callbacks installed on the filter by xz_lzma_bidder_init(),
 * followed by that shared initializer itself. */
79 static ssize_t xz_filter_read(struct archive_read_filter *, const void **);
80 static int xz_filter_close(struct archive_read_filter *);
81 static int xz_lzma_bidder_init(struct archive_read_filter *);
86 * Note that we can detect xz and lzma compressed files even if we
87 * can't decompress them. (In fact, we like detecting them because we
88 * can give better error messages.) So the bid framework here gets
89 * compiled even if no lzma library is available.
/*
 * Per-format bid/init entry points.  The archive_read_support_filter_*()
 * functions store these in the bidder's `bid` and `init` members.
 * lzip_has_member() is the header probe shared by the lzip bidder and the
 * multi-member handling in lzip_tail().
 */
91 static int xz_bidder_bid(struct archive_read_filter_bidder *,
92 struct archive_read_filter *);
93 static int xz_bidder_init(struct archive_read_filter *);
94 static int lzma_bidder_bid(struct archive_read_filter_bidder *,
95 struct archive_read_filter *);
96 static int lzma_bidder_init(struct archive_read_filter *);
97 static int lzip_has_member(struct archive_read_filter *);
98 static int lzip_bidder_bid(struct archive_read_filter_bidder *,
99 struct archive_read_filter *);
100 static int lzip_bidder_init(struct archive_read_filter *);
102 #if ARCHIVE_VERSION_NUMBER < 4000000
103 /* Deprecated; remove in libarchive 4.0 */
/* Compatibility alias: forwards unchanged to
 * archive_read_support_filter_xz() and returns its result. */
105 archive_read_support_compression_xz(struct archive *a)
107 return archive_read_support_filter_xz(a);
/*
 * Register the xz bidder on the read handle `_a`.
 *
 * Returns ARCHIVE_FATAL when no bidder slot can be obtained from
 * __archive_read_get_bidder().  The visible tail records the message
 * "Using external xz program for xz decompression" and returns
 * ARCHIVE_WARN.
 * NOTE(review): the #else/#endif lines and the return taken when liblzma is
 * available are missing from this listing; presumably the warning path only
 * applies to builds without liblzma -- confirm against the full file.
 */
112 archive_read_support_filter_xz(struct archive *_a)
114 struct archive_read *a = (struct archive_read *)_a;
115 struct archive_read_filter_bidder *bidder;
/* Validate the handle: read magic and ARCHIVE_STATE_NEW are required. */
117 archive_check_magic(_a, ARCHIVE_READ_MAGIC,
118 ARCHIVE_STATE_NEW, "archive_read_support_filter_xz");
120 if (__archive_read_get_bidder(a, &bidder) != ARCHIVE_OK)
121 return (ARCHIVE_FATAL);
/* Install the xz callbacks; this filter takes no options. */
125 bidder->bid = xz_bidder_bid;
126 bidder->init = xz_bidder_init;
127 bidder->options = NULL;
129 #if HAVE_LZMA_H && HAVE_LIBLZMA
132 archive_set_error(_a, ARCHIVE_ERRNO_MISC,
133 "Using external xz program for xz decompression");
134 return (ARCHIVE_WARN);
138 #if ARCHIVE_VERSION_NUMBER < 4000000
/* Compatibility alias: forwards unchanged to
 * archive_read_support_filter_lzma() and returns its result. */
140 archive_read_support_compression_lzma(struct archive *a)
142 return archive_read_support_filter_lzma(a);
/*
 * Register the lzma bidder on the read handle `_a`.
 *
 * Returns ARCHIVE_FATAL when no bidder slot can be obtained from
 * __archive_read_get_bidder().  The visible tail records the message
 * "Using external lzma program for lzma decompression" and returns
 * ARCHIVE_WARN.
 * NOTE(review): the #else/#endif lines and the return taken when liblzma is
 * available are missing from this listing; presumably the warning path only
 * applies to builds without liblzma -- confirm against the full file.
 */
147 archive_read_support_filter_lzma(struct archive *_a)
149 struct archive_read *a = (struct archive_read *)_a;
150 struct archive_read_filter_bidder *bidder;
/* Validate the handle: read magic and ARCHIVE_STATE_NEW are required. */
152 archive_check_magic(_a, ARCHIVE_READ_MAGIC,
153 ARCHIVE_STATE_NEW, "archive_read_support_filter_lzma");
155 if (__archive_read_get_bidder(a, &bidder) != ARCHIVE_OK)
156 return (ARCHIVE_FATAL);
/* Install the lzma callbacks; this filter takes no options. */
159 bidder->name = "lzma";
160 bidder->bid = lzma_bidder_bid;
161 bidder->init = lzma_bidder_init;
162 bidder->options = NULL;
164 #if HAVE_LZMA_H && HAVE_LIBLZMA
167 archive_set_error(_a, ARCHIVE_ERRNO_MISC,
168 "Using external lzma program for lzma decompression");
169 return (ARCHIVE_WARN);
174 #if ARCHIVE_VERSION_NUMBER < 4000000
/* Compatibility alias: forwards unchanged to
 * archive_read_support_filter_lzip() and returns its result. */
176 archive_read_support_compression_lzip(struct archive *a)
178 return archive_read_support_filter_lzip(a);
/*
 * Register the lzip bidder on the read handle `_a`.
 *
 * Returns ARCHIVE_FATAL when no bidder slot can be obtained from
 * __archive_read_get_bidder().  The visible tail records the message
 * "Using external lzip program for lzip decompression" and returns
 * ARCHIVE_WARN.
 * NOTE(review): the #else/#endif lines and the return taken when liblzma is
 * available are missing from this listing; presumably the warning path only
 * applies to builds without liblzma -- confirm against the full file.
 */
183 archive_read_support_filter_lzip(struct archive *_a)
185 struct archive_read *a = (struct archive_read *)_a;
186 struct archive_read_filter_bidder *bidder;
/* Validate the handle: read magic and ARCHIVE_STATE_NEW are required. */
188 archive_check_magic(_a, ARCHIVE_READ_MAGIC,
189 ARCHIVE_STATE_NEW, "archive_read_support_filter_lzip");
191 if (__archive_read_get_bidder(a, &bidder) != ARCHIVE_OK)
192 return (ARCHIVE_FATAL);
/* Install the lzip callbacks; this filter takes no options. */
195 bidder->name = "lzip";
196 bidder->bid = lzip_bidder_bid;
197 bidder->init = lzip_bidder_init;
198 bidder->options = NULL;
200 #if HAVE_LZMA_H && HAVE_LIBLZMA
203 archive_set_error(_a, ARCHIVE_ERRNO_MISC,
204 "Using external lzip program for lzip decompression");
205 return (ARCHIVE_WARN);
210 * Test whether we can handle this data.
/*
 * Bid for the xz container: peek at the first 6 bytes of `filter` and
 * compare them with the xz magic FD 37 7A 58 5A 00.
 * `self` is unused.
 * NOTE(review): the NULL check on `buffer` and every return statement are
 * missing from this listing, so the actual bid values cannot be stated here.
 */
213 xz_bidder_bid(struct archive_read_filter_bidder *self,
214 struct archive_read_filter *filter)
216 const unsigned char *buffer;
219 (void)self; /* UNUSED */
221 buffer = __archive_read_filter_ahead(filter, 6, &avail);
226 * Verify Header Magic Bytes : FD 37 7A 58 5A 00
228 if (memcmp(buffer, "\xFD\x37\x7A\x58\x5A\x00", 6) != 0)
235 * Test whether we can handle this data.
237 * <sigh> LZMA has a rather poor file signature. Zeros do not
238 * make good signature bytes as a rule, and the only non-zero byte
239 * here is an ASCII character. For example, an uncompressed tar
240 * archive whose first file is ']' would satisfy this check. It may
241 * be necessary to exclude LZMA from compression_all() because of
242 * this. Clients of libarchive would then have to explicitly enable
243 * LZMA checking instead of (or in addition to) compression_all() when
244 * they have other evidence (file name, command-line option) to go on.
/*
 * Bid for the legacy .lzma ("LZMA alone") format.
 *
 * Peeks at 14 bytes of `filter` and examines three header fields:
 *   buffer[0]      properties byte, rejected when above (4*5+4)*9+8 = 224;
 *   buffer[1..4]   dictionary size, 32-bit little-endian;
 *   buffer[5..12]  uncompressed size, 64-bit little-endian (all ones means
 *                  "unknown").
 * Returns `bits_checked`; `self` is unused.
 * NOTE(review): the statements that adjust `bits_checked`, the early
 * returns, and the `switch`/`default` lines are missing from this listing,
 * so the exact score per field is not documented here.
 */
247 lzma_bidder_bid(struct archive_read_filter_bidder *self,
248 struct archive_read_filter *filter)
250 const unsigned char *buffer;
253 uint64_t uncompressed_size;
256 (void)self; /* UNUSED */
258 buffer = __archive_read_filter_ahead(filter, 14, &avail);
262 /* First byte of raw LZMA stream is commonly 0x5d.
263 * The first byte is a special number, which consists of
264 * three parameters of LZMA compression, a number of literal
265 * context bits(which is from 0 to 8, default is 3), a number
266 * of literal pos bits(which is from 0 to 4, default is 0),
267 * a number of pos bits(which is from 0 to 4, default is 2).
268 * The first byte is made by
269 * (pos bits * 5 + literal pos bits) * 9 + literal context bits,
270 * and so the default value in this field is
271 * (2 * 5 + 0) * 9 + 3 = 0x5d.
272 * lzma of LZMA SDK has options to change those parameters.
273 * It means a range of this field is from 0 to 224. And lzma of
274 * XZ Utils with option -e records 0x5e in this field. */
275 /* NOTE: If this checking of the first byte increases false
276 * recognition, we should allow only 0x5d and 0x5e for the first
277 * byte of LZMA stream. */
279 if (buffer[0] > (4 * 5 + 4) * 9 + 8)
281 /* Most likely value in the first byte of LZMA stream. */
282 if (buffer[0] == 0x5d || buffer[0] == 0x5e)
285 /* Sixth through thirteenth bytes are uncompressed size,
286 * stored in little-endian order. `-1' means uncompressed
287 * size is unknown and lzma of XZ Utils always records `-1'
289 uncompressed_size = archive_le64dec(buffer+5);
290 if (uncompressed_size == (uint64_t)ARCHIVE_LITERAL_LL(-1))
293 /* Second through fifth bytes are dictionary size, stored in
294 * little-endian order. The minimum dictionary size is
295 * 1 << 12(4KiB) which the lzma of LZMA SDK uses with option
296 * -d12 and the maximum dictionary size is 1 << 27(128MiB)
297 * which the one uses with option -d27.
298 * NOTE: A comment of LZMA SDK source code says this dictionary
299 * range is from 1 << 12 to 1 << 30. */
300 dicsize = archive_le32dec(buffer+1);
302 case 0x00001000:/* lzma of LZMA SDK option -d12. */
303 case 0x00002000:/* lzma of LZMA SDK option -d13. */
304 case 0x00004000:/* lzma of LZMA SDK option -d14. */
305 case 0x00008000:/* lzma of LZMA SDK option -d15. */
306 case 0x00010000:/* lzma of XZ Utils option -0 and -1.
307 * lzma of LZMA SDK option -d16. */
308 case 0x00020000:/* lzma of LZMA SDK option -d17. */
309 case 0x00040000:/* lzma of LZMA SDK option -d18. */
310 case 0x00080000:/* lzma of XZ Utils option -2.
311 * lzma of LZMA SDK option -d19. */
312 case 0x00100000:/* lzma of XZ Utils option -3.
313 * lzma of LZMA SDK option -d20. */
314 case 0x00200000:/* lzma of XZ Utils option -4.
315 * lzma of LZMA SDK option -d21. */
316 case 0x00400000:/* lzma of XZ Utils option -5.
317 * lzma of LZMA SDK option -d22. */
318 case 0x00800000:/* lzma of XZ Utils option -6.
319 * lzma of LZMA SDK option -d23. */
320 case 0x01000000:/* lzma of XZ Utils option -7.
321 * lzma of LZMA SDK option -d24. */
322 case 0x02000000:/* lzma of XZ Utils option -8.
323 * lzma of LZMA SDK option -d25. */
324 case 0x04000000:/* lzma of XZ Utils option -9.
325 * lzma of LZMA SDK option -d26. */
326 case 0x08000000:/* lzma of LZMA SDK option -d27. */
330 /* If a memory usage for encoding was not enough on
331 * the platform where LZMA stream was made, lzma of
332 * XZ Utils automatically decreased the dictionary
333 * size to enough memory for encoding by 1Mi bytes
335 if (dicsize <= 0x03F00000 && dicsize >= 0x00300000 &&
336 (dicsize & ((1 << 20)-1)) == 0 &&
337 bits_checked == 8 + 64) {
341 /* Otherwise dictionary size is unlikely. But it is
342 * possible that someone makes lzma stream with
343 * liblzma/LZMA SDK in one's dictionary size. */
347 /* TODO: The above test is still very weak. It would be
348 * good to do better. */
350 return (bits_checked);
/*
 * Probe `filter` for a plausible lzip member header without consuming it.
 *
 * Looks at 6 bytes: the 4-byte magic "LZIP", a version byte that must be
 * 0 or 1, and a dictionary-size byte whose low 5 bits (log2 of the size)
 * must lie in 12..27.  Returns `bits_checked`.
 * NOTE(review): the early returns and the updates to `bits_checked` are
 * missing from this listing; lzip_tail() treats a non-zero result as
 * "another member follows".
 */
354 lzip_has_member(struct archive_read_filter *filter)
356 const unsigned char *buffer;
361 buffer = __archive_read_filter_ahead(filter, 6, &avail);
366 * Verify Header Magic Bytes : 4C 5A 49 50 (`LZIP')
369 if (memcmp(buffer, "LZIP", 4) != 0)
373 /* A version number must be 0 or 1 */
374 if (buffer[4] != 0 && buffer[4] != 1)
378 /* Dictionary size. */
379 log2dic = buffer[5] & 0x1f;
380 if (log2dic < 12 || log2dic > 27)
384 return (bits_checked);
/* Bid for lzip: the score is whatever lzip_has_member() reports for the
 * data at the head of `filter`.  `self` is unused. */
388 lzip_bidder_bid(struct archive_read_filter_bidder *self,
389 struct archive_read_filter *filter)
392 (void)self; /* UNUSED */
393 return (lzip_has_member(filter));
396 #if HAVE_LZMA_H && HAVE_LIBLZMA
399 * liblzma 4.999.7 and later support both lzma and xz streams.
/* liblzma build: tag the filter as ARCHIVE_FILTER_XZ, then hand off to the
 * shared initializer, which reads self->code to pick the decoder. */
402 xz_bidder_init(struct archive_read_filter *self)
404 self->code = ARCHIVE_FILTER_XZ;
406 return (xz_lzma_bidder_init(self));
/* liblzma build: tag the filter as ARCHIVE_FILTER_LZMA, then hand off to the
 * shared initializer, which reads self->code to pick the decoder. */
410 lzma_bidder_init(struct archive_read_filter *self)
412 self->code = ARCHIVE_FILTER_LZMA;
414 return (xz_lzma_bidder_init(self));
/* liblzma build: tag the filter as ARCHIVE_FILTER_LZIP, then hand off to the
 * shared initializer, which defers decoder creation for lzip. */
418 lzip_bidder_init(struct archive_read_filter *self)
420 self->code = ARCHIVE_FILTER_LZIP;
422 return (xz_lzma_bidder_init(self));
426 * Set an error code and choose an error message
/*
 * Record an error message on the archive that owns `self`, chosen from the
 * liblzma return code `ret`.
 *
 * The two memory-related messages are reported with ENOMEM.
 * NOTE(review): this listing omits the `switch`, several `case` labels, the
 * `break` lines and the errno argument of the remaining
 * archive_set_error() calls, so the handling of LZMA_STREAM_END / LZMA_OK
 * and the exact errno values are not documented here.
 */
429 set_error(struct archive_read_filter *self, int ret)
433 case LZMA_STREAM_END: /* Found end of stream. */
434 case LZMA_OK: /* Decompressor made some progress. */
/* NOTE(review): the case label for the next message is not visible. */
437 archive_set_error(&self->archive->archive, ENOMEM,
438 "Lzma library error: Cannot allocate memory");
440 case LZMA_MEMLIMIT_ERROR:
441 archive_set_error(&self->archive->archive, ENOMEM,
442 "Lzma library error: Out of memory");
444 case LZMA_FORMAT_ERROR:
445 archive_set_error(&self->archive->archive,
447 "Lzma library error: format not recognized");
449 case LZMA_OPTIONS_ERROR:
450 archive_set_error(&self->archive->archive,
452 "Lzma library error: Invalid options");
454 case LZMA_DATA_ERROR:
455 archive_set_error(&self->archive->archive,
457 "Lzma library error: Corrupted input data");
/* NOTE(review): the case label for the next message is not visible. */
460 archive_set_error(&self->archive->archive,
462 "Lzma library error: No progress is possible");
465 /* Return an error. */
466 archive_set_error(&self->archive->archive,
468 "Lzma decompression failed: Unknown error");
474 * Setup the callbacks.
/*
 * Shared initializer for the xz, lzma and lzip read filters.
 *
 * Allocates the zeroed private state and a 64 KiB output buffer, installs
 * the read/close callbacks (skipping is not supported) and points the
 * liblzma stream at the empty output buffer.  For xz and lzma the decoder
 * is created right here; for lzip it is deferred (in_stream = 0) until
 * the member header has been read.
 *
 * Returns ARCHIVE_FATAL when allocation or decoder setup fails.
 * NOTE(review): the success return, the `else` lines and the assignment of
 * self->data are missing from this listing.
 */
477 xz_lzma_bidder_init(struct archive_read_filter *self)
479 static const size_t out_block_size = 64 * 1024;
481 struct private_data *state;
484 state = (struct private_data *)calloc(sizeof(*state), 1);
485 out_block = (unsigned char *)malloc(out_block_size);
486 if (state == NULL || out_block == NULL) {
487 archive_set_error(&self->archive->archive, ENOMEM,
488 "Can't allocate data for xz decompression");
/* NOTE(review): two lines are missing here in this listing; verify that
 * whichever of `state` / `out_block` was allocated is released on this
 * path. */
491 return (ARCHIVE_FATAL);
495 state->out_block_size = out_block_size;
496 state->out_block = out_block;
497 self->read = xz_filter_read;
498 self->skip = NULL; /* not supported */
499 self->close = xz_filter_close;
/* Start with no pending input and the whole output buffer free. */
501 state->stream.avail_in = 0;
503 state->stream.next_out = state->out_block;
504 state->stream.avail_out = state->out_block_size;
507 if (self->code == ARCHIVE_FILTER_LZIP) {
509 * We have to read a lzip header and use it to initialize
510 * compression library, thus we cannot initialize the
511 * library for lzip here.
513 state->in_stream = 0;
516 state->in_stream = 1;
518 /* Initialize compression library. */
519 if (self->code == ARCHIVE_FILTER_XZ)
520 ret = lzma_stream_decoder(&(state->stream),
521 LZMA_MEMLIMIT,/* memlimit */
/* Anything that is not xz at this point uses the .lzma ("alone") decoder. */
524 ret = lzma_alone_decoder(&(state->stream),
525 LZMA_MEMLIMIT);/* memlimit */
530 /* Library setup failed: Choose an error message and clean up. */
531 set_error(self, ret);
533 free(state->out_block);
536 return (ARCHIVE_FATAL);
/*
 * Read one lzip member header from upstream and create a raw LZMA1 decoder
 * configured from it.
 *
 * Header bytes used: h[4] is the lzip version; the low 5 bits of h[5] are
 * log2 of the base dictionary size (must be 12..27) and the high 3 bits
 * give the number of sixteenths of that base to subtract.  The 6 header
 * bytes are then consumed and member_in starts at 6.
 *
 * Returns ARCHIVE_FATAL on a bad dictionary size or when
 * lzma_properties_decode() / lzma_raw_decoder() fail (the error message
 * is set through set_error()).
 * NOTE(review): the NULL check on `h`, the assignment of props[0] and the
 * success return are missing from this listing.
 */
540 lzip_init(struct archive_read_filter *self)
542 struct private_data *state;
543 const unsigned char *h;
544 lzma_filter filters[2];
545 unsigned char props[5];
550 state = (struct private_data *)self->data;
551 h = __archive_read_filter_ahead(self->upstream, 6, &avail_in);
553 return (ARCHIVE_FATAL);
555 /* Get a version number. */
556 state->lzip_ver = h[4];
559 * Setup lzma property.
563 /* Get dictionary size. */
564 log2dic = h[5] & 0x1f;
565 if (log2dic < 12 || log2dic > 27)
566 return (ARCHIVE_FATAL);
567 dicsize = 1U << log2dic;
/* Reduce the power-of-two base by (h[5] >> 5) sixteenths. */
569 dicsize -= (dicsize / 16) * (h[5] >> 5);
/* Dictionary size goes little-endian into props[1..4]. */
570 archive_le32enc(props+1, dicsize);
572 /* Consume lzip header. */
573 __archive_read_filter_consume(self->upstream, 6);
574 state->member_in = 6;
/* Filter chain: a single LZMA1 filter, terminated by LZMA_VLI_UNKNOWN. */
576 filters[0].id = LZMA_FILTER_LZMA1;
577 filters[0].options = NULL;
578 filters[1].id = LZMA_VLI_UNKNOWN;
579 filters[1].options = NULL;
581 ret = lzma_properties_decode(&filters[0], NULL, props, sizeof(props));
582 if (ret != LZMA_OK) {
583 set_error(self, ret);
584 return (ARCHIVE_FATAL);
586 ret = lzma_raw_decoder(&(state->stream), filters);
/* The decoded options are freed as soon as the decoder has been created
 * (or has failed to be created). */
587 free(filters[0].options);
588 if (ret != LZMA_OK) {
589 set_error(self, ret);
590 return (ARCHIVE_FATAL);
/*
 * Validate the trailer of the lzip member that has just been decoded and
 * get ready for a possible following member.
 *
 * Trailer layout as read here: 32-bit LE CRC32 at offset 0, 64-bit LE
 * uncompressed size at offset 4 and, for version 1 only, 64-bit LE total
 * member size at offset 12 (compared with member_in + tail).
 *
 * Returns ARCHIVE_FATAL when the upstream read reports an error and
 * ARCHIVE_FAILED when the trailer is short or any check mismatches.  After
 * a valid trailer is consumed, a non-zero lzip_has_member() result resets
 * the per-member state so the next member is decoded.
 * NOTE(review): the computation of `tail` (it depends on lzip_ver), part of
 * the per-member reset, and the final return are missing from this listing.
 */
596 lzip_tail(struct archive_read_filter *self)
598 struct private_data *state;
599 const unsigned char *f;
603 state = (struct private_data *)self->data;
604 if (state->lzip_ver == 0)
608 f = __archive_read_filter_ahead(self->upstream, tail, &avail_in);
609 if (f == NULL && avail_in < 0)
610 return (ARCHIVE_FATAL);
611 if (f == NULL || avail_in < tail) {
612 archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
613 "Lzip: Remaining data is less bytes");
614 return (ARCHIVE_FAILED);
617 /* Check the crc32 value of the uncompressed data of the current
619 if (state->crc32 != archive_le32dec(f)) {
620 archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
621 "Lzip: CRC32 error");
622 return (ARCHIVE_FAILED);
625 /* Check the uncompressed size of the current member */
626 if ((uint64_t)state->member_out != archive_le64dec(f + 4)) {
627 archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
628 "Lzip: Uncompressed size error");
629 return (ARCHIVE_FAILED);
632 /* Check the total size of the current member */
633 if (state->lzip_ver == 1 &&
634 (uint64_t)state->member_in + tail != archive_le64dec(f + 12)) {
635 archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
636 "Lzip: Member size error");
637 return (ARCHIVE_FAILED);
639 __archive_read_filter_consume(self->upstream, tail);
641 /* If current lzip data consists of multi member, try decompressing
643 if (lzip_has_member(self->upstream) != 0) {
644 state->in_stream = 0;
646 state->member_out = 0;
647 state->member_in = 0;
654 * Return the next block of decompressed data.
657 xz_filter_read(struct archive_read_filter *self, const void **p)
659 struct private_data *state;
664 state = (struct private_data *)self->data;
666 /* Empty our output buffer. */
667 state->stream.next_out = state->out_block;
668 state->stream.avail_out = state->out_block_size;
/*
 * Overview of xz_filter_read(): decode into state->out_block until the
 * buffer is full or `eof` is set, publish the buffer through *p and return
 * the number of bytes decoded in this call.  Returns ARCHIVE_FATAL when the
 * upstream read fails or liblzma reports an error (message set through
 * set_error()).
 * NOTE(review): this listing omits several lines of the function (the
 * `switch` header, the handling directly under LZMA_STREAM_END, the
 * `*p = NULL` / `else` lines and the condition guarding lzip_tail());
 * comments below describe only what is visible.
 */
670 /* Try to fill the output buffer. */
671 while (state->stream.avail_out > 0 && !state->eof) {
672 if (!state->in_stream) {
674 * Initialize liblzma for lzip
676 ret = lzip_init(self);
677 if (ret != ARCHIVE_OK)
679 state->in_stream = 1;
/* Peek at whatever upstream has available (at least one byte requested). */
681 state->stream.next_in =
682 __archive_read_filter_ahead(self->upstream, 1, &avail_in);
683 if (state->stream.next_in == NULL && avail_in < 0) {
684 archive_set_error(&self->archive->archive,
687 return (ARCHIVE_FATAL);
689 state->stream.avail_in = avail_in;
691 /* Decompress as much as we can in one pass. */
/* With no input bytes available, ask liblzma to finish; otherwise run. */
692 ret = lzma_code(&(state->stream),
693 (state->stream.avail_in == 0)? LZMA_FINISH: LZMA_RUN);
695 case LZMA_STREAM_END: /* Found end of stream. */
698 case LZMA_OK: /* Decompressor made some progress. */
/* Consume exactly the input bytes the decoder used in this pass. */
699 __archive_read_filter_consume(self->upstream,
700 avail_in - state->stream.avail_in);
702 avail_in - state->stream.avail_in;
705 set_error(self, ret);
706 return (ARCHIVE_FATAL);
/* Bytes produced in this call = distance next_out advanced in out_block. */
710 decompressed = state->stream.next_out - state->out_block;
711 state->total_out += decompressed;
712 state->member_out += decompressed;
713 if (decompressed == 0)
716 *p = state->out_block;
717 if (self->code == ARCHIVE_FILTER_LZIP) {
/* lzip only: fold this chunk into the running member CRC32. */
718 state->crc32 = lzma_crc32(state->out_block,
719 decompressed, state->crc32);
721 ret = lzip_tail(self);
722 if (ret != ARCHIVE_OK)
727 return (decompressed);
731 * Clean up the decompressor.
/*
 * Close callback: shuts down the liblzma stream with lzma_end() and frees
 * the output buffer.
 * NOTE(review): the remaining lines (presumably freeing `state` and the
 * return) are missing from this listing -- confirm against the full file.
 */
734 xz_filter_close(struct archive_read_filter *self)
736 struct private_data *state;
738 state = (struct private_data *)self->data;
739 lzma_end(&(state->stream));
740 free(state->out_block);
749 * If we have no suitable library on this system, we can't actually do
750 * the decompression. We can, however, still detect compressed
751 * archives and emit a useful message.
/*
 * Variant used when no suitable lzma library is available: decode by
 * running the external command "lzma -d -qq" via __archive_read_program().
 * NOTE(review): the declaration of `r` and the return are missing from this
 * listing; presumably `r` is returned.
 */
755 lzma_bidder_init(struct archive_read_filter *self)
759 r = __archive_read_program(self, "lzma -d -qq");
760 /* Note: We set the format here even if __archive_read_program()
761 * above fails. We do, after all, know what the format is
762 * even if we weren't able to read it. */
763 self->code = ARCHIVE_FILTER_LZMA;
/*
 * Variant used when no suitable lzma library is available: decode by
 * running the external command "xz -d -qq" via __archive_read_program().
 * NOTE(review): the declaration of `r` and the return are missing from this
 * listing; presumably `r` is returned.
 */
769 xz_bidder_init(struct archive_read_filter *self)
773 r = __archive_read_program(self, "xz -d -qq");
774 /* Note: We set the format here even if __archive_read_program()
775 * above fails. We do, after all, know what the format is
776 * even if we weren't able to read it. */
777 self->code = ARCHIVE_FILTER_XZ;
/*
 * Variant used when no suitable lzma library is available: decode by
 * running the external command "lzip -d -q" via __archive_read_program().
 * NOTE(review): the declaration of `r` and the return are missing from this
 * listing; presumably `r` is returned.
 */
783 lzip_bidder_init(struct archive_read_filter *self)
787 r = __archive_read_program(self, "lzip -d -q");
788 /* Note: We set the format here even if __archive_read_program()
789 * above fails. We do, after all, know what the format is
790 * even if we weren't able to read it. */
791 self->code = ARCHIVE_FILTER_LZIP;
796 #endif /* HAVE_LZMA_H */