2 * Copyright (c) 2014 Michihiro NAKAJIMA
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "archive_platform.h"
28 __FBSDID("$FreeBSD$");
48 #include "archive_endian.h"
49 #include "archive_private.h"
50 #include "archive_read_private.h"
51 #include "archive_xxhash.h"
53 #define LZ4_MAGICNUMBER 0x184d2204
54 #define LZ4_SKIPPABLED 0x184d2a50
55 #define LZ4_LEGACY 0x184c2102
#if defined(HAVE_LIBLZ4)
/*
 * Decoder state attached to the filter by lz4_reader_init() and
 * released by lz4_filter_close().
 *
 * NOTE(review): the types of out_block, unconsumed, decoded_size and
 * xxh32_state are inferred from how the functions in this file use
 * them -- confirm against the project's reference copy.
 */
struct private_data {
	/* Which part of the input lz4_filter_read() expects next. */
	enum {
		SELECT_STREAM,		/* A frame magic number. */
		READ_DEFAULT_STREAM,	/* Descriptor of a default frame. */
		READ_DEFAULT_BLOCK,	/* Data block of a default frame. */
		READ_LEGACY_STREAM,	/* First block of a legacy frame. */
		READ_LEGACY_BLOCK	/* Data block of a legacy frame. */
	}		 stage;
	/* Values taken from the frame descriptor. */
	struct {
		unsigned block_independence:1;
		/* Size in bytes of the per-block checksum: 0 or 4. */
		unsigned block_checksum:3;
		unsigned stream_size:1;
		unsigned stream_checksum:1;
		unsigned preset_dictionary:1;
		int	 block_maximum_size;
	} flags;
	/* Buffer receiving decoded data, and its allocated size. */
	char		*out_block;
	size_t		 out_block_size;

	/* Bytes read but not yet consumed via __archive_read_consume() */
	size_t		 unconsumed;
	/* Size of the block most recently decoded into out_block. */
	size_t		 decoded_size;
	/* Running state of the stream checksum, when one is in use. */
	void		*xxh32_state;

	char		 valid; /* True = decompressor is initialized */
	char		 eof; /* True = found end of compressed data. */
};

#define LEGACY_BLOCK_SIZE	(8 * 1024 * 1024)

/* Lz4 filter */
static ssize_t	lz4_filter_read(struct archive_read_filter *, const void **);
static int	lz4_filter_close(struct archive_read_filter *);
#endif
/*
 * Note that we can detect lz4 archives even if we can't decompress
 * them.  (In fact, we like detecting them because we can give better
 * error messages.)  So the bid framework here gets compiled even
 * if liblz4 is unavailable.
 */
static int	lz4_reader_bid(struct archive_read_filter_bidder *,
		    struct archive_read_filter *);
static int	lz4_reader_init(struct archive_read_filter *);
static int	lz4_reader_free(struct archive_read_filter_bidder *);
#if defined(HAVE_LIBLZ4)
/* Readers for the two frame layouts; only built when liblz4 is present. */
static ssize_t	lz4_filter_read_default_stream(struct archive_read_filter *,
		    const void **);
static ssize_t	lz4_filter_read_legacy_stream(struct archive_read_filter *,
		    const void **);
#endif
111 archive_read_support_filter_lz4(struct archive *_a)
113 struct archive_read *a = (struct archive_read *)_a;
114 struct archive_read_filter_bidder *reader;
116 archive_check_magic(_a, ARCHIVE_READ_MAGIC,
117 ARCHIVE_STATE_NEW, "archive_read_support_filter_lz4");
119 if (__archive_read_get_bidder(a, &reader) != ARCHIVE_OK)
120 return (ARCHIVE_FATAL);
123 reader->name = "lz4";
124 reader->bid = lz4_reader_bid;
125 reader->init = lz4_reader_init;
126 reader->options = NULL;
127 reader->free = lz4_reader_free;
128 #if defined(HAVE_LIBLZ4)
131 archive_set_error(_a, ARCHIVE_ERRNO_MISC,
132 "Using external lz4 program");
133 return (ARCHIVE_WARN);
138 lz4_reader_free(struct archive_read_filter_bidder *self){
139 (void)self; /* UNUSED */
144 * Test whether we can handle this data.
146 * This logic returns zero if any part of the signature fails. It
147 * also tries to Do The Right Thing if a very short buffer prevents us
148 * from verifying as much as we would like.
151 lz4_reader_bid(struct archive_read_filter_bidder *self,
152 struct archive_read_filter *filter)
154 const unsigned char *buffer;
159 (void)self; /* UNUSED */
161 /* Minimal lz4 archive is 11 bytes. */
162 buffer = __archive_read_filter_ahead(filter, 11, &avail);
166 /* First four bytes must be LZ4 magic numbers. */
168 if ((number = archive_le32dec(buffer)) == LZ4_MAGICNUMBER) {
169 unsigned char flag, BD;
172 /* Next follows a stream descriptor. */
173 /* Descriptor Flags. */
175 /* A version number must be "01". */
176 if (((flag & 0xc0) >> 6) != 1)
178 /* A reserved bit must be "0". */
183 /* A block maximum size should be more than 3. */
184 if (((BD & 0x70) >> 4) < 4)
186 /* Reserved bits must be "0". */
190 } else if (number == LZ4_LEGACY) {
194 return (bits_checked);
197 #if !defined(HAVE_LIBLZ4)
200 * If we don't have the library on this system, we can't actually do the
201 * decompression. We can, however, still detect compressed archives
202 * and emit a useful message.
205 lz4_reader_init(struct archive_read_filter *self)
209 r = __archive_read_program(self, "lz4 -d -q");
210 /* Note: We set the format here even if __archive_read_program()
211 * above fails. We do, after all, know what the format is
212 * even if we weren't able to read it. */
213 self->code = ARCHIVE_FILTER_LZ4;
222 * Setup the callbacks.
225 lz4_reader_init(struct archive_read_filter *self)
227 struct private_data *state;
229 self->code = ARCHIVE_FILTER_LZ4;
232 state = (struct private_data *)calloc(sizeof(*state), 1);
234 archive_set_error(&self->archive->archive, ENOMEM,
235 "Can't allocate data for lz4 decompression");
236 return (ARCHIVE_FATAL);
240 state->stage = SELECT_STREAM;
241 self->read = lz4_filter_read;
242 self->skip = NULL; /* not supported */
243 self->close = lz4_filter_close;
249 lz4_allocate_out_block(struct archive_read_filter *self)
251 struct private_data *state = (struct private_data *)self->data;
252 size_t out_block_size = state->flags.block_maximum_size;
255 if (!state->flags.block_independence)
256 out_block_size += 64 * 1024;
257 if (state->out_block_size < out_block_size) {
258 free(state->out_block);
259 out_block = (unsigned char *)malloc(out_block_size);
260 state->out_block_size = out_block_size;
261 if (out_block == NULL) {
262 archive_set_error(&self->archive->archive, ENOMEM,
263 "Can't allocate data for lz4 decompression");
264 return (ARCHIVE_FATAL);
266 state->out_block = out_block;
268 if (!state->flags.block_independence)
269 memset(state->out_block, 0, 64 * 1024);
274 lz4_allocate_out_block_for_legacy(struct archive_read_filter *self)
276 struct private_data *state = (struct private_data *)self->data;
277 size_t out_block_size = LEGACY_BLOCK_SIZE;
280 if (state->out_block_size < out_block_size) {
281 free(state->out_block);
282 out_block = (unsigned char *)malloc(out_block_size);
283 state->out_block_size = out_block_size;
284 if (out_block == NULL) {
285 archive_set_error(&self->archive->archive, ENOMEM,
286 "Can't allocate data for lz4 decompression");
287 return (ARCHIVE_FATAL);
289 state->out_block = out_block;
295 * Return the next block of decompressed data.
298 lz4_filter_read(struct archive_read_filter *self, const void **p)
300 struct private_data *state = (struct private_data *)self->data;
308 __archive_read_filter_consume(self->upstream, state->unconsumed);
309 state->unconsumed = 0;
311 switch (state->stage) {
314 case READ_DEFAULT_STREAM:
315 case READ_LEGACY_STREAM:
316 /* Reading a lz4 stream already failed. */
317 archive_set_error(&self->archive->archive,
318 ARCHIVE_ERRNO_MISC, "Invalid sequence.");
319 return (ARCHIVE_FATAL);
320 case READ_DEFAULT_BLOCK:
321 ret = lz4_filter_read_default_stream(self, p);
322 if (ret != 0 || state->stage != SELECT_STREAM)
325 case READ_LEGACY_BLOCK:
326 ret = lz4_filter_read_legacy_stream(self, p);
327 if (ret != 0 || state->stage != SELECT_STREAM)
331 archive_set_error(&self->archive->archive,
332 ARCHIVE_ERRNO_MISC, "Program error.");
333 return (ARCHIVE_FATAL);
337 while (state->stage == SELECT_STREAM) {
338 const char *read_buf;
340 /* Read a magic number. */
341 read_buf = __archive_read_filter_ahead(self->upstream, 4,
343 if (read_buf == NULL) {
348 uint32_t number = archive_le32dec(read_buf);
349 __archive_read_filter_consume(self->upstream, 4);
350 if (number == LZ4_MAGICNUMBER)
351 return lz4_filter_read_default_stream(self, p);
352 else if (number == LZ4_LEGACY)
353 return lz4_filter_read_legacy_stream(self, p);
354 else if ((number & ~0xF) == LZ4_SKIPPABLED) {
355 read_buf = __archive_read_filter_ahead(
356 self->upstream, 4, NULL);
357 if (read_buf == NULL) {
359 &self->archive->archive,
361 "Malformed lz4 data");
362 return (ARCHIVE_FATAL);
364 uint32_t skip_bytes = archive_le32dec(read_buf);
365 __archive_read_filter_consume(self->upstream,
368 /* Ignore following unrecognized data. */
380 lz4_filter_read_descriptor(struct archive_read_filter *self)
382 struct private_data *state = (struct private_data *)self->data;
383 const char *read_buf;
384 ssize_t bytes_remaining;
385 ssize_t descriptor_bytes;
386 unsigned char flag, bd;
387 unsigned int chsum, chsum_verifier;
389 /* Make sure we have 2 bytes for flags. */
390 read_buf = __archive_read_filter_ahead(self->upstream, 2,
392 if (read_buf == NULL) {
393 archive_set_error(&self->archive->archive,
395 "truncated lz4 input");
396 return (ARCHIVE_FATAL);
402 flag = (unsigned char)read_buf[0];
403 /* Verify version number. */
404 if ((flag & 0xc0) != 1<<6)
405 goto malformed_error;
406 /* A reserved bit must be zero. */
408 goto malformed_error;
409 state->flags.block_independence = (flag & 0x20) != 0;
410 state->flags.block_checksum = (flag & 0x10)?4:0;
411 state->flags.stream_size = (flag & 0x08) != 0;
412 state->flags.stream_checksum = (flag & 0x04) != 0;
413 state->flags.preset_dictionary = (flag & 0x01) != 0;
416 bd = (unsigned char)read_buf[1];
417 /* Reserved bits must be zero. */
419 goto malformed_error;
420 /* Get a maximum block size. */
421 switch (read_buf[1] >> 4) {
423 state->flags.block_maximum_size = 64 * 1024;
426 state->flags.block_maximum_size = 256 * 1024;
429 state->flags.block_maximum_size = 1024 * 1024;
432 state->flags.block_maximum_size = 4 * 1024 * 1024;
435 goto malformed_error;
438 /* Read the whole descriptor in a stream block. */
439 descriptor_bytes = 3;
440 if (state->flags.stream_size)
441 descriptor_bytes += 8;
442 if (state->flags.preset_dictionary)
443 descriptor_bytes += 4;
444 if (bytes_remaining < descriptor_bytes) {
445 read_buf = __archive_read_filter_ahead(self->upstream,
446 descriptor_bytes, &bytes_remaining);
447 if (read_buf == NULL) {
448 archive_set_error(&self->archive->archive,
450 "truncated lz4 input");
451 return (ARCHIVE_FATAL);
454 /* Check if a descriptor is corrupted */
455 chsum = __archive_xxhash.XXH32(read_buf, (int)descriptor_bytes -1, 0);
456 chsum = (chsum >> 8) & 0xff;
457 chsum_verifier = read_buf[descriptor_bytes-1] & 0xff;
458 if (chsum != chsum_verifier)
459 goto malformed_error;
461 __archive_read_filter_consume(self->upstream, descriptor_bytes);
463 /* Make sure we have a large enough buffer for uncompressed data. */
464 if (lz4_allocate_out_block(self) != ARCHIVE_OK)
465 return (ARCHIVE_FATAL);
466 if (state->flags.stream_checksum)
467 state->xxh32_state = __archive_xxhash.XXH32_init(0);
469 state->decoded_size = 0;
473 archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
474 "malformed lz4 data");
475 return (ARCHIVE_FATAL);
479 lz4_filter_read_data_block(struct archive_read_filter *self, const void **p)
481 struct private_data *state = (struct private_data *)self->data;
482 ssize_t compressed_size;
483 const char *read_buf;
484 ssize_t bytes_remaining;
486 ssize_t uncompressed_size;
491 /* Make sure we have 4 bytes for a block size. */
492 read_buf = __archive_read_filter_ahead(self->upstream, 4,
494 if (read_buf == NULL)
495 goto truncated_error;
496 compressed_size = archive_le32dec(read_buf);
497 if ((compressed_size & 0x7fffffff) > state->flags.block_maximum_size)
498 goto malformed_error;
499 /* A compressed size == 0 means the end of stream blocks. */
500 if (compressed_size == 0) {
501 __archive_read_filter_consume(self->upstream, 4);
505 checksum_size = state->flags.block_checksum;
506 /* Check if the block is uncompressed. */
507 if (compressed_size & 0x80000000U) {
508 compressed_size &= 0x7fffffff;
509 uncompressed_size = compressed_size;
511 uncompressed_size = 0;/* Unknown yet. */
514 Unfortunately, lz4 decompression API requires a whole block
515 for its decompression speed, so we read a whole block and allocate
516 a huge buffer used for decoded data.
518 read_buf = __archive_read_filter_ahead(self->upstream,
519 4 + compressed_size + checksum_size, &bytes_remaining);
520 if (read_buf == NULL)
521 goto truncated_error;
523 /* Optional processing, checking a block sum. */
525 unsigned int chsum = __archive_xxhash.XXH32(
526 read_buf + 4, (int)compressed_size, 0);
527 unsigned int chsum_block =
528 archive_le32dec(read_buf + 4 + compressed_size);
529 if (chsum != chsum_block)
530 goto malformed_error;
534 /* If the block is uncompressed, there is nothing to do. */
535 if (uncompressed_size) {
536 /* Prepare a prefix 64k block for next block. */
537 if (!state->flags.block_independence) {
538 prefix64k = 64 * 1024;
539 if (uncompressed_size < (ssize_t)prefix64k) {
540 memcpy(state->out_block
541 + prefix64k - uncompressed_size,
544 memset(state->out_block, 0,
545 prefix64k - uncompressed_size);
547 memcpy(state->out_block,
549 + uncompressed_size - prefix64k,
552 state->decoded_size = 0;
554 state->unconsumed = 4 + uncompressed_size + checksum_size;
556 return uncompressed_size;
560 Decompress a block data.
562 if (state->flags.block_independence) {
564 uncompressed_size = LZ4_decompress_safe(read_buf + 4,
565 state->out_block, (int)compressed_size,
566 state->flags.block_maximum_size);
568 prefix64k = 64 * 1024;
569 if (state->decoded_size) {
570 if (state->decoded_size < prefix64k) {
571 memmove(state->out_block
572 + prefix64k - state->decoded_size,
573 state->out_block + prefix64k,
574 state->decoded_size);
575 memset(state->out_block, 0,
576 prefix64k - state->decoded_size);
578 memmove(state->out_block,
579 state->out_block + state->decoded_size,
583 #if LZ4_VERSION_MAJOR >= 1 && LZ4_VERSION_MINOR >= 7
584 uncompressed_size = LZ4_decompress_safe_usingDict(
586 state->out_block + prefix64k, (int)compressed_size,
587 state->flags.block_maximum_size,
591 uncompressed_size = LZ4_decompress_safe_withPrefix64k(
593 state->out_block + prefix64k, (int)compressed_size,
594 state->flags.block_maximum_size);
598 /* Check if an error occurred in the decompression process. */
599 if (uncompressed_size < 0) {
600 archive_set_error(&(self->archive->archive),
601 ARCHIVE_ERRNO_MISC, "lz4 decompression failed");
602 return (ARCHIVE_FATAL);
605 state->unconsumed = 4 + compressed_size + checksum_size;
606 *p = state->out_block + prefix64k;
607 state->decoded_size = uncompressed_size;
608 return uncompressed_size;
611 archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
612 "malformed lz4 data");
613 return (ARCHIVE_FATAL);
615 archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
616 "truncated lz4 input");
617 return (ARCHIVE_FATAL);
621 lz4_filter_read_default_stream(struct archive_read_filter *self, const void **p)
623 struct private_data *state = (struct private_data *)self->data;
624 const char *read_buf;
625 ssize_t bytes_remaining;
628 if (state->stage == SELECT_STREAM) {
629 state->stage = READ_DEFAULT_STREAM;
630 /* First, read a descriptor. */
631 if((ret = lz4_filter_read_descriptor(self)) != ARCHIVE_OK)
633 state->stage = READ_DEFAULT_BLOCK;
635 /* Decompress a block. */
636 ret = lz4_filter_read_data_block(self, p);
638 /* If the end of block is detected, change the filter status
639 to read next stream. */
640 if (ret == 0 && *p == NULL)
641 state->stage = SELECT_STREAM;
643 /* Optional processing, checking a stream sum. */
644 if (state->flags.stream_checksum) {
645 if (state->stage == SELECT_STREAM) {
646 unsigned int checksum;
647 unsigned int checksum_stream;
648 read_buf = __archive_read_filter_ahead(self->upstream,
649 4, &bytes_remaining);
650 if (read_buf == NULL) {
651 archive_set_error(&self->archive->archive,
652 ARCHIVE_ERRNO_MISC, "truncated lz4 input");
653 return (ARCHIVE_FATAL);
655 checksum = archive_le32dec(read_buf);
656 __archive_read_filter_consume(self->upstream, 4);
657 checksum_stream = __archive_xxhash.XXH32_digest(
659 state->xxh32_state = NULL;
660 if (checksum != checksum_stream) {
661 archive_set_error(&self->archive->archive,
663 "lz4 stream checksum error");
664 return (ARCHIVE_FATAL);
667 __archive_xxhash.XXH32_update(state->xxh32_state,
674 lz4_filter_read_legacy_stream(struct archive_read_filter *self, const void **p)
676 struct private_data *state = (struct private_data *)self->data;
678 const char *read_buf;
682 ret = lz4_allocate_out_block_for_legacy(self);
683 if (ret != ARCHIVE_OK)
686 /* Make sure we have 4 bytes for a block size. */
687 read_buf = __archive_read_filter_ahead(self->upstream, 4, NULL);
688 if (read_buf == NULL) {
689 if (state->stage == SELECT_STREAM) {
690 state->stage = READ_LEGACY_STREAM;
691 archive_set_error(&self->archive->archive,
693 "truncated lz4 input");
694 return (ARCHIVE_FATAL);
696 state->stage = SELECT_STREAM;
699 state->stage = READ_LEGACY_BLOCK;
700 compressed = archive_le32dec(read_buf);
701 if (compressed > LZ4_COMPRESSBOUND(LEGACY_BLOCK_SIZE)) {
702 state->stage = SELECT_STREAM;
706 /* Make sure we have a whole block. */
707 read_buf = __archive_read_filter_ahead(self->upstream,
708 4 + compressed, NULL);
709 if (read_buf == NULL) {
710 archive_set_error(&(self->archive->archive),
711 ARCHIVE_ERRNO_MISC, "truncated lz4 input");
712 return (ARCHIVE_FATAL);
714 ret = LZ4_decompress_safe(read_buf + 4, state->out_block,
715 compressed, (int)state->out_block_size);
717 archive_set_error(&(self->archive->archive),
718 ARCHIVE_ERRNO_MISC, "lz4 decompression failed");
719 return (ARCHIVE_FATAL);
721 *p = state->out_block;
722 state->unconsumed = 4 + compressed;
727 * Clean up the decompressor.
730 lz4_filter_close(struct archive_read_filter *self)
732 struct private_data *state;
733 int ret = ARCHIVE_OK;
735 state = (struct private_data *)self->data;
736 free(state->xxh32_state);
737 free(state->out_block);
742 #endif /* HAVE_LIBLZ4 */