2 * Copyright (c) 2014 Michihiro NAKAJIMA
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "archive_platform.h"
28 __FBSDID("$FreeBSD$");
48 #include "archive_endian.h"
49 #include "archive_private.h"
50 #include "archive_read_private.h"
51 #include "archive_xxhash.h"
/*
 * 32-bit signatures, compared against the little-endian value decoded
 * from the first four bytes of a stream:
 *   LZ4_MAGICNUMBER - stream that starts with a descriptor,
 *   LZ4_SKIPPABLED  - matched with the low four bits masked off; the
 *                     reader then decodes a 32-bit length (skip_bytes),
 *   LZ4_LEGACY      - legacy stream made of size-prefixed blocks.
 */
53 #define LZ4_MAGICNUMBER 0x184d2204
54 #define LZ4_SKIPPABLED 0x184d2a50
55 #define LZ4_LEGACY 0x184c2102
57 #if defined(HAVE_LIBLZ4)
/*
 * NOTE(review): the enclosing struct header and several members are not
 * visible in this extract. The members below are reached as
 * state->flags.<name> (bit-fields and block_maximum_size) and as
 * state-><name> (the rest) elsewhere in this file.
 */
/* Descriptor flag bit 0x20: blocks do not depend on earlier blocks. */
66 unsigned block_independence:1;
/* Byte count of the per-block checksum: stored as 4 or 0, not as a boolean. */
67 unsigned block_checksum:3;
/* Descriptor flag bit 0x08: adds 8 bytes to the descriptor length. */
68 unsigned stream_size:1;
/* Descriptor flag bit 0x04: a 32-bit checksum follows the last block. */
69 unsigned stream_checksum:1;
/* Descriptor flag bit 0x01: adds 4 bytes to the descriptor length. */
70 unsigned preset_dictionary:1;
/* Largest block payload accepted, set to 64 KiB, 256 KiB, 1 MiB or 4 MiB. */
71 int block_maximum_size;
/* Capacity recorded for the out_block allocation. */
76 size_t out_block_size;
78 /* Bytes read but not yet consumed via __archive_read_consume() */
83 char valid; /* True = decompressor is initialized */
84 char eof; /* True = found end of compressed data. */
/*
 * Output buffer size requested for legacy streams; also the operand of
 * LZ4_COMPRESSBOUND() when a legacy block size word is range-checked.
 */
87 #define LEGACY_BLOCK_SIZE (8 * 1024 * 1024)
90 static ssize_t lz4_filter_read(struct archive_read_filter *, const void **);
91 static int lz4_filter_close(struct archive_read_filter *);
95 * Note that we can detect lz4 archives even if we can't decompress
96 * them. (In fact, we like detecting them because we can give better
97 * error messages.) So the bid framework here gets compiled even
98 * if liblz4 is unavailable.
100 static int lz4_reader_bid(struct archive_read_filter_bidder *, struct archive_read_filter *);
101 static int lz4_reader_init(struct archive_read_filter *);
102 #if defined(HAVE_LIBLZ4)
103 static ssize_t lz4_filter_read_default_stream(struct archive_read_filter *,
105 static ssize_t lz4_filter_read_legacy_stream(struct archive_read_filter *,
/*
 * Bidder callbacks handed to __archive_read_register_bidder() by
 * archive_read_support_filter_lz4(): .bid inspects the input signature,
 * .init prepares the filter once this bidder is chosen.
 */
109 static const struct archive_read_filter_bidder_vtable
110 lz4_bidder_vtable = {
111 .bid = lz4_reader_bid,
112 .init = lz4_reader_init,
/*
 * Register the lz4 bidder (name "lz4") on the given archive handle.
 * Returns ARCHIVE_FATAL when registration does not report ARCHIVE_OK.
 * NOTE(review): the lines between the HAVE_LIBLZ4 test and the warning
 * below are not visible in this extract; presumably the warning path is
 * the branch taken without liblz4 - confirm against the full file.
 */
116 archive_read_support_filter_lz4(struct archive *_a)
118 struct archive_read *a = (struct archive_read *)_a;
120 if (__archive_read_register_bidder(a, NULL, "lz4",
121 &lz4_bidder_vtable) != ARCHIVE_OK)
122 return (ARCHIVE_FATAL);
124 #if defined(HAVE_LIBLZ4)
/* Records a message on the handle and returns ARCHIVE_WARN, not a failure. */
127 archive_set_error(_a, ARCHIVE_ERRNO_MISC,
128 "Using external lz4 program");
129 return (ARCHIVE_WARN);
134 * Test whether we can handle this data.
136 * This logic returns zero if any part of the signature fails. It
137 * also tries to Do The Right Thing if a very short buffer prevents us
138 * from verifying as much as we would like.
/*
 * Bid callback: peek at the start of the input without consuming it and
 * check for an lz4 signature. The result is returned in bits_checked.
 * NOTE(review): the statements that update bits_checked and the bodies
 * of the rejection branches are not visible in this extract.
 */
141 lz4_reader_bid(struct archive_read_filter_bidder *self,
142 struct archive_read_filter *filter)
144 const unsigned char *buffer;
149 (void)self; /* UNUSED */
151 /* Minimal lz4 archive is 11 bytes. */
152 buffer = __archive_read_filter_ahead(filter, 11, &avail);
156 /* First four bytes must be LZ4 magic numbers. */
/* The signature is decoded as a little-endian 32-bit value. */
158 if ((number = archive_le32dec(buffer)) == LZ4_MAGICNUMBER) {
159 unsigned char flag, BD;
162 /* Next follows a stream descriptor. */
163 /* Descriptor Flags. */
165 /* A version number must be "01". */
/* The version occupies the top two bits of the flag byte. */
166 if (((flag & 0xc0) >> 6) != 1)
168 /* A reserved bit must be "0". */
173 /* A block maximum size should be more than 3. */
/* The size code occupies bits 4..6 of the BD byte. */
174 if (((BD & 0x70) >> 4) < 4)
176 /* Reserved bits must be "0". */
/* Legacy streams are recognised by their signature alone in the visible code. */
180 } else if (number == LZ4_LEGACY) {
184 return (bits_checked);
187 #if !defined(HAVE_LIBLZ4)
190 * If we don't have the library on this system, we can't actually do the
191 * decompression. We can, however, still detect compressed archives
192 * and emit a useful message.
/*
 * Init callback for builds where HAVE_LIBLZ4 is not defined: hand the
 * decompression to an external command through __archive_read_program().
 * NOTE(review): the declaration of r and the return statement are not
 * visible in this extract.
 */
195 lz4_reader_init(struct archive_read_filter *self)
/* Command line passed to the external-program helper. */
199 r = __archive_read_program(self, "lz4 -d -q");
200 /* Note: We set the format here even if __archive_read_program()
201 * above fails. We do, after all, know what the format is
202 * even if we weren't able to read it. */
203 self->code = ARCHIVE_FILTER_LZ4;
/*
 * Filter callbacks installed on the filter by lz4_reader_init():
 * .read returns decoded data, .close releases the decoder state.
 */
211 static const struct archive_read_filter_vtable
212 lz4_reader_vtable = {
213 .read = lz4_filter_read,
214 .close = lz4_filter_close,
218 * Set up the callbacks.
/*
 * Init callback for builds with liblz4: tag the filter as lz4, allocate
 * zeroed decoder state, start in the SELECT_STREAM stage and install the
 * read/close callbacks. Returns ARCHIVE_FATAL with ENOMEM recorded when
 * the state cannot be allocated.
 * NOTE(review): some statements between the visible lines (including the
 * allocation check and the final return) are missing from this extract.
 */
221 lz4_reader_init(struct archive_read_filter *self)
223 struct private_data *state;
225 self->code = ARCHIVE_FILTER_LZ4;
/*
 * NOTE(review): calloc takes (count, size); the arguments here are in
 * the opposite order. The product is the same, so behaviour is
 * unaffected, but the conventional order would be (1, sizeof(*state)).
 */
228 state = (struct private_data *)calloc(sizeof(*state), 1);
230 archive_set_error(&self->archive->archive, ENOMEM,
231 "Can't allocate data for lz4 decompression");
232 return (ARCHIVE_FATAL);
/* The first read call will look for a stream signature. */
236 state->stage = SELECT_STREAM;
237 self->vtable = &lz4_reader_vtable;
243 lz4_allocate_out_block(struct archive_read_filter *self)
245 struct private_data *state = (struct private_data *)self->data;
246 size_t out_block_size = state->flags.block_maximum_size;
249 if (!state->flags.block_independence)
250 out_block_size += 64 * 1024;
251 if (state->out_block_size < out_block_size) {
252 free(state->out_block);
253 out_block = (unsigned char *)malloc(out_block_size);
254 state->out_block_size = out_block_size;
255 if (out_block == NULL) {
256 archive_set_error(&self->archive->archive, ENOMEM,
257 "Can't allocate data for lz4 decompression");
258 return (ARCHIVE_FATAL);
260 state->out_block = out_block;
262 if (!state->flags.block_independence)
263 memset(state->out_block, 0, 64 * 1024);
268 lz4_allocate_out_block_for_legacy(struct archive_read_filter *self)
270 struct private_data *state = (struct private_data *)self->data;
271 size_t out_block_size = LEGACY_BLOCK_SIZE;
274 if (state->out_block_size < out_block_size) {
275 free(state->out_block);
276 out_block = (unsigned char *)malloc(out_block_size);
277 state->out_block_size = out_block_size;
278 if (out_block == NULL) {
279 archive_set_error(&self->archive->archive, ENOMEM,
280 "Can't allocate data for lz4 decompression");
281 return (ARCHIVE_FATAL);
283 state->out_block = out_block;
289 * Return the next block of decompressed data.
/*
 * Read callback: hand the caller the next chunk of decoded data through
 * *p. The function first settles input left over from the previous call,
 * then either continues the stream in progress or, in the SELECT_STREAM
 * stage, reads the next 4-byte signature and picks a stream reader.
 * NOTE(review): several lines of this function (declarations, break and
 * return statements, closing braces) are not visible in this extract.
 */
292 lz4_filter_read(struct archive_read_filter *self, const void **p)
294 struct private_data *state = (struct private_data *)self->data;
/*
 * The block readers leave the byte count of the block they decoded in
 * state->unconsumed instead of consuming it themselves; it is released
 * here, at the start of the next call.
 */
302 __archive_read_filter_consume(self->upstream, state->unconsumed);
303 state->unconsumed = 0;
/* Continue according to the stage left behind by the previous call. */
305 switch (state->stage) {
308 case READ_DEFAULT_STREAM:
309 case READ_LEGACY_STREAM:
310 /* Reading a lz4 stream already failed. */
311 archive_set_error(&self->archive->archive,
312 ARCHIVE_ERRNO_MISC, "Invalid sequence.");
313 return (ARCHIVE_FATAL);
314 case READ_DEFAULT_BLOCK:
315 ret = lz4_filter_read_default_stream(self, p);
316 if (ret != 0 || state->stage != SELECT_STREAM)
319 case READ_LEGACY_BLOCK:
320 ret = lz4_filter_read_legacy_stream(self, p);
321 if (ret != 0 || state->stage != SELECT_STREAM)
325 archive_set_error(&self->archive->archive,
326 ARCHIVE_ERRNO_MISC, "Program error.");
327 return (ARCHIVE_FATAL);
/* Between streams: loop until a signature selects a stream reader. */
331 while (state->stage == SELECT_STREAM) {
332 const char *read_buf;
334 /* Read a magic number. */
335 read_buf = __archive_read_filter_ahead(self->upstream, 4,
337 if (read_buf == NULL) {
/* The signature is consumed before the matching reader is called. */
342 uint32_t number = archive_le32dec(read_buf);
343 __archive_read_filter_consume(self->upstream, 4);
344 if (number == LZ4_MAGICNUMBER)
345 return lz4_filter_read_default_stream(self, p);
346 else if (number == LZ4_LEGACY)
347 return lz4_filter_read_legacy_stream(self, p);
348 else if ((number & ~0xF) == LZ4_SKIPPABLED) {
349 read_buf = __archive_read_filter_ahead(
350 self->upstream, 4, NULL);
351 if (read_buf == NULL) {
353 &self->archive->archive,
355 "Malformed lz4 data");
356 return (ARCHIVE_FATAL);
/*
 * NOTE(review): skip_bytes comes straight from the input. The second
 * argument of the consume call below is on a line missing from this
 * extract; if it adds a constant to skip_bytes in 32-bit arithmetic,
 * a value near UINT32_MAX would wrap - confirm against the full file.
 */
358 uint32_t skip_bytes = archive_le32dec(read_buf);
359 __archive_read_filter_consume(self->upstream,
362 /* Ignore following unrecognized data. */
/*
 * Parse and validate the stream descriptor that follows the signature:
 * decode the flag and BD bytes into state->flags, verify the descriptor
 * check byte, consume the descriptor, size the output buffer and reset
 * the per-stream decoding state.
 * Visible failure paths return ARCHIVE_FATAL with either a truncated or
 * a malformed data message.
 * NOTE(review): some lines (reserved-bit tests, case labels, the success
 * return and the error label) are not visible in this extract.
 */
374 lz4_filter_read_descriptor(struct archive_read_filter *self)
376 struct private_data *state = (struct private_data *)self->data;
377 const char *read_buf;
378 ssize_t bytes_remaining;
379 ssize_t descriptor_bytes;
380 unsigned char flag, bd;
381 unsigned int chsum, chsum_verifier;
383 /* Make sure we have 2 bytes for flags. */
384 read_buf = __archive_read_filter_ahead(self->upstream, 2,
386 if (read_buf == NULL) {
387 archive_set_error(&self->archive->archive,
389 "truncated lz4 input");
390 return (ARCHIVE_FATAL);
396 flag = (unsigned char)read_buf[0];
397 /* Verify version number. */
398 if ((flag & 0xc0) != 1<<6)
399 goto malformed_error;
400 /* A reserved bit must be zero. */
402 goto malformed_error;
403 state->flags.block_independence = (flag & 0x20) != 0;
/* Stored as a byte count (4 or 0) so it can be added to block lengths. */
404 state->flags.block_checksum = (flag & 0x10)?4:0;
405 state->flags.stream_size = (flag & 0x08) != 0;
406 state->flags.stream_checksum = (flag & 0x04) != 0;
407 state->flags.preset_dictionary = (flag & 0x01) != 0;
410 bd = (unsigned char)read_buf[1];
411 /* Reserved bits must be zero. */
413 goto malformed_error;
414 /* Get a maximum block size. */
/*
 * NOTE(review): this shifts read_buf[1], a plain char, although bd holds
 * the same byte as unsigned char. Whether a set top bit can reach this
 * point depends on the reserved-bit test above, which is not visible
 * here; switching on bd would not depend on char signedness - confirm.
 */
415 switch (read_buf[1] >> 4) {
417 state->flags.block_maximum_size = 64 * 1024;
420 state->flags.block_maximum_size = 256 * 1024;
423 state->flags.block_maximum_size = 1024 * 1024;
426 state->flags.block_maximum_size = 4 * 1024 * 1024;
429 goto malformed_error;
432 /* Read the whole descriptor in a stream block. */
/*
 * Base length 3 = flag byte + BD byte + trailing check byte; the
 * optional fields add 8 and 4 bytes respectively.
 */
433 descriptor_bytes = 3;
434 if (state->flags.stream_size)
435 descriptor_bytes += 8;
436 if (state->flags.preset_dictionary)
437 descriptor_bytes += 4;
/* Only ask upstream again when the first peek returned too few bytes. */
438 if (bytes_remaining < descriptor_bytes) {
439 read_buf = __archive_read_filter_ahead(self->upstream,
440 descriptor_bytes, &bytes_remaining);
441 if (read_buf == NULL) {
442 archive_set_error(&self->archive->archive,
444 "truncated lz4 input");
445 return (ARCHIVE_FATAL);
448 /* Check if a descriptor is corrupted */
/*
 * The check byte is the last descriptor byte; it must equal bits 8..15
 * of the XXH32 (seed 0) of all descriptor bytes before it.
 */
449 chsum = __archive_xxhash.XXH32(read_buf, (int)descriptor_bytes -1, 0);
450 chsum = (chsum >> 8) & 0xff;
451 chsum_verifier = read_buf[descriptor_bytes-1] & 0xff;
452 if (chsum != chsum_verifier)
453 goto malformed_error;
455 __archive_read_filter_consume(self->upstream, descriptor_bytes);
457 /* Make sure we have a large enough buffer for uncompressed data. */
458 if (lz4_allocate_out_block(self) != ARCHIVE_OK)
459 return (ARCHIVE_FATAL);
/* Start a running hash (seed 0) used later for the stream checksum. */
460 if (state->flags.stream_checksum)
461 state->xxh32_state = __archive_xxhash.XXH32_init(0);
/* No decoded data from this stream is held in the output buffer yet. */
463 state->decoded_size = 0;
467 archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
468 "malformed lz4 data");
469 return (ARCHIVE_FATAL);
/*
 * Decode one block of a descriptor-based stream.
 * A block is a 4-byte little-endian size word, the payload, and an
 * optional 4-byte checksum. The visible paths return the number of
 * decoded bytes, or ARCHIVE_FATAL after recording a malformed, truncated
 * or decompression-failed error. The input bytes of the block are not
 * consumed here; their count is left in state->unconsumed.
 * NOTE(review): several lines (declarations, else branches, the stores
 * to *p on the stored-block and end-of-stream paths, labels) are not
 * visible in this extract.
 */
473 lz4_filter_read_data_block(struct archive_read_filter *self, const void **p)
475 struct private_data *state = (struct private_data *)self->data;
476 ssize_t compressed_size;
477 const char *read_buf;
478 ssize_t bytes_remaining;
480 ssize_t uncompressed_size;
485 /* Make sure we have 4 bytes for a block size. */
486 read_buf = __archive_read_filter_ahead(self->upstream, 4,
488 if (read_buf == NULL)
489 goto truncated_error;
/*
 * The low 31 bits of the size word are the payload length and must not
 * exceed the maximum announced by the descriptor; bit 31 is a flag
 * tested further down.
 */
490 compressed_size = archive_le32dec(read_buf);
491 if ((compressed_size & 0x7fffffff) > state->flags.block_maximum_size)
492 goto malformed_error;
493 /* A compressed size == 0 means the end of stream blocks. */
494 if (compressed_size == 0) {
495 __archive_read_filter_consume(self->upstream, 4);
/* 4 when the descriptor enabled block checksums, otherwise 0. */
499 checksum_size = state->flags.block_checksum;
500 /* Check if the block is uncompressed. */
/* Bit 31 set: the payload is stored as-is, so both sizes are equal. */
501 if (compressed_size & 0x80000000U) {
502 compressed_size &= 0x7fffffff;
503 uncompressed_size = compressed_size;
505 uncompressed_size = 0;/* Unknown yet. */
508 Unfortunately, lz4 decompression API requires a whole block
509 for its decompression speed, so we read a whole block and allocate
510 a huge buffer used for decoded data.
512 read_buf = __archive_read_filter_ahead(self->upstream,
513 4 + compressed_size + checksum_size, &bytes_remaining);
514 if (read_buf == NULL)
515 goto truncated_error;
517 /* Optional processing, checking a block sum. */
/* The checksum covers the payload only and is stored right after it. */
519 unsigned int chsum = __archive_xxhash.XXH32(
520 read_buf + 4, (int)compressed_size, 0);
521 unsigned int chsum_block =
522 archive_le32dec(read_buf + 4 + compressed_size);
523 if (chsum != chsum_block)
524 goto malformed_error;
528 /* If the block is uncompressed, there is nothing to do. */
529 if (uncompressed_size) {
530 /* Prepare a prefix 64k block for next block. */
/*
 * For dependent blocks the first 64 KiB of out_block is filled so that
 * the stored data ends exactly at offset 64 KiB; shorter data is
 * preceded by zero bytes.
 */
531 if (!state->flags.block_independence) {
532 prefix64k = 64 * 1024;
533 if (uncompressed_size < (ssize_t)prefix64k) {
534 memcpy(state->out_block
535 + prefix64k - uncompressed_size,
538 memset(state->out_block, 0,
539 prefix64k - uncompressed_size);
541 memcpy(state->out_block,
543 + uncompressed_size - prefix64k,
/* Nothing was decoded into out_block past the prefix for this block. */
546 state->decoded_size = 0;
548 state->unconsumed = 4 + uncompressed_size + checksum_size;
550 return uncompressed_size;
554 Decompress a block data.
556 if (state->flags.block_independence) {
/* Independent block: decode at the start of out_block, no history. */
558 uncompressed_size = LZ4_decompress_safe(read_buf + 4,
559 state->out_block, (int)compressed_size,
560 state->flags.block_maximum_size);
562 prefix64k = 64 * 1024;
/*
 * Dependent block: slide the previously decoded data into the 64 KiB
 * prefix so it ends at offset 64 KiB; memmove is used because source
 * and destination lie in the same buffer.
 */
563 if (state->decoded_size) {
564 if (state->decoded_size < prefix64k) {
565 memmove(state->out_block
566 + prefix64k - state->decoded_size,
567 state->out_block + prefix64k,
568 state->decoded_size);
569 memset(state->out_block, 0,
570 prefix64k - state->decoded_size);
572 memmove(state->out_block,
573 state->out_block + state->decoded_size,
/*
 * NOTE(review): this test requires MINOR >= 7 for every MAJOR, so a
 * library reporting e.g. 2.0 would not satisfy it - confirm whether a
 * combined version number comparison was intended.
 */
577 #if LZ4_VERSION_MAJOR >= 1 && LZ4_VERSION_MINOR >= 7
578 uncompressed_size = LZ4_decompress_safe_usingDict(
580 state->out_block + prefix64k, (int)compressed_size,
581 state->flags.block_maximum_size,
585 uncompressed_size = LZ4_decompress_safe_withPrefix64k(
587 state->out_block + prefix64k, (int)compressed_size,
588 state->flags.block_maximum_size);
592 /* Check if an error occurred in the decompression process. */
593 if (uncompressed_size < 0) {
594 archive_set_error(&(self->archive->archive),
595 ARCHIVE_ERRNO_MISC, "lz4 decompression failed");
596 return (ARCHIVE_FATAL);
/*
 * Size word + payload + optional checksum; lz4_filter_read consumes
 * this amount at the start of the next call.
 */
599 state->unconsumed = 4 + compressed_size + checksum_size;
600 *p = state->out_block + prefix64k;
601 state->decoded_size = uncompressed_size;
602 return uncompressed_size;
605 archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
606 "malformed lz4 data");
607 return (ARCHIVE_FATAL);
609 archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
610 "truncated lz4 input");
611 return (ARCHIVE_FATAL);
/*
 * Drive a descriptor-based stream: on entry from SELECT_STREAM parse the
 * descriptor, then decode one block per call. When the end of the blocks
 * is detected the stage returns to SELECT_STREAM and, if the descriptor
 * enabled it, the trailing stream checksum is read and verified.
 * NOTE(review): several lines (the declaration of ret, return statements,
 * the arguments of the hash calls) are not visible in this extract.
 */
615 lz4_filter_read_default_stream(struct archive_read_filter *self, const void **p)
617 struct private_data *state = (struct private_data *)self->data;
618 const char *read_buf;
619 ssize_t bytes_remaining;
622 if (state->stage == SELECT_STREAM) {
/*
 * The stage is advanced to READ_DEFAULT_BLOCK only after the descriptor
 * is accepted; lz4_filter_read treats a call that finds the stage still
 * at READ_DEFAULT_STREAM as an error.
 */
623 state->stage = READ_DEFAULT_STREAM;
624 /* First, read a descriptor. */
625 if((ret = lz4_filter_read_descriptor(self)) != ARCHIVE_OK)
627 state->stage = READ_DEFAULT_BLOCK;
629 /* Decompress a block. */
630 ret = lz4_filter_read_data_block(self, p);
632 /* If the end of block is detected, change the filter status
633 to read next stream. */
634 if (ret == 0 && *p == NULL)
635 state->stage = SELECT_STREAM;
637 /* Optional processing, checking a stream sum. */
638 if (state->flags.stream_checksum) {
639 if (state->stage == SELECT_STREAM) {
640 unsigned int checksum;
641 unsigned int checksum_stream;
/* The stored checksum is the 4 bytes that follow the last block. */
642 read_buf = __archive_read_filter_ahead(self->upstream,
643 4, &bytes_remaining);
644 if (read_buf == NULL) {
645 archive_set_error(&self->archive->archive,
646 ARCHIVE_ERRNO_MISC, "truncated lz4 input");
647 return (ARCHIVE_FATAL);
649 checksum = archive_le32dec(read_buf);
650 __archive_read_filter_consume(self->upstream, 4);
651 checksum_stream = __archive_xxhash.XXH32_digest(
/*
 * NOTE(review): the pointer is cleared right after XXH32_digest;
 * presumably the digest call releases the hash state - confirm, since
 * lz4_filter_close() frees xxh32_state.
 */
653 state->xxh32_state = NULL;
654 if (checksum != checksum_stream) {
655 archive_set_error(&self->archive->archive,
657 "lz4 stream checksum error");
658 return (ARCHIVE_FATAL);
/* Stream still in progress: feed the running hash. */
661 __archive_xxhash.XXH32_update(state->xxh32_state,
/*
 * Decode one block of a legacy stream. Each block is a 4-byte
 * little-endian compressed size followed by the payload; the output goes
 * to state->out_block, which is sized to LEGACY_BLOCK_SIZE first.
 * The input bytes of the block are not consumed here; their count is
 * left in state->unconsumed.
 * NOTE(review): several lines (declarations, return statements, the
 * test on the decoder result) are not visible in this extract.
 */
668 lz4_filter_read_legacy_stream(struct archive_read_filter *self, const void **p)
670 struct private_data *state = (struct private_data *)self->data;
672 const char *read_buf;
676 ret = lz4_allocate_out_block_for_legacy(self);
677 if (ret != ARCHIVE_OK)
680 /* Make sure we have 4 bytes for a block size. */
681 read_buf = __archive_read_filter_ahead(self->upstream, 4, NULL);
682 if (read_buf == NULL) {
/*
 * Running out of input is reported as truncation only when no block of
 * this stream has been read yet (stage still SELECT_STREAM); otherwise
 * the stage is simply reset to SELECT_STREAM.
 */
683 if (state->stage == SELECT_STREAM) {
684 state->stage = READ_LEGACY_STREAM;
685 archive_set_error(&self->archive->archive,
687 "truncated lz4 input");
688 return (ARCHIVE_FATAL);
690 state->stage = SELECT_STREAM;
693 state->stage = READ_LEGACY_BLOCK;
694 compressed = archive_le32dec(read_buf);
/*
 * A size word above the worst-case compressed size of one legacy block
 * is not treated as a block; the stage goes back to SELECT_STREAM.
 */
695 if (compressed > LZ4_COMPRESSBOUND(LEGACY_BLOCK_SIZE)) {
696 state->stage = SELECT_STREAM;
700 /* Make sure we have a whole block. */
701 read_buf = __archive_read_filter_ahead(self->upstream,
702 4 + compressed, NULL);
703 if (read_buf == NULL) {
704 archive_set_error(&(self->archive->archive),
705 ARCHIVE_ERRNO_MISC, "truncated lz4 input");
706 return (ARCHIVE_FATAL);
/* The payload starts after the size word; output is bounded by the buffer capacity. */
708 ret = LZ4_decompress_safe(read_buf + 4, state->out_block,
709 compressed, (int)state->out_block_size);
711 archive_set_error(&(self->archive->archive),
712 ARCHIVE_ERRNO_MISC, "lz4 decompression failed");
713 return (ARCHIVE_FATAL);
715 *p = state->out_block;
/* Size word + payload; consumed by lz4_filter_read on the next call. */
716 state->unconsumed = 4 + compressed;
721 * Clean up the decompressor.
/*
 * Close callback: release the buffers owned by the decoder state.
 * NOTE(review): the end of this function (any release of the state
 * object itself and the return statement) is not visible in this extract.
 */
724 lz4_filter_close(struct archive_read_filter *self)
726 struct private_data *state;
727 int ret = ARCHIVE_OK;
729 state = (struct private_data *)self->data;
/* Both pointers may be NULL here; free(NULL) is a no-op. */
730 free(state->xxh32_state);
731 free(state->out_block);
736 #endif /* HAVE_LIBLZ4 */