61d9d0c63fd4d9aa577baadeabaa34beca14d2e6
[dragonfly.git] / contrib / libarchive / libarchive / archive_read_support_compression_xz.c
1 /*-
2  * Copyright (c) 2009 Michihiro NAKAJIMA
3  * Copyright (c) 2003-2008 Tim Kientzle and Miklos Vajna
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 #include "archive_platform.h"
28
29 __FBSDID("$FreeBSD$");
30
31 #ifdef HAVE_ERRNO_H
32 #include <errno.h>
33 #endif
34 #include <stdio.h>
35 #ifdef HAVE_STDLIB_H
36 #include <stdlib.h>
37 #endif
38 #ifdef HAVE_STRING_H
39 #include <string.h>
40 #endif
41 #ifdef HAVE_UNISTD_H
42 #include <unistd.h>
43 #endif
44 #if HAVE_LZMA_H
45 #include <lzma.h>
46 #elif HAVE_LZMADEC_H
47 #include <lzmadec.h>
48 #endif
49
50 #include "archive.h"
51 #include "archive_private.h"
52 #include "archive_read_private.h"
53
54 #if HAVE_LZMA_H && HAVE_LIBLZMA
55
56 struct private_data {
57         lzma_stream      stream;
58         unsigned char   *out_block;
59         size_t           out_block_size;
60         int64_t          total_out;
61         char             eof; /* True = found end of compressed data. */
62 };
63
/*
 * Callbacks of the combined lzma/xz filter, plus the setup routine
 * shared by the xz and lzma bidders.
 */
static ssize_t  xz_filter_read(struct archive_read_filter *self,
                    const void **p);
static int      xz_filter_close(struct archive_read_filter *self);
static int      xz_lzma_bidder_init(struct archive_read_filter *self);
68
69 #elif HAVE_LZMADEC_H && HAVE_LIBLZMADEC
70
71 struct private_data {
72         lzmadec_stream   stream;
73         unsigned char   *out_block;
74         size_t           out_block_size;
75         int64_t          total_out;
76         char             eof; /* True = found end of compressed data. */
77 };
78
/* Callbacks of the lzma-only filter (liblzmadec cannot decode xz). */
static ssize_t  lzma_filter_read(struct archive_read_filter *self,
                    const void **p);
static int      lzma_filter_close(struct archive_read_filter *self);
82 #endif
83
/*
 * The bidders are compiled unconditionally: xz and lzma data can be
 * recognized even when no library is available to decompress it, and
 * recognizing it lets us report a more helpful error.  Only the init
 * routines differ depending on which library (if any) was found.
 */
static int      xz_bidder_bid(struct archive_read_filter_bidder *self,
                    struct archive_read_filter *filter);
static int      xz_bidder_init(struct archive_read_filter *self);
static int      lzma_bidder_bid(struct archive_read_filter_bidder *self,
                    struct archive_read_filter *filter);
static int      lzma_bidder_init(struct archive_read_filter *self);
96
97 int
98 archive_read_support_compression_xz(struct archive *_a)
99 {
100         struct archive_read *a = (struct archive_read *)_a;
101         struct archive_read_filter_bidder *bidder = __archive_read_get_bidder(a);
102
103         if (bidder == NULL)
104                 return (ARCHIVE_FATAL);
105
106         bidder->data = NULL;
107         bidder->bid = xz_bidder_bid;
108         bidder->init = xz_bidder_init;
109         bidder->options = NULL;
110         bidder->free = NULL;
111 #if HAVE_LZMA_H && HAVE_LIBLZMA
112         return (ARCHIVE_OK);
113 #else
114         archive_set_error(_a, ARCHIVE_ERRNO_MISC,
115             "Using external unxz program for xz decompression");
116         return (ARCHIVE_WARN);
117 #endif
118 }
119
120 int
121 archive_read_support_compression_lzma(struct archive *_a)
122 {
123         struct archive_read *a = (struct archive_read *)_a;
124         struct archive_read_filter_bidder *bidder = __archive_read_get_bidder(a);
125
126         if (bidder == NULL)
127                 return (ARCHIVE_FATAL);
128
129         bidder->data = NULL;
130         bidder->bid = lzma_bidder_bid;
131         bidder->init = lzma_bidder_init;
132         bidder->options = NULL;
133         bidder->free = NULL;
134 #if HAVE_LZMA_H && HAVE_LIBLZMA
135         return (ARCHIVE_OK);
136 #elif HAVE_LZMADEC_H && HAVE_LIBLZMADEC
137         return (ARCHIVE_OK);
138 #else
139         archive_set_error(_a, ARCHIVE_ERRNO_MISC,
140             "Using external unlzma program for lzma decompression");
141         return (ARCHIVE_WARN);
142 #endif
143 }
144
/*
 * Bid on the upstream data by comparing its first six bytes with the
 * xz header magic.
 *
 * Returns the number of bits that were verified (48) on a match, or 0
 * when any byte differs or the read-ahead returns NULL.
 */
static int
xz_bidder_bid(struct archive_read_filter_bidder *self,
    struct archive_read_filter *filter)
{
        /* Header Magic Bytes : FD 37 7A 58 5A 00 */
        static const unsigned char xz_magic[6] = {
                0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00
        };
        const unsigned char *head;
        ssize_t avail;
        size_t i;

        (void)self; /* UNUSED */

        head = __archive_read_filter_ahead(filter, sizeof(xz_magic), &avail);
        if (head == NULL)
                return (0);

        for (i = 0; i < sizeof(xz_magic); i++) {
                if (head[i] != xz_magic[i])
                        return (0);
        }

        /* Every magic byte matched: eight bits per byte were checked. */
        return ((int)(8 * sizeof(xz_magic)));
}
187
/*
 * Bid on the upstream data by inspecting the start of a raw LZMA header.
 *
 * The LZMA signature is weak: it consists of zero bytes plus a single
 * non-zero byte that happens to be the ASCII character ']'.  An
 * uncompressed tar archive whose first file is named "]" would pass
 * this test.  Because of that it may become necessary to leave LZMA out
 * of compression_all(), so that clients enable LZMA detection explicitly
 * when they have other evidence (file name, command-line option).
 *
 * Returns the number of bits verified (24) on a match, or 0 when a
 * checked byte differs or the read-ahead returns NULL.
 */
static int
lzma_bidder_bid(struct archive_read_filter_bidder *self,
    struct archive_read_filter *filter)
{
        const unsigned char *head;
        ssize_t avail;

        (void)self; /* UNUSED */

        head = __archive_read_filter_ahead(filter, 6, &avail);
        if (head == NULL)
                return (0);

        /* A raw LZMA stream always starts with 0x5d. */
        if (head[0] != 0x5d)
                return (0);

        /*
         * Bytes 1..4 hold the little-endian dictionary code, whose two
         * least-significant bytes are always zero.
         */
        if (head[1] != 0 || head[2] != 0)
                return (0);

        /*
         * TODO: byte 5 is deliberately not tested.  The NSIS format
         * check requires it to be 0, but tar.lzma archives exist where
         * it is 0xff.  Can it ever be anything other than 0 or 0xff?
         *
         * TODO: the test as a whole is still very weak and should be
         * strengthened.
         */

        /* 8 bits for the lead byte plus 16 for the two zero bytes. */
        return (8 + 16);
}
243
244 #if HAVE_LZMA_H && HAVE_LIBLZMA
245
246 /*
247  * liblzma 4.999.7 and later support both lzma and xz streams.
248  */
249 static int
250 xz_bidder_init(struct archive_read_filter *self)
251 {
252         self->code = ARCHIVE_COMPRESSION_XZ;
253         self->name = "xz";
254         return (xz_lzma_bidder_init(self));
255 }
256
257 static int
258 lzma_bidder_init(struct archive_read_filter *self)
259 {
260         self->code = ARCHIVE_COMPRESSION_LZMA;
261         self->name = "lzma";
262         return (xz_lzma_bidder_init(self));
263 }
264
265 /*
266  * Setup the callbacks.
267  */
268 static int
269 xz_lzma_bidder_init(struct archive_read_filter *self)
270 {
271         static const size_t out_block_size = 64 * 1024;
272         void *out_block;
273         struct private_data *state;
274         int ret;
275
276         state = (struct private_data *)calloc(sizeof(*state), 1);
277         out_block = (unsigned char *)malloc(out_block_size);
278         if (state == NULL || out_block == NULL) {
279                 archive_set_error(&self->archive->archive, ENOMEM,
280                     "Can't allocate data for xz decompression");
281                 free(out_block);
282                 free(state);
283                 return (ARCHIVE_FATAL);
284         }
285
286         self->data = state;
287         state->out_block_size = out_block_size;
288         state->out_block = out_block;
289         self->read = xz_filter_read;
290         self->skip = NULL; /* not supported */
291         self->close = xz_filter_close;
292
293         state->stream.avail_in = 0;
294
295         state->stream.next_out = state->out_block;
296         state->stream.avail_out = state->out_block_size;
297
298         /* Initialize compression library.
299          * TODO: I don't know what value is best for memlimit.
300          *       maybe, it needs to check memory size which
301          *       running system has.
302          */
303         if (self->code == ARCHIVE_COMPRESSION_XZ)
304                 ret = lzma_stream_decoder(&(state->stream),
305                     (1U << 30),/* memlimit */
306                     LZMA_CONCATENATED);
307         else
308                 ret = lzma_alone_decoder(&(state->stream),
309                     (1U << 30));/* memlimit */
310
311         if (ret == LZMA_OK)
312                 return (ARCHIVE_OK);
313
314         /* Library setup failed: Choose an error message and clean up. */
315         switch (ret) {
316         case LZMA_MEM_ERROR:
317                 archive_set_error(&self->archive->archive, ENOMEM,
318                     "Internal error initializing compression library: "
319                     "Cannot allocate memory");
320                 break;
321         case LZMA_OPTIONS_ERROR:
322                 archive_set_error(&self->archive->archive,
323                     ARCHIVE_ERRNO_MISC,
324                     "Internal error initializing compression library: "
325                     "Invalid or unsupported options");
326                 break;
327         default:
328                 archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
329                     "Internal error initializing lzma library");
330                 break;
331         }
332
333         free(state->out_block);
334         free(state);
335         self->data = NULL;
336         return (ARCHIVE_FATAL);
337 }
338
339 /*
340  * Return the next block of decompressed data.
341  */
342 static ssize_t
343 xz_filter_read(struct archive_read_filter *self, const void **p)
344 {
345         struct private_data *state;
346         size_t decompressed;
347         ssize_t avail_in;
348         int ret;
349
350         state = (struct private_data *)self->data;
351
352         /* Empty our output buffer. */
353         state->stream.next_out = state->out_block;
354         state->stream.avail_out = state->out_block_size;
355
356         /* Try to fill the output buffer. */
357         while (state->stream.avail_out > 0 && !state->eof) {
358                 state->stream.next_in =
359                     __archive_read_filter_ahead(self->upstream, 1, &avail_in);
360                 if (state->stream.next_in == NULL && avail_in < 0)
361                         return (ARCHIVE_FATAL);
362                 state->stream.avail_in = avail_in;
363
364                 /* Decompress as much as we can in one pass. */
365                 ret = lzma_code(&(state->stream),
366                     (state->stream.avail_in == 0)? LZMA_FINISH: LZMA_RUN);
367                 switch (ret) {
368                 case LZMA_STREAM_END: /* Found end of stream. */
369                         state->eof = 1;
370                         /* FALL THROUGH */
371                 case LZMA_OK: /* Decompressor made some progress. */
372                         __archive_read_filter_consume(self->upstream,
373                             avail_in - state->stream.avail_in);
374                         break;
375                 case LZMA_MEM_ERROR:
376                         archive_set_error(&self->archive->archive, ENOMEM,
377                             "Lzma library error: Cannot allocate memory");
378                         return (ARCHIVE_FATAL);
379                 case LZMA_MEMLIMIT_ERROR:
380                         archive_set_error(&self->archive->archive, ENOMEM,
381                             "Lzma library error: Out of memory");
382                         return (ARCHIVE_FATAL);
383                 case LZMA_FORMAT_ERROR:
384                         archive_set_error(&self->archive->archive,
385                             ARCHIVE_ERRNO_MISC,
386                             "Lzma library error: format not recognized");
387                         return (ARCHIVE_FATAL);
388                 case LZMA_OPTIONS_ERROR:
389                         archive_set_error(&self->archive->archive,
390                             ARCHIVE_ERRNO_MISC,
391                             "Lzma library error: Invalid options");
392                         return (ARCHIVE_FATAL);
393                 case LZMA_DATA_ERROR:
394                         archive_set_error(&self->archive->archive,
395                             ARCHIVE_ERRNO_MISC,
396                             "Lzma library error: Corrupted input data");
397                         return (ARCHIVE_FATAL);
398                 case LZMA_BUF_ERROR:
399                         archive_set_error(&self->archive->archive,
400                             ARCHIVE_ERRNO_MISC,
401                             "Lzma library error:  No progress is possible");
402                         return (ARCHIVE_FATAL);
403                 default:
404                         /* Return an error. */
405                         archive_set_error(&self->archive->archive,
406                             ARCHIVE_ERRNO_MISC,
407                             "Lzma decompression failed:  Unknown error");
408                         return (ARCHIVE_FATAL);
409                 }
410         }
411
412         decompressed = state->stream.next_out - state->out_block;
413         state->total_out += decompressed;
414         if (decompressed == 0)
415                 *p = NULL;
416         else
417                 *p = state->out_block;
418         return (decompressed);
419 }
420
421 /*
422  * Clean up the decompressor.
423  */
424 static int
425 xz_filter_close(struct archive_read_filter *self)
426 {
427         struct private_data *state;
428
429         state = (struct private_data *)self->data;
430         lzma_end(&(state->stream));
431         free(state->out_block);
432         free(state);
433         return (ARCHIVE_OK);
434 }
435
436 #else
437
438 #if HAVE_LZMADEC_H && HAVE_LIBLZMADEC
439
440 /*
441  * If we have the older liblzmadec library, then we can handle
442  * LZMA streams but not XZ streams.
443  */
444
445 /*
446  * Setup the callbacks.
447  */
448 static int
449 lzma_bidder_init(struct archive_read_filter *self)
450 {
451         static const size_t out_block_size = 64 * 1024;
452         void *out_block;
453         struct private_data *state;
454         ssize_t ret, avail_in;
455
456         self->code = ARCHIVE_COMPRESSION_LZMA;
457         self->name = "lzma";
458
459         state = (struct private_data *)calloc(sizeof(*state), 1);
460         out_block = (unsigned char *)malloc(out_block_size);
461         if (state == NULL || out_block == NULL) {
462                 archive_set_error(&self->archive->archive, ENOMEM,
463                     "Can't allocate data for lzma decompression");
464                 free(out_block);
465                 free(state);
466                 return (ARCHIVE_FATAL);
467         }
468
469         self->data = state;
470         state->out_block_size = out_block_size;
471         state->out_block = out_block;
472         self->read = lzma_filter_read;
473         self->skip = NULL; /* not supported */
474         self->close = lzma_filter_close;
475
476         /* Prime the lzma library with 18 bytes of input. */
477         state->stream.next_in = (unsigned char *)(uintptr_t)
478             __archive_read_filter_ahead(self->upstream, 18, &avail_in);
479         if (state->stream.next_in == NULL)
480                 return (ARCHIVE_FATAL);
481         state->stream.avail_in = avail_in;
482         state->stream.next_out = state->out_block;
483         state->stream.avail_out = state->out_block_size;
484
485         /* Initialize compression library. */
486         ret = lzmadec_init(&(state->stream));
487         __archive_read_filter_consume(self->upstream,
488             avail_in - state->stream.avail_in);
489         if (ret == LZMADEC_OK)
490                 return (ARCHIVE_OK);
491
492         /* Library setup failed: Clean up. */
493         archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
494             "Internal error initializing lzma library");
495
496         /* Override the error message if we know what really went wrong. */
497         switch (ret) {
498         case LZMADEC_HEADER_ERROR:
499                 archive_set_error(&self->archive->archive,
500                     ARCHIVE_ERRNO_MISC,
501                     "Internal error initializing compression library: "
502                     "invalid header");
503                 break;
504         case LZMADEC_MEM_ERROR:
505                 archive_set_error(&self->archive->archive, ENOMEM,
506                     "Internal error initializing compression library: "
507                     "out of memory");
508                 break;
509         }
510
511         free(state->out_block);
512         free(state);
513         self->data = NULL;
514         return (ARCHIVE_FATAL);
515 }
516
517 /*
518  * Return the next block of decompressed data.
519  */
520 static ssize_t
521 lzma_filter_read(struct archive_read_filter *self, const void **p)
522 {
523         struct private_data *state;
524         size_t decompressed;
525         ssize_t avail_in, ret;
526
527         state = (struct private_data *)self->data;
528
529         /* Empty our output buffer. */
530         state->stream.next_out = state->out_block;
531         state->stream.avail_out = state->out_block_size;
532
533         /* Try to fill the output buffer. */
534         while (state->stream.avail_out > 0 && !state->eof) {
535                 state->stream.next_in = (unsigned char *)(uintptr_t)
536                     __archive_read_filter_ahead(self->upstream, 1, &avail_in);
537                 if (state->stream.next_in == NULL && avail_in < 0)
538                         return (ARCHIVE_FATAL);
539                 state->stream.avail_in = avail_in;
540
541                 /* Decompress as much as we can in one pass. */
542                 ret = lzmadec_decode(&(state->stream), avail_in == 0);
543                 switch (ret) {
544                 case LZMADEC_STREAM_END: /* Found end of stream. */
545                         state->eof = 1;
546                         /* FALL THROUGH */
547                 case LZMADEC_OK: /* Decompressor made some progress. */
548                         __archive_read_filter_consume(self->upstream,
549                             avail_in - state->stream.avail_in);
550                         break;
551                 case LZMADEC_BUF_ERROR: /* Insufficient input data? */
552                         archive_set_error(&self->archive->archive,
553                             ARCHIVE_ERRNO_MISC,
554                             "Insufficient compressed data");
555                         return (ARCHIVE_FATAL);
556                 default:
557                         /* Return an error. */
558                         archive_set_error(&self->archive->archive,
559                             ARCHIVE_ERRNO_MISC,
560                             "Lzma decompression failed");
561                         return (ARCHIVE_FATAL);
562                 }
563         }
564
565         decompressed = state->stream.next_out - state->out_block;
566         state->total_out += decompressed;
567         if (decompressed == 0)
568                 *p = NULL;
569         else
570                 *p = state->out_block;
571         return (decompressed);
572 }
573
574 /*
575  * Clean up the decompressor.
576  */
577 static int
578 lzma_filter_close(struct archive_read_filter *self)
579 {
580         struct private_data *state;
581         int ret;
582
583         state = (struct private_data *)self->data;
584         ret = ARCHIVE_OK;
585         switch (lzmadec_end(&(state->stream))) {
586         case LZMADEC_OK:
587                 break;
588         default:
589                 archive_set_error(&(self->archive->archive),
590                     ARCHIVE_ERRNO_MISC,
591                     "Failed to clean up %s compressor",
592                     self->archive->archive.compression_name);
593                 ret = ARCHIVE_FATAL;
594         }
595
596         free(state->out_block);
597         free(state);
598         return (ret);
599 }
600
601 #else
602
603 /*
604  *
605  * If we have no suitable library on this system, we can't actually do
606  * the decompression.  We can, however, still detect compressed
607  * archives and emit a useful message.
608  *
609  */
610 static int
611 lzma_bidder_init(struct archive_read_filter *self)
612 {
613         int r;
614
615         r = __archive_read_program(self, "unlzma");
616         /* Note: We set the format here even if __archive_read_program()
617          * above fails.  We do, after all, know what the format is
618          * even if we weren't able to read it. */
619         self->code = ARCHIVE_COMPRESSION_LZMA;
620         self->name = "lzma";
621         return (r);
622 }
623
624 #endif /* HAVE_LZMADEC_H */
625
626
627 static int
628 xz_bidder_init(struct archive_read_filter *self)
629 {
630         int r;
631
632         r = __archive_read_program(self, "unxz");
633         /* Note: We set the format here even if __archive_read_program()
634          * above fails.  We do, after all, know what the format is
635          * even if we weren't able to read it. */
636         self->code = ARCHIVE_COMPRESSION_XZ;
637         self->name = "xz";
638         return (r);
639 }
640
641
642 #endif /* HAVE_LZMA_H */