Merge branch 'vendor/LIBARCHIVE'
[dragonfly.git] / contrib / libarchive / libarchive / archive_read_support_filter_gzip.c
1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "archive_platform.h"
27
28 __FBSDID("$FreeBSD$");
29
30
31 #ifdef HAVE_ERRNO_H
32 #include <errno.h>
33 #endif
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37 #ifdef HAVE_STRING_H
38 #include <string.h>
39 #endif
40 #ifdef HAVE_UNISTD_H
41 #include <unistd.h>
42 #endif
43 #ifdef HAVE_ZLIB_H
44 #include <zlib.h>
45 #endif
46
47 #include "archive.h"
48 #include "archive_private.h"
49 #include "archive_read_private.h"
50
51 #ifdef HAVE_ZLIB_H
52 struct private_data {
53         z_stream         stream;
54         char             in_stream;
55         unsigned char   *out_block;
56         size_t           out_block_size;
57         int64_t          total_out;
58         unsigned long    crc;
59         char             eof; /* True = found end of compressed data. */
60 };
61
62 /* Gzip Filter. */
63 static ssize_t  gzip_filter_read(struct archive_read_filter *, const void **);
64 static int      gzip_filter_close(struct archive_read_filter *);
65 #endif
66
/*
 * Gzip archives can be recognized even when they cannot be decompressed
 * in-process, and recognizing them allows better error messages.  The
 * bidder is therefore compiled whether or not zlib is available.
 *
 * When zlib is unavailable, gzip_bidder_init() tries to run an
 * external gunzip program instead.
 */
static int      gzip_bidder_bid(struct archive_read_filter_bidder *self,
                    struct archive_read_filter *filter);
static int      gzip_bidder_init(struct archive_read_filter *self);
80
#if ARCHIVE_VERSION_NUMBER < 4000000
/*
 * Legacy name for archive_read_support_filter_gzip(); forwards the call
 * and returns its result unchanged.
 *
 * Deprecated; remove in libarchive 4.0.
 */
int
archive_read_support_compression_gzip(struct archive *a)
{
        int status;

        status = archive_read_support_filter_gzip(a);
        return (status);
}
#endif
89
90 int
91 archive_read_support_filter_gzip(struct archive *_a)
92 {
93         struct archive_read *a = (struct archive_read *)_a;
94         struct archive_read_filter_bidder *bidder;
95
96         archive_check_magic(_a, ARCHIVE_READ_MAGIC,
97             ARCHIVE_STATE_NEW, "archive_read_support_filter_gzip");
98
99         if (__archive_read_get_bidder(a, &bidder) != ARCHIVE_OK)
100                 return (ARCHIVE_FATAL);
101
102         bidder->data = NULL;
103         bidder->bid = gzip_bidder_bid;
104         bidder->init = gzip_bidder_init;
105         bidder->options = NULL;
106         bidder->free = NULL; /* No data, so no cleanup necessary. */
107         /* Signal the extent of gzip support with the return value here. */
108 #if HAVE_ZLIB_H
109         return (ARCHIVE_OK);
110 #else
111         archive_set_error(_a, ARCHIVE_ERRNO_MISC,
112             "Using external gunzip program");
113         return (ARCHIVE_WARN);
114 #endif
115 }
116
/*
 * Advance *plen past one NUL-terminated header field, one byte at a
 * time, asking for more read-ahead whenever the field extends beyond
 * the *pavail bytes already visible.
 *
 * Returns the current read-ahead pointer once the terminating NUL has
 * been seen, or NULL if a read-ahead request failed.
 */
static const unsigned char *
skip_nul_terminated_field(struct archive_read_filter *filter,
    const unsigned char *p, ssize_t *pavail, ssize_t *plen)
{
        do {
                ++(*plen);
                if (*pavail < *plen)
                        p = __archive_read_filter_ahead(filter,
                            *plen, pavail);
                if (p == NULL)
                        return (NULL);
        } while (p[*plen - 1] != 0);
        return (p);
}

/*
 * Inspect (without consuming) the gzip header at the current read
 * position of "filter".
 *
 * Returns 0 if the header could not be validated; otherwise returns the
 * total header length in bytes.  When validation succeeds and pbits is
 * non-NULL, *pbits receives the number of header bits that were
 * actually checked, which the bidder uses as its bid.
 */
static int
peek_at_header(struct archive_read_filter *filter, int *pbits)
{
        const unsigned char *hdr;
        ssize_t avail, hdr_len;
        int verified_bits;
        int flags;

        /* The first ten bytes of the header have a fixed layout. */
        hdr_len = 10;
        hdr = __archive_read_filter_ahead(filter, hdr_len, &avail);
        if (hdr == NULL || avail == 0)
                return (0);

        /* Two magic bytes, then the method byte; only deflate (0x08)
         * is supported. */
        if (memcmp(hdr, "\x1F\x8B\x08", 3) != 0)
                return (0);
        /* The reserved flag bits must all be clear. */
        if ((hdr[3] & 0xE0) != 0)
                return (0);
        /* 24 bits of magic/method plus 3 reserved flag bits. */
        verified_bits = 24 + 3;
        flags = hdr[3];

        /*
         * Bytes 4-7 hold the modification time, byte 8 the deflate
         * flags and byte 9 the OS; none of them is examined.
         * XXXX TODO: return deflate flags back to consume_header for
         * use in initializing the decompressor.
         */

        /* Optional extra data: 2-byte little-endian length, then body. */
        if (flags & 4) {
                hdr = __archive_read_filter_ahead(filter, hdr_len + 2,
                    &avail);
                if (hdr == NULL)
                        return (0);
                hdr_len += ((int)hdr[hdr_len + 1] << 8) | (int)hdr[hdr_len];
                hdr_len += 2;
        }

        /* Optional NUL-terminated file name. */
        if (flags & 8) {
                hdr = skip_nul_terminated_field(filter, hdr, &avail,
                    &hdr_len);
                if (hdr == NULL)
                        return (0);
        }

        /* Optional NUL-terminated comment. */
        if (flags & 16) {
                hdr = skip_nul_terminated_field(filter, hdr, &avail,
                    &hdr_len);
                if (hdr == NULL)
                        return (0);
        }

        /* Optional 2-byte header CRC. */
        if (flags & 2) {
                hdr = __archive_read_filter_ahead(filter, hdr_len + 2,
                    &avail);
                if (hdr == NULL)
                        return (0);
                /*
                 * XXX TODO: Compute the header CRC, compare it with the
                 * stored little-endian value at hdr[hdr_len], reject
                 * the header on mismatch and credit 16 more verified
                 * bits on success.
                 */
                hdr_len += 2;
        }

        if (pbits != NULL)
                *pbits = verified_bits;
        return ((int)hdr_len);
}
204
/*
 * Bid callback: the bid is the number of header bits that
 * peek_at_header() managed to verify, or 0 if the data does not start
 * with a valid gzip header.
 */
static int
gzip_bidder_bid(struct archive_read_filter_bidder *self,
    struct archive_read_filter *filter)
{
        int verified;

        (void)self; /* UNUSED */

        /* "verified" is only meaningful when a header was recognized. */
        return (peek_at_header(filter, &verified) != 0 ? verified : 0);
}
220
221
222 #ifndef HAVE_ZLIB_H
223
224 /*
225  * If we don't have the library on this system, we can't do the
226  * decompression directly.  We can, however, try to run gunzip
227  * in case that's available.
228  */
229 static int
230 gzip_bidder_init(struct archive_read_filter *self)
231 {
232         int r;
233
234         r = __archive_read_program(self, "gunzip");
235         /* Note: We set the format here even if __archive_read_program()
236          * above fails.  We do, after all, know what the format is
237          * even if we weren't able to read it. */
238         self->code = ARCHIVE_COMPRESSION_GZIP;
239         self->name = "gzip";
240         return (r);
241 }
242
243 #else
244
245 /*
246  * Initialize the filter object.
247  */
248 static int
249 gzip_bidder_init(struct archive_read_filter *self)
250 {
251         struct private_data *state;
252         static const size_t out_block_size = 64 * 1024;
253         void *out_block;
254
255         self->code = ARCHIVE_COMPRESSION_GZIP;
256         self->name = "gzip";
257
258         state = (struct private_data *)calloc(sizeof(*state), 1);
259         out_block = (unsigned char *)malloc(out_block_size);
260         if (state == NULL || out_block == NULL) {
261                 free(out_block);
262                 free(state);
263                 archive_set_error(&self->archive->archive, ENOMEM,
264                     "Can't allocate data for gzip decompression");
265                 return (ARCHIVE_FATAL);
266         }
267
268         self->data = state;
269         state->out_block_size = out_block_size;
270         state->out_block = out_block;
271         self->read = gzip_filter_read;
272         self->skip = NULL; /* not supported */
273         self->close = gzip_filter_close;
274
275         state->in_stream = 0; /* We're not actually within a stream yet. */
276
277         return (ARCHIVE_OK);
278 }
279
280 static int
281 consume_header(struct archive_read_filter *self)
282 {
283         struct private_data *state;
284         ssize_t avail;
285         size_t len;
286         int ret;
287
288         state = (struct private_data *)self->data;
289
290         /* If this is a real header, consume it. */
291         len = peek_at_header(self->upstream, NULL);
292         if (len == 0)
293                 return (ARCHIVE_EOF);
294         __archive_read_filter_consume(self->upstream, len);
295
296         /* Initialize CRC accumulator. */
297         state->crc = crc32(0L, NULL, 0);
298
299         /* Initialize compression library. */
300         state->stream.next_in = (unsigned char *)(uintptr_t)
301             __archive_read_filter_ahead(self->upstream, 1, &avail);
302         state->stream.avail_in = avail;
303         ret = inflateInit2(&(state->stream),
304             -15 /* Don't check for zlib header */);
305
306         /* Decipher the error code. */
307         switch (ret) {
308         case Z_OK:
309                 state->in_stream = 1;
310                 return (ARCHIVE_OK);
311         case Z_STREAM_ERROR:
312                 archive_set_error(&self->archive->archive,
313                     ARCHIVE_ERRNO_MISC,
314                     "Internal error initializing compression library: "
315                     "invalid setup parameter");
316                 break;
317         case Z_MEM_ERROR:
318                 archive_set_error(&self->archive->archive, ENOMEM,
319                     "Internal error initializing compression library: "
320                     "out of memory");
321                 break;
322         case Z_VERSION_ERROR:
323                 archive_set_error(&self->archive->archive,
324                     ARCHIVE_ERRNO_MISC,
325                     "Internal error initializing compression library: "
326                     "invalid library version");
327                 break;
328         default:
329                 archive_set_error(&self->archive->archive,
330                     ARCHIVE_ERRNO_MISC,
331                     "Internal error initializing compression library: "
332                     " Zlib error %d", ret);
333                 break;
334         }
335         return (ARCHIVE_FATAL);
336 }
337
338 static int
339 consume_trailer(struct archive_read_filter *self)
340 {
341         struct private_data *state;
342         const unsigned char *p;
343         ssize_t avail;
344
345         state = (struct private_data *)self->data;
346
347         state->in_stream = 0;
348         switch (inflateEnd(&(state->stream))) {
349         case Z_OK:
350                 break;
351         default:
352                 archive_set_error(&self->archive->archive,
353                     ARCHIVE_ERRNO_MISC,
354                     "Failed to clean up gzip decompressor");
355                 return (ARCHIVE_FATAL);
356         }
357
358         /* GZip trailer is a fixed 8 byte structure. */
359         p = __archive_read_filter_ahead(self->upstream, 8, &avail);
360         if (p == NULL || avail == 0)
361                 return (ARCHIVE_FATAL);
362
363         /* XXX TODO: Verify the length and CRC. */
364
365         /* We've verified the trailer, so consume it now. */
366         __archive_read_filter_consume(self->upstream, 8);
367
368         return (ARCHIVE_OK);
369 }
370
371 static ssize_t
372 gzip_filter_read(struct archive_read_filter *self, const void **p)
373 {
374         struct private_data *state;
375         size_t decompressed;
376         ssize_t avail_in;
377         int ret;
378
379         state = (struct private_data *)self->data;
380
381         /* Empty our output buffer. */
382         state->stream.next_out = state->out_block;
383         state->stream.avail_out = state->out_block_size;
384
385         /* Try to fill the output buffer. */
386         while (state->stream.avail_out > 0 && !state->eof) {
387                 /* If we're not in a stream, read a header
388                  * and initialize the decompression library. */
389                 if (!state->in_stream) {
390                         ret = consume_header(self);
391                         if (ret == ARCHIVE_EOF) {
392                                 state->eof = 1;
393                                 break;
394                         }
395                         if (ret < ARCHIVE_OK)
396                                 return (ret);
397                 }
398
399                 /* Peek at the next available data. */
400                 /* ZLib treats stream.next_in as const but doesn't declare
401                  * it so, hence this ugly cast. */
402                 state->stream.next_in = (unsigned char *)(uintptr_t)
403                     __archive_read_filter_ahead(self->upstream, 1, &avail_in);
404                 if (state->stream.next_in == NULL) {
405                         archive_set_error(&self->archive->archive,
406                             ARCHIVE_ERRNO_MISC,
407                             "truncated gzip input");
408                         return (ARCHIVE_FATAL);
409                 }
410                 state->stream.avail_in = avail_in;
411
412                 /* Decompress and consume some of that data. */
413                 ret = inflate(&(state->stream), 0);
414                 switch (ret) {
415                 case Z_OK: /* Decompressor made some progress. */
416                         __archive_read_filter_consume(self->upstream,
417                             avail_in - state->stream.avail_in);
418                         break;
419                 case Z_STREAM_END: /* Found end of stream. */
420                         __archive_read_filter_consume(self->upstream,
421                             avail_in - state->stream.avail_in);
422                         /* Consume the stream trailer; release the
423                          * decompression library. */
424                         ret = consume_trailer(self);
425                         if (ret < ARCHIVE_OK)
426                                 return (ret);
427                         break;
428                 default:
429                         /* Return an error. */
430                         archive_set_error(&self->archive->archive,
431                             ARCHIVE_ERRNO_MISC,
432                             "gzip decompression failed");
433                         return (ARCHIVE_FATAL);
434                 }
435         }
436
437         /* We've read as much as we can. */
438         decompressed = state->stream.next_out - state->out_block;
439         state->total_out += decompressed;
440         if (decompressed == 0)
441                 *p = NULL;
442         else
443                 *p = state->out_block;
444         return (decompressed);
445 }
446
447 /*
448  * Clean up the decompressor.
449  */
450 static int
451 gzip_filter_close(struct archive_read_filter *self)
452 {
453         struct private_data *state;
454         int ret;
455
456         state = (struct private_data *)self->data;
457         ret = ARCHIVE_OK;
458
459         if (state->in_stream) {
460                 switch (inflateEnd(&(state->stream))) {
461                 case Z_OK:
462                         break;
463                 default:
464                         archive_set_error(&(self->archive->archive),
465                             ARCHIVE_ERRNO_MISC,
466                             "Failed to clean up gzip compressor");
467                         ret = ARCHIVE_FATAL;
468                 }
469         }
470
471         free(state->out_block);
472         free(state);
473         return (ret);
474 }
475
476 #endif /* HAVE_ZLIB_H */