Import libarchive-2.2.5 which fixes a forgotten 'break'. Without this,
[dragonfly.git] / contrib / libarchive-2.1 / libarchive / archive_read_support_compression_gzip.c
1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "archive_platform.h"
27
28 __FBSDID("$FreeBSD: src/lib/libarchive/archive_read_support_compression_gzip.c,v 1.14 2007/04/05 05:18:16 kientzle Exp $");
29
30
31 #ifdef HAVE_ERRNO_H
32 #include <errno.h>
33 #endif
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37 #ifdef HAVE_STRING_H
38 #include <string.h>
39 #endif
40 #ifdef HAVE_UNISTD_H
41 #include <unistd.h>
42 #endif
43 #ifdef HAVE_ZLIB_H
44 #include <zlib.h>
45 #endif
46
47 #include "archive.h"
48 #include "archive_private.h"
49 #include "archive_read_private.h"
50
51 #ifdef HAVE_ZLIB_H
52 struct private_data {
53         z_stream         stream;
54         unsigned char   *uncompressed_buffer;
55         size_t           uncompressed_buffer_size;
56         unsigned char   *read_next;
57         int64_t          total_out;
58         unsigned long    crc;
59         char             header_done;
60 };
61
62 static int      finish(struct archive_read *);
63 static ssize_t  read_ahead(struct archive_read *, const void **, size_t);
64 static ssize_t  read_consume(struct archive_read *, size_t);
65 static int      drive_decompressor(struct archive_read *a, struct private_data *);
66 #endif
67
68 /* These two functions are defined even if we lack zlib.  See below. */
69 static int      bid(const void *, size_t);
70 static int      init(struct archive_read *, const void *, size_t);
71
72 int
73 archive_read_support_compression_gzip(struct archive *_a)
74 {
75         struct archive_read *a = (struct archive_read *)_a;
76         if(__archive_read_register_compression(a, bid, init) != NULL)
77                 return (ARCHIVE_OK);
78         return (ARCHIVE_FATAL);
79 }
80
/*
 * Score how strongly a buffer looks like the start of a gzip stream.
 *
 * Up to four leading bytes are examined: the two signature bytes, the
 * compression method (must be 8, 'deflate'), and the flags byte (its
 * top three reserved bits must be clear).  Any mismatch yields 0.
 * Otherwise the result is the number of bits that were verified, so a
 * short buffer gets a proportionally smaller (but nonzero) score:
 * 8, 16, 24, or 27.  An empty buffer scores 0.
 *
 * TODO: Verify more; in particular, gzip has an optional header CRC,
 * which would give us 16 more verified bits.  We may also be able to
 * verify certain constraints on other fields.
 */
static int
bid(const void *buff, size_t len)
{
        /* One entry per header byte: (byte & mask) must equal want. */
        static const struct {
                unsigned char   mask;
                unsigned char   want;
                int             bits;
        } checks[] = {
                { 0xFF, 037,  8 },      /* First ID byte. */
                { 0xFF, 0213, 8 },      /* Second ID byte. */
                { 0xFF, 8,    8 },      /* Compression must be 'deflate'. */
                { 0xE0, 0,    3 }       /* No reserved flags set. */
        };
        const unsigned char *bytes = (const unsigned char *)buff;
        int verified = 0;
        size_t i;

        for (i = 0; i < sizeof(checks) / sizeof(checks[0]) && i < len; i++) {
                if ((bytes[i] & checks[i].mask) != checks[i].want)
                        return (0);
                verified += checks[i].bits;
        }

        return (verified);
}
132
133
134 #ifndef HAVE_ZLIB_H
135
136 /*
137  * If we don't have zlib on this system, we can't actually do the
138  * decompression.  We can, however, still detect gzip-compressed
139  * archives and emit a useful message.
140  */
141 static int
142 init(struct archive_read *a, const void *buff, size_t n)
143 {
144         (void)a;        /* UNUSED */
145         (void)buff;     /* UNUSED */
146         (void)n;        /* UNUSED */
147
148         archive_set_error(a, -1,
149             "This version of libarchive was compiled without gzip support");
150         return (ARCHIVE_FATAL);
151 }
152
153
154 #else
155
156 /*
157  * Setup the callbacks.
158  */
159 static int
160 init(struct archive_read *a, const void *buff, size_t n)
161 {
162         struct private_data *state;
163         int ret;
164
165         a->archive.compression_code = ARCHIVE_COMPRESSION_GZIP;
166         a->archive.compression_name = "gzip";
167
168         state = (struct private_data *)malloc(sizeof(*state));
169         if (state == NULL) {
170                 archive_set_error(&a->archive, ENOMEM,
171                     "Can't allocate data for %s decompression",
172                     a->archive.compression_name);
173                 return (ARCHIVE_FATAL);
174         }
175         memset(state, 0, sizeof(*state));
176
177         state->crc = crc32(0L, NULL, 0);
178         state->header_done = 0; /* We've not yet begun to parse header... */
179
180         state->uncompressed_buffer_size = 64 * 1024;
181         state->uncompressed_buffer = (unsigned char *)malloc(state->uncompressed_buffer_size);
182         state->stream.next_out = state->uncompressed_buffer;
183         state->read_next = state->uncompressed_buffer;
184         state->stream.avail_out = state->uncompressed_buffer_size;
185
186         if (state->uncompressed_buffer == NULL) {
187                 archive_set_error(&a->archive, ENOMEM,
188                     "Can't allocate %s decompression buffers",
189                     a->archive.compression_name);
190                 free(state);
191                 return (ARCHIVE_FATAL);
192         }
193
194         /*
195          * A bug in zlib.h: stream.next_in should be marked 'const'
196          * but isn't (the library never alters data through the
197          * next_in pointer, only reads it).  The result: this ugly
198          * cast to remove 'const'.
199          */
200         state->stream.next_in = (Bytef *)(uintptr_t)(const void *)buff;
201         state->stream.avail_in = n;
202
203         a->decompressor->read_ahead = read_ahead;
204         a->decompressor->consume = read_consume;
205         a->decompressor->skip = NULL; /* not supported */
206         a->decompressor->finish = finish;
207
208         /*
209          * TODO: Do I need to parse the gzip header before calling
210          * inflateInit2()?  In particular, one of the header bytes
211          * marks "best compression" or "fastest", which may be
212          * appropriate for setting the second parameter here.
213          * However, I think the only penalty for not setting it
214          * correctly is wasted memory.  If this is necessary, it
215          * should probably go into drive_decompressor() below.
216          */
217
218         /* Initialize compression library. */
219         ret = inflateInit2(&(state->stream),
220             -15 /* Don't check for zlib header */);
221         if (ret == Z_OK) {
222                 a->decompressor->data = state;
223                 return (ARCHIVE_OK);
224         }
225
226         /* Library setup failed: Clean up. */
227         archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
228             "Internal error initializing %s library",
229             a->archive.compression_name);
230         free(state->uncompressed_buffer);
231         free(state);
232
233         /* Override the error message if we know what really went wrong. */
234         switch (ret) {
235         case Z_STREAM_ERROR:
236                 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
237                     "Internal error initializing compression library: "
238                     "invalid setup parameter");
239                 break;
240         case Z_MEM_ERROR:
241                 archive_set_error(&a->archive, ENOMEM,
242                     "Internal error initializing compression library: "
243                     "out of memory");
244                 break;
245         case Z_VERSION_ERROR:
246                 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
247                     "Internal error initializing compression library: "
248                     "invalid library version");
249                 break;
250         }
251
252         return (ARCHIVE_FATAL);
253 }
254
255 /*
256  * Return a block of data from the decompression buffer.  Decompress more
257  * as necessary.
258  */
259 static ssize_t
260 read_ahead(struct archive_read *a, const void **p, size_t min)
261 {
262         struct private_data *state;
263         size_t read_avail, was_avail;
264         int ret;
265
266         state = (struct private_data *)a->decompressor->data;
267         if (!a->client_reader) {
268                 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
269                     "No read callback is registered?  "
270                     "This is probably an internal programming error.");
271                 return (ARCHIVE_FATAL);
272         }
273
274         read_avail = state->stream.next_out - state->read_next;
275
276         if (read_avail + state->stream.avail_out < min) {
277                 memmove(state->uncompressed_buffer, state->read_next,
278                     read_avail);
279                 state->read_next = state->uncompressed_buffer;
280                 state->stream.next_out = state->read_next + read_avail;
281                 state->stream.avail_out
282                     = state->uncompressed_buffer_size - read_avail;
283         }
284
285         while (read_avail < min &&              /* Haven't satisfied min. */
286             read_avail < state->uncompressed_buffer_size) { /* !full */
287                 was_avail = read_avail;
288                 if ((ret = drive_decompressor(a, state)) != ARCHIVE_OK)
289                         return (ret);
290                 read_avail = state->stream.next_out - state->read_next;
291                 if (was_avail == read_avail) /* No progress? */
292                         break;
293         }
294
295         *p = state->read_next;
296         return (read_avail);
297 }
298
299 /*
300  * Mark a previously-returned block of data as read.
301  */
302 static ssize_t
303 read_consume(struct archive_read *a, size_t n)
304 {
305         struct private_data *state;
306
307         state = (struct private_data *)a->decompressor->data;
308         a->archive.file_position += n;
309         state->read_next += n;
310         if (state->read_next > state->stream.next_out)
311                 __archive_errx(1, "Request to consume too many "
312                     "bytes from gzip decompressor");
313         return (n);
314 }
315
316 /*
317  * Clean up the decompressor.
318  */
319 static int
320 finish(struct archive_read *a)
321 {
322         struct private_data *state;
323         int ret;
324
325         state = (struct private_data *)a->decompressor->data;
326         ret = ARCHIVE_OK;
327         switch (inflateEnd(&(state->stream))) {
328         case Z_OK:
329                 break;
330         default:
331                 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
332                     "Failed to clean up %s compressor",
333                     a->archive.compression_name);
334                 ret = ARCHIVE_FATAL;
335         }
336
337         free(state->uncompressed_buffer);
338         free(state);
339
340         a->decompressor->data = NULL;
341         return (ret);
342 }
343
344 /*
345  * Utility function to pull data through decompressor, reading input
346  * blocks as necessary.
347  */
348 static int
349 drive_decompressor(struct archive_read *a, struct private_data *state)
350 {
351         ssize_t ret;
352         size_t decompressed, total_decompressed;
353         int count, flags, header_state;
354         unsigned char *output;
355         unsigned char b;
356         const void *read_buf;
357
358         flags = 0;
359         count = 0;
360         header_state = 0;
361         total_decompressed = 0;
362         for (;;) {
363                 if (state->stream.avail_in == 0) {
364                         read_buf = state->stream.next_in;
365                         ret = (a->client_reader)(&a->archive, a->client_data,
366                             &read_buf);
367                         state->stream.next_in = (unsigned char *)(uintptr_t)read_buf;
368                         if (ret < 0) {
369                                 /*
370                                  * TODO: Find a better way to handle
371                                  * this read failure.
372                                  */
373                                 goto fatal;
374                         }
375                         if (ret == 0  &&  total_decompressed == 0) {
376                                 archive_set_error(&a->archive, EIO,
377                                     "Premature end of %s compressed data",
378                                     a->archive.compression_name);
379                                 return (ARCHIVE_FATAL);
380                         }
381                         a->archive.raw_position += ret;
382                         state->stream.avail_in = ret;
383                 }
384
385                 if (!state->header_done) {
386                         /*
387                          * If still parsing the header, interpret the
388                          * next byte.
389                          */
390                         b = *(state->stream.next_in++);
391                         state->stream.avail_in--;
392
393                         /*
394                          * Yes, this is somewhat crude, but it works,
395                          * GZip format isn't likely to change anytime
396                          * in the near future, and header parsing is
397                          * certainly not a performance issue, so
398                          * there's little point in making this more
399                          * elegant.  Of course, if you see an easy way
400                          * to make this more elegant, please let me
401                          * know.. ;-)
402                          */
403                         switch (header_state) {
404                         case 0: /* First byte of signature. */
405                                 if (b != 037)
406                                         goto fatal;
407                                 header_state = 1;
408                                 break;
409                         case 1: /* Second byte of signature. */
410                                 if (b != 0213)
411                                         goto fatal;
412                                 header_state = 2;
413                                 break;
414                         case 2: /* Compression type must be 8. */
415                                 if (b != 8)
416                                         goto fatal;
417                                 header_state = 3;
418                                 break;
419                         case 3: /* GZip flags. */
420                                 flags = b;
421                                 header_state = 4;
422                                 break;
423                         case 4: case 5: case 6: case 7: /* Mod time. */
424                                 header_state++;
425                                 break;
426                         case 8: /* Deflate flags. */
427                                 header_state = 9;
428                                 break;
429                         case 9: /* OS. */
430                                 header_state = 10;
431                                 break;
432                         case 10: /* Optional Extra: First byte of Length. */
433                                 if ((flags & 4)) {
434                                         count = 255 & (int)b;
435                                         header_state = 11;
436                                         break;
437                                 }
438                                 /*
439                                  * Fall through if there is no
440                                  * Optional Extra field.
441                                  */
442                         case 11: /* Optional Extra: Second byte of Length. */
443                                 if ((flags & 4)) {
444                                         count = (0xff00 & ((int)b << 8)) | count;
445                                         header_state = 12;
446                                         break;
447                                 }
448                                 /*
449                                  * Fall through if there is no
450                                  * Optional Extra field.
451                                  */
452                         case 12: /* Optional Extra Field: counted length. */
453                                 if ((flags & 4)) {
454                                         --count;
455                                         if (count == 0) header_state = 13;
456                                         else header_state = 12;
457                                         break;
458                                 }
459                                 /*
460                                  * Fall through if there is no
461                                  * Optional Extra field.
462                                  */
463                         case 13: /* Optional Original Filename. */
464                                 if ((flags & 8)) {
465                                         if (b == 0) header_state = 14;
466                                         else header_state = 13;
467                                         break;
468                                 }
469                                 /*
470                                  * Fall through if no Optional
471                                  * Original Filename.
472                                  */
473                         case 14: /* Optional Comment. */
474                                 if ((flags & 16)) {
475                                         if (b == 0) header_state = 15;
476                                         else header_state = 14;
477                                         break;
478                                 }
479                                 /* Fall through if no Optional Comment. */
480                         case 15: /* Optional Header CRC: First byte. */
481                                 if ((flags & 2)) {
482                                         header_state = 16;
483                                         break;
484                                 }
485                                 /* Fall through if no Optional Header CRC. */
486                         case 16: /* Optional Header CRC: Second byte. */
487                                 if ((flags & 2)) {
488                                         header_state = 17;
489                                         break;
490                                 }
491                                 /* Fall through if no Optional Header CRC. */
492                         case 17: /* First byte of compressed data. */
493                                 state->header_done = 1; /* done with header */
494                                 state->stream.avail_in++;
495                                 state->stream.next_in--;
496                         }
497
498                         /*
499                          * TODO: Consider moving the inflateInit2 call
500                          * here so it can include the compression type
501                          * from the header?
502                          */
503                 } else {
504                         output = state->stream.next_out;
505
506                         /* Decompress some data. */
507                         ret = inflate(&(state->stream), 0);
508                         decompressed = state->stream.next_out - output;
509
510                         /* Accumulate the CRC of the uncompressed data. */
511                         state->crc = crc32(state->crc, output, decompressed);
512
513                         /* Accumulate the total bytes of output. */
514                         state->total_out += decompressed;
515                         total_decompressed += decompressed;
516
517                         switch (ret) {
518                         case Z_OK: /* Decompressor made some progress. */
519                                 if (decompressed > 0)
520                                         return (ARCHIVE_OK);
521                                 break;
522                         case Z_STREAM_END: /* Found end of stream. */
523                                 /*
524                                  * TODO: Verify gzip trailer
525                                  * (uncompressed length and CRC).
526                                  */
527                                 return (ARCHIVE_OK);
528                         default:
529                                 /* Any other return value is an error. */
530                                 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
531                                     "gzip decompression failed (%s)",
532                                     state->stream.msg);
533                                 goto fatal;
534                         }
535                 }
536         }
537         return (ARCHIVE_OK);
538
539         /* Return a fatal error. */
540 fatal:
541         archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
542             "%s decompression failed", a->archive.compression_name);
543         return (ARCHIVE_FATAL);
544 }
545
546 #endif /* HAVE_ZLIB_H */