2 * Copyright (c) 2003-2004 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 * This code borrows heavily from "compress" source code, which is
29 * protected by the following copyright. (Clause 3 dropped by request
34 * Copyright (c) 1985, 1986, 1992, 1993
35 * The Regents of the University of California. All rights reserved.
37 * This code is derived from software contributed to Berkeley by
38 * Diomidis Spinellis and James A. Woods, derived from original
39 * work by Spencer Thomas and Joseph Orost.
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
67 #include "archive_platform.h"
68 __FBSDID("$FreeBSD: src/lib/libarchive/archive_read_support_compression_compress.c,v 1.5 2006/07/30 00:29:00 kientzle Exp $");
76 #include "archive_private.h"
79 * Because LZW decompression is pretty simple, I've just implemented
80 * the whole decompressor here (cribbing from "compress" source code,
81 * of course), rather than relying on an external library. I have
82 * made an effort to clarify and simplify the algorithm, so the
83 * names and structure here don't exactly match those used by compress.
87 /* Input variables. */
88 const unsigned char *next_in;
92 size_t bytes_in_section;
94 /* Output variables. */
95 size_t uncompressed_buffer_size;
96 void *uncompressed_buffer;
97 unsigned char *read_next; /* Data for client. */
98 unsigned char *next_out; /* Where to write new data. */
99 size_t avail_out; /* Space at end of buffer. */
101 /* Decompression status variables. */
103 int end_of_stream; /* EOF status. */
104 int maxcode; /* Largest code. */
105 int maxcode_bits; /* Length of largest code. */
106 int section_end_code; /* When to increase bits. */
107 int bits; /* Current code length. */
108 int oldcode; /* Previous code. */
109 int finbyte; /* Last byte of prev code. */
112 int free_ent; /* Next dictionary entry. */
113 unsigned char suffix[65536];
114 uint16_t prefix[65536];
117 * Scratch area for expanding dictionary entries. Note:
118 * "worst" case here comes from compressing /dev/zero: the
119 * last code in the dictionary will code a sequence of
120 * 65536-256 zero bytes. Thus, we need stack space to expand
121 * a 65280-byte dictionary entry. (Of course, 32640:1
122 * compression could also be considered the "best" case. ;-)
124 unsigned char *stackp;
125 unsigned char stack[65300];
128 static int bid(const void *, size_t);
129 static int finish(struct archive *);
130 static int init(struct archive *, const void *, size_t);
131 static ssize_t read_ahead(struct archive *, const void **, size_t);
132 static ssize_t read_consume(struct archive *, size_t);
133 static int getbits(struct archive *, struct private_data *, int n);
134 static int next_code(struct archive *a, struct private_data *state);
137 archive_read_support_compression_compress(struct archive *a)
139 return (__archive_read_register_compression(a, bid, init));
143 * Test whether we can handle this data.
145 * This logic returns zero if any part of the signature fails. It
146 * also tries to Do The Right Thing if a very short buffer prevents us
147 * from verifying as much as we would like.
150 bid(const void *buff, size_t len)
152 const unsigned char *buffer;
160 if (buffer[0] != 037) /* Verify first ID byte. */
164 return (bits_checked);
166 if (buffer[1] != 0235) /* Verify second ID byte. */
170 return (bits_checked);
176 return (bits_checked);
180 * Setup the callbacks.
183 init(struct archive *a, const void *buff, size_t n)
185 struct private_data *state;
188 a->compression_code = ARCHIVE_COMPRESSION_COMPRESS;
189 a->compression_name = "compress (.Z)";
191 a->compression_read_ahead = read_ahead;
192 a->compression_read_consume = read_consume;
193 a->compression_skip = NULL; /* not supported */
194 a->compression_finish = finish;
196 state = malloc(sizeof(*state));
198 archive_set_error(a, ENOMEM,
199 "Can't allocate data for %s decompression",
200 a->compression_name);
201 return (ARCHIVE_FATAL);
203 memset(state, 0, sizeof(*state));
204 a->compression_data = state;
206 state->uncompressed_buffer_size = 64 * 1024;
207 state->uncompressed_buffer = malloc(state->uncompressed_buffer_size);
209 if (state->uncompressed_buffer == NULL) {
210 archive_set_error(a, ENOMEM,
211 "Can't allocate %s decompression buffers",
212 a->compression_name);
216 state->next_in = buff;
218 state->read_next = state->next_out = state->uncompressed_buffer;
219 state->avail_out = state->uncompressed_buffer_size;
221 code = getbits(a, state, 8);
222 if (code != 037) /* This should be impossible. */
225 code = getbits(a, state, 8);
227 /* This can happen if the library is receiving 1-byte
228 * blocks and gzip and compress are both enabled.
229 * You can't distinguish gzip and compress only from
231 archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
232 "Compress signature did not match.");
236 code = getbits(a, state, 8);
237 state->maxcode_bits = code & 0x1f;
238 state->maxcode = (1 << state->maxcode_bits);
239 state->use_reset_code = code & 0x80;
241 /* Initialize decompressor. */
242 state->free_ent = 256;
243 state->stackp = state->stack;
244 if (state->use_reset_code)
247 state->section_end_code = (1<<state->bits) - 1;
249 for (code = 255; code >= 0; code--) {
250 state->prefix[code] = 0;
251 state->suffix[code] = code;
258 return (ARCHIVE_FATAL);
262 * Return a block of data from the decompression buffer. Decompress more
266 read_ahead(struct archive *a, const void **p, size_t min)
268 struct private_data *state;
269 int read_avail, was_avail, ret;
271 state = a->compression_data;
273 if (!a->client_reader) {
274 archive_set_error(a, ARCHIVE_ERRNO_PROGRAMMER,
275 "No read callback is registered? "
276 "This is probably an internal programming error.");
277 return (ARCHIVE_FATAL);
280 read_avail = state->next_out - state->read_next;
282 if (read_avail < (int)min && state->end_of_stream) {
283 if (state->end_of_stream == ARCHIVE_EOF)
289 if (read_avail < (int)min) {
290 memmove(state->uncompressed_buffer, state->read_next,
292 state->read_next = state->uncompressed_buffer;
293 state->next_out = state->read_next + read_avail;
295 = state->uncompressed_buffer_size - read_avail;
297 while (read_avail < (int)state->uncompressed_buffer_size
298 && !state->end_of_stream) {
299 if (state->stackp > state->stack) {
300 *state->next_out++ = *--state->stackp;
304 ret = next_code(a, state);
305 if (ret == ARCHIVE_EOF)
306 state->end_of_stream = ret;
307 else if (ret != ARCHIVE_OK)
313 *p = state->read_next;
318 * Mark a previously-returned block of data as read.
321 read_consume(struct archive *a, size_t n)
323 struct private_data *state;
325 state = a->compression_data;
326 a->file_position += n;
327 state->read_next += n;
328 if (state->read_next > state->next_out)
329 __archive_errx(1, "Request to consume too many "
330 "bytes from compress decompressor");
335 * Clean up the decompressor.
338 finish(struct archive *a)
340 struct private_data *state;
341 int ret = ARCHIVE_OK;
343 state = a->compression_data;
346 if (state->uncompressed_buffer != NULL)
347 free(state->uncompressed_buffer);
351 a->compression_data = NULL;
352 if (a->client_closer != NULL)
353 ret = (a->client_closer)(a, a->client_data);
359 * Process the next code and fill the stack with the expansion
360 * of the code. Returns ARCHIVE_FATAL if there is a fatal I/O or
361 * format error, ARCHIVE_EOF if we hit end of data, ARCHIVE_OK otherwise.
364 next_code(struct archive *a, struct private_data *state)
368 static int debug_buff[1024];
369 static unsigned debug_index;
371 code = newcode = getbits(a, state, state->bits);
375 debug_buff[debug_index++] = code;
376 if (debug_index >= sizeof(debug_buff)/sizeof(debug_buff[0]))
379 /* If it's a reset code, reset the dictionary. */
380 if ((code == 256) && state->use_reset_code) {
382 * The original 'compress' implementation blocked its
383 * I/O in a manner that resulted in junk bytes being
384 * inserted after every reset. The next section skips
385 * this junk. (Yes, the number of *bytes* to skip is
386 * a function of the current *bit* length.)
388 int skip_bytes = state->bits -
389 (state->bytes_in_section % state->bits);
390 skip_bytes %= state->bits;
391 state->bits_avail = 0; /* Discard rest of this byte. */
392 while (skip_bytes-- > 0) {
393 code = getbits(a, state, 8);
397 /* Now, actually do the reset. */
398 state->bytes_in_section = 0;
400 state->section_end_code = (1 << state->bits) - 1;
401 state->free_ent = 257;
403 return (next_code(a, state));
406 if (code > state->free_ent) {
407 /* An invalid code is a fatal error. */
408 archive_set_error(a, -1, "Invalid compressed data: code %d is larger than max code %d (input address %X)", code, state->free_ent, state->next_in);
409 return (ARCHIVE_FATAL);
412 /* Special case for KwKwK string. */
413 if (code >= state->free_ent) {
414 *state->stackp++ = state->finbyte;
415 code = state->oldcode;
418 /* Generate output characters in reverse order. */
419 while (code >= 256) {
420 *state->stackp++ = state->suffix[code];
421 code = state->prefix[code];
423 *state->stackp++ = state->finbyte = code;
425 /* Generate the new entry. */
426 code = state->free_ent;
427 if (code < state->maxcode && state->oldcode >= 0) {
428 state->prefix[code] = state->oldcode;
429 state->suffix[code] = state->finbyte;
432 if (state->free_ent > state->section_end_code) {
434 state->bytes_in_section = 0;
435 if (state->bits == state->maxcode_bits)
436 state->section_end_code = state->maxcode;
438 state->section_end_code = (1 << state->bits) - 1;
441 /* Remember previous code. */
442 state->oldcode = newcode;
447 * Return next 'n' bits from stream.
449 * -1 indicates end of available data.
452 getbits(struct archive *a, struct private_data *state, int n)
455 static const int mask[] = {
456 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff,
457 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
461 while (state->bits_avail < n) {
462 if (state->avail_in <= 0) {
463 ret = (a->client_reader)(a, a->client_data,
464 (const void **)&state->next_in);
466 return (ARCHIVE_FATAL);
468 return (ARCHIVE_EOF);
469 a->raw_position += ret;
470 state->avail_in = ret;
472 state->bit_buffer |= *state->next_in++ << state->bits_avail;
474 state->bits_avail += 8;
475 state->bytes_in_section++;
478 code = state->bit_buffer;
479 state->bit_buffer >>= n;
480 state->bits_avail -= n;
482 return (code & mask[n]);