1 /* zlib.c --- interface to the zlib compression library
2 Ian Lance Taylor <ian@cygnus.com>
4 This file is part of GNU CVS.
6 GNU CVS is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details. */
16 /* The routines in this file are the interface between the CVS
17 client/server support and the zlib compression library. */
22 #if defined (SERVER_SUPPORT) || defined (CLIENT_SUPPORT)
30 /* OS/2 doesn't have EIO. FIXME: this whole notion of turning
31 a different error into EIO strikes me as pretty dubious. */
36 /* The compression interface is built upon the buffer data structure.
37 We provide a buffer type which compresses or decompresses the data
38 which passes through it. An input buffer decompresses the data
39 read from an underlying buffer, and an output buffer compresses the
40 data before writing it to an underlying buffer. */
42 /* This structure is the closure field of the buffer. */
44 struct compress_buffer
46 /* The underlying buffer. */
48 /* The compression information. */
52 static void compress_error (int, int, z_stream *, const char *);
53 static int compress_buffer_input (void *, char *, int, int, int *);
54 static int compress_buffer_output (void *, const char *, int, int *);
55 static int compress_buffer_flush (void *);
56 static int compress_buffer_block (void *, int);
57 static int compress_buffer_shutdown_input (struct buffer *);
58 static int compress_buffer_shutdown_output (struct buffer *);
60 /* Report an error from one of the zlib functions. */
63 compress_error (int status, int zstatus, z_stream *zstr, const char *msg)
74 sprintf (buf, "error %d", zstatus);
79 zstatus == Z_ERRNO ? hold_errno : 0,
83 /* Create a compression buffer. */
86 compress_buffer_initialize (struct buffer *buf, int input, int level, void (*memory) (struct buffer *))
88 struct compress_buffer *n;
91 n = (struct compress_buffer *) xmalloc (sizeof *n);
92 memset (n, 0, sizeof *n);
97 zstatus = inflateInit (&n->zstr);
99 zstatus = deflateInit (&n->zstr, level);
101 compress_error (1, zstatus, &n->zstr, "compression initialization");
103 /* There may already be data buffered on BUF. For an output
104 buffer, this is OK, because these routines will just use the
105 buffer routines to append data to the (uncompressed) data
106 already on BUF. An input buffer expects to handle a single
107 buffer_data of buffered input to be uncompressed, so that is OK
108 provided there is only one buffer. At present that is all
109 there ever will be; if this changes, compress_buffer_input must
110 be modified to handle multiple input buffers. */
111 assert (! input || buf->data == NULL || buf->data->next == NULL);
113 return buf_initialize (input ? compress_buffer_input : NULL,
114 input ? NULL : compress_buffer_output,
115 input ? NULL : compress_buffer_flush,
116 compress_buffer_block,
118 ? compress_buffer_shutdown_input
119 : compress_buffer_shutdown_output),
124 /* Input data from a compression buffer. */
127 compress_buffer_input (void *closure, char *data, int need, int size, int *got)
129 struct compress_buffer *cb = (struct compress_buffer *) closure;
130 struct buffer_data *bd;
132 if (cb->buf->input == NULL)
135 /* We use a single buffer_data structure to buffer up data which
136 the z_stream structure won't use yet. We can safely store this
137 on cb->buf->data, because we never call the buffer routines on
138 cb->buf; we only call the buffer input routine, since that
139 gives us the semantics we want. As noted in
140 compress_buffer_initialize, the buffer_data structure may
141 already exist, and hold data which was already read and
142 buffered before the decompression began. */
146 bd = ((struct buffer_data *) xmalloc (sizeof (struct buffer_data)));
149 bd->text = (char *) xmalloc (BUFFER_DATA_SIZE);
150 if (bd->text == NULL)
160 cb->zstr.avail_out = size;
161 cb->zstr.next_out = (Bytef *) data;
165 int zstatus, sofar, status, nread;
167 /* First try to inflate any data we already have buffered up.
168 This is useful even if we don't have any buffered data,
169 because there may be data buffered inside the z_stream structure. */
172 cb->zstr.avail_in = bd->size;
173 cb->zstr.next_in = (Bytef *) bd->bufp;
177 zstatus = inflate (&cb->zstr, Z_NO_FLUSH);
178 if (zstatus == Z_STREAM_END)
180 if (zstatus != Z_OK && zstatus != Z_BUF_ERROR)
182 compress_error (0, zstatus, &cb->zstr, "inflate");
185 } while (cb->zstr.avail_in > 0
186 && cb->zstr.avail_out > 0);
188 bd->size = cb->zstr.avail_in;
189 bd->bufp = (char *) cb->zstr.next_in;
191 if (zstatus == Z_STREAM_END)
194 /* If we have obtained NEED bytes, then return, unless NEED is
195 zero and we haven't obtained anything at all. If NEED is
196 zero, we will keep reading from the underlying buffer until
197 we either can't read anything, or we have managed to
198 inflate at least one byte. */
199 sofar = size - cb->zstr.avail_out;
200 if (sofar > 0 && sofar >= need)
203 /* All our buffered data should have been processed at this point. */
205 assert (bd->size == 0);
207 /* This will work well in the server, because this call will
208 do an unblocked read and fetch all the available data. In
209 the client, this will read a single byte from the stdio
210 stream, which will cause us to call inflate once per byte.
211 It would be more efficient if we could make a call which
212 would fetch all the available bytes, and at least one byte. */
214 status = (*cb->buf->input) (cb->buf->closure, bd->text,
216 BUFFER_DATA_SIZE, &nread);
220 /* If we didn't read anything, then presumably the buffer is
221 in nonblocking mode, and we should just get out now with
222 whatever we've inflated. */
233 *got = size - cb->zstr.avail_out;
238 /* Output data to a compression buffer. */
241 compress_buffer_output (void *closure, const char *data, int have, int *wrote)
243 struct compress_buffer *cb = (struct compress_buffer *) closure;
245 cb->zstr.avail_in = have;
246 cb->zstr.next_in = (unsigned char *) data;
248 while (cb->zstr.avail_in > 0)
250 char buffer[BUFFER_DATA_SIZE];
253 cb->zstr.avail_out = BUFFER_DATA_SIZE;
254 cb->zstr.next_out = (unsigned char *) buffer;
256 zstatus = deflate (&cb->zstr, Z_NO_FLUSH);
259 compress_error (0, zstatus, &cb->zstr, "deflate");
263 if (cb->zstr.avail_out != BUFFER_DATA_SIZE)
264 buf_output (cb->buf, buffer,
265 BUFFER_DATA_SIZE - cb->zstr.avail_out);
270 /* We will only be here because buf_send_output was called on the
271 compression buffer. That means that we should now call
272 buf_send_output on the underlying buffer. */
273 return buf_send_output (cb->buf);
276 /* Flush a compression buffer. */
279 compress_buffer_flush (void *closure)
281 struct compress_buffer *cb = (struct compress_buffer *) closure;
283 cb->zstr.avail_in = 0;
284 cb->zstr.next_in = NULL;
288 char buffer[BUFFER_DATA_SIZE];
291 cb->zstr.avail_out = BUFFER_DATA_SIZE;
292 cb->zstr.next_out = (unsigned char *) buffer;
294 zstatus = deflate (&cb->zstr, Z_SYNC_FLUSH);
296 /* The deflate function will return Z_BUF_ERROR if it can't do
297 anything, which in this case means that all data has been flushed. */
299 if (zstatus == Z_BUF_ERROR)
304 compress_error (0, zstatus, &cb->zstr, "deflate flush");
308 if (cb->zstr.avail_out != BUFFER_DATA_SIZE)
309 buf_output (cb->buf, buffer,
310 BUFFER_DATA_SIZE - cb->zstr.avail_out);
312 /* If the deflate function did not fill the output buffer,
313 then all data has been flushed. */
314 if (cb->zstr.avail_out > 0)
318 /* Now flush the underlying buffer. Note that if the original
319 call to buf_flush passed 1 for the BLOCK argument, then the
320 buffer will already have been set into blocking mode, so we
321 should always pass 0 here. */
322 return buf_flush (cb->buf, 0);
325 /* The block routine for a compression buffer. */
328 compress_buffer_block (void *closure, int block)
330 struct compress_buffer *cb = (struct compress_buffer *) closure;
333 return set_block (cb->buf);
335 return set_nonblock (cb->buf);
338 /* Shut down an input buffer. */
341 compress_buffer_shutdown_input (struct buffer *buf)
343 struct compress_buffer *cb = (struct compress_buffer *) buf->closure;
346 /* Pick up any trailing data, such as the checksum. */
352 status = compress_buffer_input (cb, buf, 0, sizeof buf, &nread);
359 zstatus = inflateEnd (&cb->zstr);
362 compress_error (0, zstatus, &cb->zstr, "inflateEnd");
366 return buf_shutdown (cb->buf);
369 /* Shut down an output buffer. */
372 compress_buffer_shutdown_output (struct buffer *buf)
374 struct compress_buffer *cb = (struct compress_buffer *) buf->closure;
379 char buffer[BUFFER_DATA_SIZE];
381 cb->zstr.avail_out = BUFFER_DATA_SIZE;
382 cb->zstr.next_out = (unsigned char *) buffer;
384 zstatus = deflate (&cb->zstr, Z_FINISH);
385 if (zstatus != Z_OK && zstatus != Z_STREAM_END)
387 compress_error (0, zstatus, &cb->zstr, "deflate finish");
391 if (cb->zstr.avail_out != BUFFER_DATA_SIZE)
392 buf_output (cb->buf, buffer,
393 BUFFER_DATA_SIZE - cb->zstr.avail_out);
394 } while (zstatus != Z_STREAM_END);
396 zstatus = deflateEnd (&cb->zstr);
399 compress_error (0, zstatus, &cb->zstr, "deflateEnd");
403 status = buf_flush (cb->buf, 1);
407 return buf_shutdown (cb->buf);
412 /* Here is our librarified gzip implementation. It is very minimal
413 but attempts to be RFC1952 compliant. */
415 /* GZIP ID byte values */
419 /* Compression methods */
420 #define GZIP_CDEFLATE 8
425 #define GZIP_FEXTRA 4
427 #define GZIP_FCOMMENT 16
429 /* BUF should contain SIZE bytes of gzipped data (RFC1952/RFC1951).
430 We are to uncompress the data and write the result to the file
431 descriptor FD. If something goes wrong, give a nonfatal error message
432 mentioning FULLNAME as the name of the file for FD. Return 1 if
433 it is an error we can't recover from. */
436 gunzip_and_write (int fd, char *fullname, unsigned char *buf, size_t size)
441 unsigned char outbuf[32768];
446 error (0, 0, "gzipped data too small - lacks complete header");
449 if (buf[0] != GZIP_ID1 || buf[1] != GZIP_ID2)
451 error (0, 0, "gzipped data does not start with gzip identification");
454 if (buf[2] != GZIP_CDEFLATE)
456 error (0, 0, "only the deflate compression method is supported");
460 /* Skip over the fixed header, and then skip any of the variable-length
461 fields. As we skip each field, we keep pos <= size. The checks
462 on positions and lengths are really checks for malformed or
463 incomplete gzip data. */
465 if (buf[3] & GZIP_FEXTRA)
469 error (0, 0, "%s lacks proper gzip XLEN field", fullname);
472 pos += buf[pos] + (buf[pos + 1] << 8) + 2;
475 error (0, 0, "%s lacks proper gzip \"extra field\"", fullname);
480 if (buf[3] & GZIP_FNAME)
482 unsigned char *p = memchr(buf + pos, '\0', size - pos);
485 error (0, 0, "%s has bad gzip filename field", fullname);
490 if (buf[3] & GZIP_FCOMMENT)
492 unsigned char *p = memchr(buf + pos, '\0', size - pos);
495 error (0, 0, "%s has bad gzip comment field", fullname);
500 if (buf[3] & GZIP_FHCRC)
505 error (0, 0, "%s has bad gzip CRC16 field", fullname);
510 /* There could be no data to decompress - check and short circuit. */
513 error (0, 0, "gzip data incomplete for %s (no data)", fullname);
517 memset (&zstr, 0, sizeof zstr);
518 /* Passing a negative argument tells zlib not to look for a zlib
519 (RFC1950) header. This is an undocumented feature; I suppose if
520 we wanted to be anal we could synthesize a header instead, but why bother? */
522 zstatus = inflateInit2 (&zstr, -15);
525 compress_error (1, zstatus, &zstr, fullname);
527 /* I don't see why we should have to include the 8 byte trailer in
528 avail_in. But I see that zlib/gzio.c does, and it seemed to fix
529 a fairly rare bug in which we'd get a Z_BUF_ERROR for no obvious reason. */
531 zstr.avail_in = size - pos;
532 zstr.next_in = buf + pos;
534 crc = crc32 (0, NULL, 0);
538 zstr.avail_out = sizeof (outbuf);
539 zstr.next_out = outbuf;
540 zstatus = inflate (&zstr, Z_NO_FLUSH);
541 if (zstatus != Z_STREAM_END && zstatus != Z_OK)
543 compress_error (0, zstatus, &zstr, fullname);
546 if (write (fd, outbuf, sizeof (outbuf) - zstr.avail_out) < 0)
548 error (0, errno, "writing decompressed file %s", fullname);
551 crc = crc32 (crc, outbuf, sizeof (outbuf) - zstr.avail_out);
552 } while (zstatus != Z_STREAM_END);
553 zstatus = inflateEnd (&zstr);
555 compress_error (0, zstatus, &zstr, fullname);
557 /* Check that there are still 8 trailer bytes remaining (CRC32
558 and ISIZE). Check the amount of input consumed by inflate, plus
559 the header length (pos), against the input buffer's total size. */
560 pos += zstr.total_in;
563 error (0, 0, "gzip data incomplete for %s (no trailer)", fullname);
567 if (crc != ((unsigned long)buf[pos]
568 + ((unsigned long)buf[pos + 1] << 8)
569 + ((unsigned long)buf[pos + 2] << 16)
570 + ((unsigned long)buf[pos + 3] << 24)))
572 error (0, 0, "CRC error uncompressing %s", fullname);
576 if (zstr.total_out != ((unsigned long)buf[pos + 4]
577 + ((unsigned long)buf[pos + 5] << 8)
578 + ((unsigned long)buf[pos + 6] << 16)
579 + ((unsigned long)buf[pos + 7] << 24)))
581 error (0, 0, "invalid length uncompressing %s", fullname);
588 /* Read all of FD and put the gzipped data (RFC1952/RFC1951) into *BUF,
589 replacing previous contents of *BUF. *BUF is xmalloc'd and *SIZE is
590 its allocated size. Put the actual number of bytes of data in
591 *LEN. If something goes wrong, give a nonfatal error mentioning
592 FULLNAME as the name of the file for FD, and return 1 if we can't
593 recover from it. LEVEL is the compression level (1-9). */
596 read_and_gzip (int fd, const char *fullname, unsigned char **buf, size_t *size,
597 size_t *len, int level)
601 unsigned char inbuf[8192];
607 unsigned char *newbuf;
610 newbuf = xrealloc (*buf, *size);
613 error (0, 0, "out of memory");
618 (*buf)[0] = GZIP_ID1;
619 (*buf)[1] = GZIP_ID2;
620 (*buf)[2] = GZIP_CDEFLATE;
622 (*buf)[4] = (*buf)[5] = (*buf)[6] = (*buf)[7] = 0;
623 /* Could set this based on level, but why bother? */
627 memset (&zstr, 0, sizeof zstr);
628 zstatus = deflateInit2 (&zstr, level, Z_DEFLATED, -15, 8,
630 crc = crc32 (0, NULL, 0);
633 compress_error (0, zstatus, &zstr, fullname);
637 /* Adjust for 10-byte output header (filled in above) */
639 zstr.avail_out = *size - 10;
640 zstr.next_out = *buf + 10;
646 nread = read (fd, inbuf, sizeof inbuf);
649 error (0, errno, "cannot read %s", fullname);
655 crc = crc32 (crc, inbuf, nread);
656 zstr.next_in = inbuf;
657 zstr.avail_in = nread;
661 /* I don't see this documented anywhere, but deflate seems
662 to tend to dump core sometimes if we pass it Z_FINISH and
663 a small (e.g. 2147 byte) avail_out. So we insist on at
664 least 4096 bytes (that is what zlib/gzio.c uses). */
666 if (zstr.avail_out < 4096)
668 unsigned char *newbuf;
670 assert(zstr.avail_out + zstr.total_out == *size);
671 assert(zstr.next_out == *buf + zstr.total_out);
673 newbuf = xrealloc (*buf, *size);
676 error (0, 0, "out of memory");
680 zstr.next_out = *buf + zstr.total_out;
681 zstr.avail_out = *size - zstr.total_out;
682 assert(zstr.avail_out + zstr.total_out == *size);
683 assert(zstr.next_out == *buf + zstr.total_out);
686 zstatus = deflate (&zstr, finish ? Z_FINISH : 0);
687 if (zstatus == Z_STREAM_END)
689 else if (zstatus != Z_OK)
690 compress_error (0, zstatus, &zstr, fullname);
691 } while (zstr.avail_out == 0);
694 /* Need to add the CRC information (8 bytes)
695 to the end of the gzip'd output.
696 Ensure there is enough space in the output buffer to do so. */
698 if (zstr.avail_out < 8)
700 unsigned char *newbuf;
702 assert(zstr.avail_out + zstr.total_out == *size);
703 assert(zstr.next_out == *buf + zstr.total_out);
704 *size += 8 - zstr.avail_out;
705 newbuf = realloc (*buf, *size);
708 error (0, 0, "out of memory");
712 zstr.next_out = *buf + zstr.total_out;
713 zstr.avail_out = *size - zstr.total_out;
714 assert(zstr.avail_out + zstr.total_out == *size);
715 assert(zstr.next_out == *buf + zstr.total_out);
717 *zstr.next_out++ = (unsigned char)(crc & 0xff);
718 *zstr.next_out++ = (unsigned char)((crc >> 8) & 0xff);
719 *zstr.next_out++ = (unsigned char)((crc >> 16) & 0xff);
720 *zstr.next_out++ = (unsigned char)((crc >> 24) & 0xff);
722 *zstr.next_out++ = (unsigned char)(zstr.total_in & 0xff);
723 *zstr.next_out++ = (unsigned char)((zstr.total_in >> 8) & 0xff);
724 *zstr.next_out++ = (unsigned char)((zstr.total_in >> 16) & 0xff);
725 *zstr.next_out++ = (unsigned char)((zstr.total_in >> 24) & 0xff);
729 assert(zstr.avail_out + zstr.total_out == *size);
730 assert(zstr.next_out == *buf + zstr.total_out);
732 *len = zstr.total_out;
734 zstatus = deflateEnd (&zstr);
736 compress_error (0, zstatus, &zstr, fullname);
740 #endif /* defined (SERVER_SUPPORT) || defined (CLIENT_SUPPORT) */