1 /* zlib.c --- interface to the zlib compression library
2 Ian Lance Taylor <ian@cygnus.com>
4 This file is part of GNU CVS.
6 GNU CVS is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details. */
16 /* The routines in this file are the interface between the CVS
17 client/server support and the zlib compression library. */
21 #include "pagealign_alloc.h"
23 #if defined (SERVER_SUPPORT) || defined (CLIENT_SUPPORT)
31 /* OS/2 doesn't have EIO. FIXME: this whole notion of turning
32 a different error into EIO strikes me as pretty dubious. */
37 /* The compression interface is built upon the buffer data structure.
38 We provide a buffer type which compresses or decompresses the data
39 which passes through it. An input buffer decompresses the data
40 read from an underlying buffer, and an output buffer compresses the
41 data before writing it to an underlying buffer. */
43 /* This structure is the closure field of the buffer. */
/* The routines in this file reach two members through a pointer to
   this structure: buf, the underlying buffer that carries the
   compressed bytes, and zstr, the zlib z_stream state that is handed
   to inflate and deflate. */
45 struct compress_buffer
47 /* The underlying buffer. */
49 /* The compression information. */
/* Forward declarations for the static routines defined later in this
   file. The compress_buffer_* routines are handed to buf_initialize
   as callbacks by compress_buffer_initialize; compress_error is the
   shared zlib error reporter. */
53 static void compress_error (int, int, z_stream *, const char *);
54 static int compress_buffer_input (void *, char *, size_t, size_t, size_t *);
55 static int compress_buffer_output (void *, const char *, size_t, size_t *);
56 static int compress_buffer_flush (void *);
57 static int compress_buffer_block (void *, bool);
58 static int compress_buffer_get_fd (void *);
59 static int compress_buffer_shutdown_input (struct buffer *);
60 static int compress_buffer_shutdown_output (struct buffer *);
62 /* Report an error from one of the zlib functions. */
/* ZSTATUS is the zlib return code being reported and ZSTR is the
   stream it came from. MSG names the operation or file involved;
   callers in this file pass strings such as "inflate", "deflate
   flush" or a file name. A numeric description of the form
   "error N" can be formatted from ZSTATUS, and the saved errno value
   (hold_errno) is passed on only when ZSTATUS is Z_ERRNO. STATUS is
   presumably the exit status given to error(); callers here pass 1
   for initialization failures and 0 otherwise -- TODO confirm. */
65 compress_error (int status, int zstatus, z_stream *zstr, const char *msg)
76 sprintf (buf, "error %d", zstatus);
81 zstatus == Z_ERRNO ? hold_errno : 0,
87 /* Create a compression buffer. */
/* BUF is the underlying buffer to wrap; it becomes the buf member of
   a freshly allocated struct compress_buffer. INPUT selects the
   direction: an input buffer gets the input and shutdown-input
   callbacks and a stream set up with inflateInit, while an output
   buffer gets the output, flush and shutdown-output callbacks and a
   stream set up with deflateInit at compression LEVEL. MEMORY is a
   callback taking the buffer; its use is not visible here and it is
   presumably forwarded to buf_initialize -- TODO confirm. A failed
   zlib initialization is reported through compress_error with status
   1. Returns whatever buf_initialize returns. */
89 compress_buffer_initialize (struct buffer *buf, int input, int level,
90 void (*memory) (struct buffer *))
92 struct compress_buffer *n;
95 n = xmalloc (sizeof *n);
/* Zeroing the closure also zeroes the z_stream. zlib requires the
   zalloc, zfree and opaque members to be set before the init call,
   and zero selects its default allocator. */
96 memset (n, 0, sizeof *n);
101 zstatus = inflateInit (&n->zstr);
103 zstatus = deflateInit (&n->zstr, level);
105 compress_error (1, zstatus, &n->zstr, "compression initialization");
107 /* There may already be data buffered on BUF. For an output
108 buffer, this is OK, because these routines will just use the
109 buffer routines to append data to the (uncompressed) data
110 already on BUF. An input buffer expects to handle a single
111 buffer_data of buffered input to be uncompressed, so that is OK
112 provided there is only one buffer. At present that is all
113 there ever will be; if this changes, compress_buffer_input must
114 be modified to handle multiple input buffers. */
115 assert (! input || buf->data == NULL || buf->data->next == NULL);
/* Only the callbacks that match the direction are installed; the
   others are passed as NULL. */
117 return buf_initialize (input ? compress_buffer_input : NULL,
118 input ? NULL : compress_buffer_output,
119 input ? NULL : compress_buffer_flush,
120 compress_buffer_block, compress_buffer_get_fd,
122 ? compress_buffer_shutdown_input
123 : compress_buffer_shutdown_output),
130 /* Input data from a compression buffer. */
/* CLOSURE is the struct compress_buffer for this buffer. Inflated
   bytes are written to DATA, which has room for SIZE bytes; NEED is
   the minimum number of bytes wanted. On the way out *GOT is set to
   the number of bytes produced, computed as SIZE minus the z_stream's
   remaining avail_out. Compressed bytes are fetched by calling the
   underlying buffer's input routine directly, at most
   BUFFER_DATA_SIZE bytes per call, into the single buffer_data kept
   on cb->buf->data. */
132 compress_buffer_input (void *closure, char *data, size_t need, size_t size,
135 struct compress_buffer *cb = closure;
136 struct buffer_data *bd;
138 assert (cb->buf->input);
140 /* We use a single buffer_data structure to buffer up data which
141 the z_stream structure won't use yet. We can safely store this
142 on cb->buf->data, because we never call the buffer routines on
143 cb->buf; we only call the buffer input routine, since that
144 gives us the semantics we want. As noted in
145 compress_buffer_initialize, the buffer_data structure may
146 already exist, and hold data which was already read and
147 buffered before the decompression began. */
/* Allocate the holding buffer_data and a page-aligned text area of
   BUFFER_DATA_SIZE bytes for it. */
151 bd = xmalloc (sizeof (struct buffer_data));
154 bd->text = pagealign_xalloc (BUFFER_DATA_SIZE);
155 if (bd->text == NULL)
/* Aim the inflater's output at the caller's buffer. */
165 cb->zstr.avail_out = size;
166 cb->zstr.next_out = (Bytef *) data;
170 int zstatus, sofar, status;
173 /* First try to inflate any data we already have buffered up.
174 This is useful even if we don't have any buffered data,
175 because there may be data buffered inside the z_stream
178 cb->zstr.avail_in = bd->size;
179 cb->zstr.next_in = (Bytef *) bd->bufp;
183 zstatus = inflate (&cb->zstr, Z_NO_FLUSH);
184 if (zstatus == Z_STREAM_END)
186 if (zstatus != Z_OK && zstatus != Z_BUF_ERROR)
188 compress_error (0, zstatus, &cb->zstr, "inflate");
191 } while (cb->zstr.avail_in > 0
192 && cb->zstr.avail_out > 0);
194 bd->size = cb->zstr.avail_in;
195 bd->bufp = (char *) cb->zstr.next_in;
197 sofar = size - cb->zstr.avail_out;
199 if (zstatus == Z_STREAM_END)
201 /* If we read any data, then return it, relying on the fact that
202 * we will get Z_STREAM_END on the next read too.
204 if (sofar > 0) break;
206 /* Otherwise, return EOF. */
210 /* If we have obtained NEED bytes, then return, unless NEED is
211 zero and we haven't obtained anything at all. If NEED is
212 zero, we will keep reading from the underlying buffer until
213 we either can't read anything, or we have managed to
214 inflate at least one byte. */
215 if (sofar > 0 && sofar >= need)
218 /* All our buffered data should have been processed at this
220 assert (bd->size == 0);
222 /* This will work well in the server, because this call will
223 do an unblocked read and fetch all the available data. In
224 the client, this will read a single byte from the stdio
225 stream, which will cause us to call inflate once per byte.
226 It would be more efficient if we could make a call which
227 would fetch all the available bytes, and at least one byte. */
229 status = (*cb->buf->input) (cb->buf->closure, bd->text,
231 BUFFER_DATA_SIZE, &nread);
234 /* Don't try to recover from memory allocation errors. */
239 /* If we read any data, then return it, relying on the fact that
240 * we will get the same error reading the underlying buffer
241 * on the next read too.
243 if (sofar > 0) break;
245 /* Otherwise, return EOF. */
249 /* If we didn't read anything, then presumably the buffer is
250 in nonblocking mode, and we should just get out now with
251 whatever we've inflated. */
/* Report how many bytes were inflated into DATA. */
262 *got = size - cb->zstr.avail_out;
269 /* Output data to a compression buffer. */
/* CLOSURE is the struct compress_buffer for this buffer; DATA and
   HAVE describe the uncompressed bytes to send. The bytes are fed to
   deflate with Z_NO_FLUSH for as long as avail_in is nonzero, using a
   function-static page-aligned scratch area of BUFFER_DATA_SIZE bytes
   as the output window. Whenever deflate produced output, it is
   appended to the underlying buffer with buf_output. A deflate
   failure is reported through compress_error as "deflate". Finally
   buf_send_output is called on the underlying buffer and its result
   is returned. */
271 compress_buffer_output (void *closure, const char *data, size_t have,
274 struct compress_buffer *cb = closure;
276 /* This is only used within the while loop below, but allocated here for
279 static char *buffer = NULL;
281 buffer = pagealign_xalloc (BUFFER_DATA_SIZE);
283 cb->zstr.avail_in = have;
284 cb->zstr.next_in = (unsigned char *) data;
286 while (cb->zstr.avail_in > 0)
290 cb->zstr.avail_out = BUFFER_DATA_SIZE;
291 cb->zstr.next_out = (unsigned char *) buffer;
293 zstatus = deflate (&cb->zstr, Z_NO_FLUSH);
296 compress_error (0, zstatus, &cb->zstr, "deflate");
300 if (cb->zstr.avail_out != BUFFER_DATA_SIZE)
301 buf_output (cb->buf, buffer,
302 BUFFER_DATA_SIZE - cb->zstr.avail_out);
307 /* We will only be here because buf_send_output was called on the
308 compression buffer. That means that we should now call
309 buf_send_output on the underlying buffer. */
310 return buf_send_output (cb->buf);
315 /* Flush a compression buffer. */
/* CLOSURE is the struct compress_buffer for this buffer. With no new
   input supplied (avail_in is 0 and next_in is NULL), deflate is
   called with Z_SYNC_FLUSH so that data held inside the compressor is
   emitted. Each nonempty result is appended to the underlying buffer
   with buf_output. Z_BUF_ERROR from deflate, or an output window that
   was not filled completely, means nothing more is pending. A deflate
   failure is reported through compress_error as "deflate flush". The
   underlying buffer is then flushed with buf_flush and that result is
   returned. */
317 compress_buffer_flush (void *closure)
319 struct compress_buffer *cb = closure;
321 /* This is only used within the while loop below, but allocated here for
324 static char *buffer = NULL;
326 buffer = pagealign_xalloc (BUFFER_DATA_SIZE);
328 cb->zstr.avail_in = 0;
329 cb->zstr.next_in = NULL;
335 cb->zstr.avail_out = BUFFER_DATA_SIZE;
336 cb->zstr.next_out = (unsigned char *) buffer;
338 zstatus = deflate (&cb->zstr, Z_SYNC_FLUSH);
340 /* The deflate function will return Z_BUF_ERROR if it can't do
341 anything, which in this case means that all data has been
343 if (zstatus == Z_BUF_ERROR)
348 compress_error (0, zstatus, &cb->zstr, "deflate flush");
352 if (cb->zstr.avail_out != BUFFER_DATA_SIZE)
353 buf_output (cb->buf, buffer,
354 BUFFER_DATA_SIZE - cb->zstr.avail_out);
356 /* If the deflate function did not fill the output buffer,
357 then all data has been flushed. */
358 if (cb->zstr.avail_out > 0)
362 /* Now flush the underlying buffer. Note that if the original
363 call to buf_flush passed 1 for the BLOCK argument, then the
364 buffer will already have been set into blocking mode, so we
365 should always pass 0 here. */
366 return buf_flush (cb->buf, 0);
371 /* The block routine for a compression buffer. */
/* CLOSURE is the struct compress_buffer for this buffer. The request
   is passed down to the underlying buffer: BLOCK selects between
   calling set_block and set_nonblock on cb->buf, and the result of
   that call is returned. */
373 compress_buffer_block (void *closure, bool block)
375 struct compress_buffer *cb = closure;
378 return set_block (cb->buf);
380 return set_nonblock (cb->buf);
385 /* Return the file descriptor underlying any child buffers. */
/* CLOSURE is the struct compress_buffer for this buffer. The
   compression layer has no descriptor of its own, so the answer is
   whatever buf_get_fd reports for the underlying buffer. */
387 compress_buffer_get_fd (void *closure)
389 struct compress_buffer *cb = closure;
390 return buf_get_fd (cb->buf);
395 /* Shut down an input buffer. */
/* BUF is the compression buffer; its closure is the struct
   compress_buffer. Remaining compressed input is consumed through
   compress_buffer_input, the inflate state is released with
   inflateEnd (a failure is reported nonfatally through compress_error
   as "inflateEnd"), and the underlying buffer is shut down with
   buf_shutdown, whose result is returned. */
397 compress_buffer_shutdown_input (struct buffer *buf)
399 struct compress_buffer *cb = buf->closure;
402 /* Pick up any trailing data, such as the checksum. */
/* NEED is passed as 0, so the call returns as soon as anything, or
   nothing, can be inflated. NOTE(review): the `buf' used as the data
   argument has sizeof applied to it, so it is presumably a local
   scratch array that shadows the parameter -- confirm. */
409 status = compress_buffer_input (cb, buf, 0, sizeof buf, &nread);
416 zstatus = inflateEnd (&cb->zstr);
419 compress_error (0, zstatus, &cb->zstr, "inflateEnd");
423 return buf_shutdown (cb->buf);
428 /* Shut down an output buffer. */
/* BUF is the compression buffer; its closure is the struct
   compress_buffer. The deflate stream is finished by calling deflate
   with Z_FINISH until it returns Z_STREAM_END. After each call any
   output produced in the function-static page-aligned scratch area is
   appended to the underlying buffer with buf_output. A status other
   than Z_OK or Z_STREAM_END is reported through compress_error as
   "deflate finish". The deflate state is then released with
   deflateEnd, the underlying buffer is flushed with buf_flush passing
   1 for its second (BLOCK) argument, and finally it is shut down with
   buf_shutdown, whose result is returned. */
430 compress_buffer_shutdown_output (struct buffer *buf)
432 struct compress_buffer *cb = buf->closure;
435 /* This is only used within the while loop below, but allocated here for
438 static char *buffer = NULL;
440 buffer = pagealign_xalloc (BUFFER_DATA_SIZE);
444 cb->zstr.avail_out = BUFFER_DATA_SIZE;
445 cb->zstr.next_out = (unsigned char *) buffer;
447 zstatus = deflate (&cb->zstr, Z_FINISH);
448 if (zstatus != Z_OK && zstatus != Z_STREAM_END)
450 compress_error (0, zstatus, &cb->zstr, "deflate finish");
454 if (cb->zstr.avail_out != BUFFER_DATA_SIZE)
455 buf_output (cb->buf, buffer,
456 BUFFER_DATA_SIZE - cb->zstr.avail_out);
457 } while (zstatus != Z_STREAM_END);
459 zstatus = deflateEnd (&cb->zstr);
462 compress_error (0, zstatus, &cb->zstr, "deflateEnd");
466 status = buf_flush (cb->buf, 1);
470 return buf_shutdown (cb->buf);
475 /* Here is our librarified gzip implementation. It is very minimal
476 but attempts to be RFC1952 compliant. */
478 /* GZIP ID byte values */
482 /* Compression methods */
/* CM value 8 denotes "deflate" in RFC 1952; it is the only method
   gunzip_and_write accepts and the one read_and_gzip emits. */
483 #define GZIP_CDEFLATE 8
/* Masks for the FLG header byte (byte 3 of the header): in RFC 1952
   FEXTRA is bit 2 and FCOMMENT is bit 4. */
488 #define GZIP_FEXTRA 4
490 #define GZIP_FCOMMENT 16
492 /* BUF should contain SIZE bytes of gzipped data (RFC1952/RFC1951).
493 We are to uncompress the data and write the result to the file
494 descriptor FD. If something goes wrong, give a nonfatal error message
495 mentioning FULLNAME as the name of the file for FD. Return 1 if
496 it is an error we can't recover from. */
/* Outline: the two ID bytes and the compression method are checked,
   then the optional header fields selected by the flag byte buf[3]
   (extra field, file name, comment, header CRC) are skipped. The
   remaining bytes are inflated as a raw deflate stream (inflateInit2
   with -15) through a 32768-byte stack buffer; each piece is written
   to FD and folded into a running crc32. Afterwards the trailer is
   compared against the computed CRC and against zstr.total_out.

   NOTE(review): the result of write() is only tested for < 0, so a
   short write would go unnoticed -- confirm whether FD can produce
   partial writes.
   NOTE(review): the comment on inflateInit2 below calls the negative
   argument undocumented; the current zlib manual describes negative
   windowBits as selecting raw deflate -- verify against the zlib
   version in use. */
499 gunzip_and_write (int fd, const char *fullname, unsigned char *buf,
505 unsigned char outbuf[32768];
510 error (0, 0, "gzipped data too small - lacks complete header");
513 if (buf[0] != GZIP_ID1 || buf[1] != GZIP_ID2)
515 error (0, 0, "gzipped data does not start with gzip identification");
518 if (buf[2] != GZIP_CDEFLATE)
520 error (0, 0, "only the deflate compression method is supported");
524 /* Skip over the fixed header, and then skip any of the variable-length
525 fields. As we skip each field, we keep pos <= size. The checks
526 on positions and lengths are really checks for malformed or
527 incomplete gzip data. */
529 if (buf[3] & GZIP_FEXTRA)
533 error (0, 0, "%s lacks proper gzip XLEN field", fullname);
/* XLEN is stored least-significant byte first; skip its two bytes
   plus the extra field it measures. */
536 pos += buf[pos] + (buf[pos + 1] << 8) + 2;
539 error (0, 0, "%s lacks proper gzip \"extra field\"", fullname);
544 if (buf[3] & GZIP_FNAME)
/* The file name field ends at the first NUL byte. */
546 unsigned char *p = memchr(buf + pos, '\0', size - pos);
549 error (0, 0, "%s has bad gzip filename field", fullname);
554 if (buf[3] & GZIP_FCOMMENT)
/* The comment field likewise ends at the first NUL byte. */
556 unsigned char *p = memchr(buf + pos, '\0', size - pos);
559 error (0, 0, "%s has bad gzip comment field", fullname);
564 if (buf[3] & GZIP_FHCRC)
569 error (0, 0, "%s has bad gzip CRC16 field", fullname);
574 /* There could be no data to decompress - check and short circuit. */
577 error (0, 0, "gzip data incomplete for %s (no data)", fullname);
581 memset (&zstr, 0, sizeof zstr);
582 /* Passing a negative argument tells zlib not to look for a zlib
583 (RFC1950) header. This is an undocumented feature; I suppose if
584 we wanted to be anal we could synthesize a header instead,
586 zstatus = inflateInit2 (&zstr, -15);
589 compress_error (1, zstatus, &zstr, fullname);
591 /* I don't see why we should have to include the 8 byte trailer in
592 avail_in. But I see that zlib/gzio.c does, and it seemed to fix
593 a fairly rare bug in which we'd get a Z_BUF_ERROR for no obvious
595 zstr.avail_in = size - pos;
596 zstr.next_in = buf + pos;
598 crc = crc32 (0, NULL, 0);
602 zstr.avail_out = sizeof (outbuf);
603 zstr.next_out = outbuf;
604 zstatus = inflate (&zstr, Z_NO_FLUSH);
605 if (zstatus != Z_STREAM_END && zstatus != Z_OK)
607 compress_error (0, zstatus, &zstr, fullname);
610 if (write (fd, outbuf, sizeof (outbuf) - zstr.avail_out) < 0)
612 error (0, errno, "writing decompressed file %s", fullname);
615 crc = crc32 (crc, outbuf, sizeof (outbuf) - zstr.avail_out);
616 } while (zstatus != Z_STREAM_END);
617 zstatus = inflateEnd (&zstr);
619 compress_error (0, zstatus, &zstr, fullname);
621 /* Check that there are still 8 trailer bytes remaining (CRC32
622 and ISIZE). Check total decomp. data, plus header len (pos)
623 against input buffer total size. */
624 pos += zstr.total_in;
627 error (0, 0, "gzip data incomplete for %s (no trailer)", fullname);
/* The trailer holds the CRC32 in its first four bytes and the
   uncompressed length in the next four, each assembled here
   least-significant byte first. */
631 if (crc != ((unsigned long)buf[pos]
632 + ((unsigned long)buf[pos + 1] << 8)
633 + ((unsigned long)buf[pos + 2] << 16)
634 + ((unsigned long)buf[pos + 3] << 24)))
636 error (0, 0, "CRC error uncompressing %s", fullname);
640 if (zstr.total_out != ((unsigned long)buf[pos + 4]
641 + ((unsigned long)buf[pos + 5] << 8)
642 + ((unsigned long)buf[pos + 6] << 16)
643 + ((unsigned long)buf[pos + 7] << 24)))
645 error (0, 0, "invalid length uncompressing %s", fullname);
652 /* Read all of FD and put the gzipped data (RFC1952/RFC1951) into *BUF,
653 replacing previous contents of *BUF. *BUF is xmalloc'd and *SIZE is
654 its allocated size. Put the actual number of bytes of data in
655 *LEN. If something goes wrong, give a nonfatal error mentioning
656 FULLNAME as the name of the file for FD, and return 1 if we can't
657 recover from it). LEVEL is the compression level (1-9). */
/* Outline: the output begins with a 10-byte gzip header (the two ID
   bytes, the deflate method, and bytes 4 through 7 set to zero). The
   body is a raw deflate stream produced by a z_stream set up with
   deflateInit2 using windowBits -15. Input is read from FD into an
   8192-byte stack buffer and folded into a running crc32 before being
   handed to deflate. *BUF is enlarged whenever fewer than 4096 bytes
   of output space remain. After the stream ends, an 8-byte trailer is
   appended holding the CRC and zstr.total_in, each stored
   least-significant byte first, *LEN is taken from zstr.total_out,
   and the stream is released with deflateEnd.

   NOTE(review): the growth step for the trailer calls plain realloc
   while the other growth steps call xrealloc -- confirm that the
   difference is intentional. */
660 read_and_gzip (int fd, const char *fullname, unsigned char **buf, size_t *size,
661 size_t *len, int level)
665 unsigned char inbuf[8192];
671 unsigned char *newbuf;
674 newbuf = xrealloc (*buf, *size);
677 error (0, 0, "out of memory");
682 (*buf)[0] = GZIP_ID1;
683 (*buf)[1] = GZIP_ID2;
684 (*buf)[2] = GZIP_CDEFLATE;
686 (*buf)[4] = (*buf)[5] = (*buf)[6] = (*buf)[7] = 0;
687 /* Could set this based on level, but why bother? */
691 memset (&zstr, 0, sizeof zstr);
692 zstatus = deflateInit2 (&zstr, level, Z_DEFLATED, -15, 8,
694 crc = crc32 (0, NULL, 0);
697 compress_error (0, zstatus, &zstr, fullname);
701 /* Adjust for 10-byte output header (filled in above) */
703 zstr.avail_out = *size - 10;
704 zstr.next_out = *buf + 10;
710 nread = read (fd, inbuf, sizeof inbuf);
713 error (0, errno, "cannot read %s", fullname);
719 crc = crc32 (crc, inbuf, nread);
720 zstr.next_in = inbuf;
721 zstr.avail_in = nread;
725 /* I don't see this documented anywhere, but deflate seems
726 to tend to dump core sometimes if we pass it Z_FINISH and
727 a small (e.g. 2147 byte) avail_out. So we insist on at
728 least 4096 bytes (that is what zlib/gzio.c uses). */
730 if (zstr.avail_out < 4096)
732 unsigned char *newbuf;
734 assert(zstr.avail_out + zstr.total_out == *size);
735 assert(zstr.next_out == *buf + zstr.total_out);
737 newbuf = xrealloc (*buf, *size);
740 error (0, 0, "out of memory");
/* The block may have moved, so recompute the output cursor and the
   free space from total_out. */
744 zstr.next_out = *buf + zstr.total_out;
745 zstr.avail_out = *size - zstr.total_out;
746 assert(zstr.avail_out + zstr.total_out == *size);
747 assert(zstr.next_out == *buf + zstr.total_out);
/* The literal 0 passed as the flush mode is the value of
   Z_NO_FLUSH. */
750 zstatus = deflate (&zstr, finish ? Z_FINISH : 0);
751 if (zstatus == Z_STREAM_END)
753 else if (zstatus != Z_OK)
754 compress_error (0, zstatus, &zstr, fullname);
755 } while (zstr.avail_out == 0);
758 /* Need to add the CRC information (8 bytes)
759 to the end of the gzip'd output.
760 Ensure there is enough space in the output buffer
762 if (zstr.avail_out < 8)
764 unsigned char *newbuf;
766 assert(zstr.avail_out + zstr.total_out == *size);
767 assert(zstr.next_out == *buf + zstr.total_out);
768 *size += 8 - zstr.avail_out;
769 newbuf = realloc (*buf, *size);
772 error (0, 0, "out of memory");
776 zstr.next_out = *buf + zstr.total_out;
777 zstr.avail_out = *size - zstr.total_out;
778 assert(zstr.avail_out + zstr.total_out == *size);
779 assert(zstr.next_out == *buf + zstr.total_out);
781 *zstr.next_out++ = (unsigned char)(crc & 0xff);
782 *zstr.next_out++ = (unsigned char)((crc >> 8) & 0xff);
783 *zstr.next_out++ = (unsigned char)((crc >> 16) & 0xff);
784 *zstr.next_out++ = (unsigned char)((crc >> 24) & 0xff);
786 *zstr.next_out++ = (unsigned char)(zstr.total_in & 0xff);
787 *zstr.next_out++ = (unsigned char)((zstr.total_in >> 8) & 0xff);
788 *zstr.next_out++ = (unsigned char)((zstr.total_in >> 16) & 0xff);
789 *zstr.next_out++ = (unsigned char)((zstr.total_in >> 24) & 0xff);
793 assert(zstr.avail_out + zstr.total_out == *size);
794 assert(zstr.next_out == *buf + zstr.total_out);
796 *len = zstr.total_out;
798 zstatus = deflateEnd (&zstr);
800 compress_error (0, zstatus, &zstr, fullname);
804 #endif /* defined (SERVER_SUPPORT) || defined (CLIENT_SUPPORT) */