1 /* zlib.c --- interface to the zlib compression library
2 Ian Lance Taylor <ian@cygnus.com>
4 This file is part of GNU CVS.
6 GNU CVS is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details. */
16 /* The routines in this file are the interface between the CVS
17 client/server support and the zlib compression library. */
22 #if defined (SERVER_SUPPORT) || defined (CLIENT_SUPPORT)
30 /* OS/2 doesn't have EIO. FIXME: this whole notion of turning
31 a different error into EIO strikes me as pretty dubious. */
36 /* The compression interface is built upon the buffer data structure.
37 We provide a buffer type which compresses or decompresses the data
38 which passes through it. An input buffer decompresses the data
39 read from an underlying buffer, and an output buffer compresses the
40 data before writing it to an underlying buffer. */
42 /* This structure is the closure field of the buffer. The callbacks
   in this file recover it from their void * closure argument, and the
   shutdown routines from buf->closure. */
44 struct compress_buffer
46 /* The underlying buffer. Compressed data is read from it by the
   input routine and appended to it by the output, flush and
   shutdown routines; the code refers to it as cb->buf. */
48 /* The compression information: the zlib stream state passed to
   inflate and deflate; the code refers to it as cb->zstr. */
/* Forward declarations of the file-local routines defined below.
   compress_error is the shared zlib error reporter; the
   compress_buffer_* routines are the callbacks that
   compress_buffer_initialize hands to buf_initialize. */
52 static void compress_error (int, int, z_stream *, const char *);
53 static int compress_buffer_input (void *, char *, size_t, size_t, size_t *);
54 static int compress_buffer_output (void *, const char *, size_t, size_t *);
55 static int compress_buffer_flush (void *);
56 static int compress_buffer_block (void *, bool);
57 static int compress_buffer_get_fd (void *);
58 static int compress_buffer_shutdown_input (struct buffer *);
59 static int compress_buffer_shutdown_output (struct buffer *);
61 /* Report an error from one of the zlib functions.
   ZSTATUS is the code returned by the failing zlib call and ZSTR the
   stream it was operating on. MSG names the operation (for example
   "deflate") or, in the gzip helpers, the file being processed.
   Callers in this file pass 1 for STATUS at the inflateInit/deflateInit
   and inflateInit2 call sites and 0 everywhere else.
   NOTE(review): how STATUS, ZSTR and MSG are consumed is not visible
   in this excerpt -- confirm against the full function. */
64 compress_error (int status, int zstatus, z_stream *zstr, const char *msg)
/* Render the numeric zlib code as the text "error N" in the local
   buffer BUF (its declaration is not visible in this excerpt). */
75 sprintf (buf, "error %d", zstatus);
/* An errno value is passed along only when zlib reported Z_ERRNO;
   any other code passes 0. hold_errno is presumably errno saved on
   entry -- its assignment is not visible here, confirm. */
80 zstatus == Z_ERRNO ? hold_errno : 0,
86 /* Create a compression buffer layered on top of BUF.
   INPUT selects the direction: when it is nonzero the new buffer gets
   only an input routine (compress_buffer_input), otherwise it gets
   output and flush routines (compress_buffer_output and
   compress_buffer_flush). LEVEL is handed to deflateInit.
   The value returned is whatever buf_initialize returns.
   NOTE(review): the use of MEMORY and the trailing arguments of the
   buf_initialize call are not visible in this excerpt -- confirm. */
88 compress_buffer_initialize (struct buffer *buf, int input, int level,
89 void (*memory) (struct buffer *))
91 struct compress_buffer *n;
/* Allocate the closure and clear it, so that the embedded z_stream
   is all zeros before it is passed to inflateInit or deflateInit. */
94 n = xmalloc (sizeof *n);
95 memset (n, 0, sizeof *n);
/* inflateInit prepares a decompressing stream and deflateInit a
   compressing one. NOTE(review): the condition choosing between them
   is not visible here; presumably it tests INPUT. */
100 zstatus = inflateInit (&n->zstr);
102 zstatus = deflateInit (&n->zstr, level);
/* Reported with a STATUS of 1; the zlib failures in the buffer
   callbacks below are all reported with 0. */
104 compress_error (1, zstatus, &n->zstr, "compression initialization");
106 /* There may already be data buffered on BUF. For an output
107 buffer, this is OK, because these routines will just use the
108 buffer routines to append data to the (uncompressed) data
109 already on BUF. An input buffer expects to handle a single
110 buffer_data of buffered input to be uncompressed, so that is OK
111 provided there is only one buffer. At present that is all
112 there ever will be; if this changes, compress_buffer_input must
113 be modified to handle multiple input buffers. */
114 assert (! input || buf->data == NULL || buf->data->next == NULL);
/* Install the callbacks. Each direction-specific slot is NULL for
   the other direction; the block and get_fd routines are shared, and
   the shutdown routine is chosen to match the direction. */
116 return buf_initialize (input ? compress_buffer_input : NULL,
117 input ? NULL : compress_buffer_output,
118 input ? NULL : compress_buffer_flush,
119 compress_buffer_block, compress_buffer_get_fd,
121 ? compress_buffer_shutdown_input
122 : compress_buffer_shutdown_output),
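129 /* Input data from a compression buffer: read compressed bytes from the
underlying buffer and inflate them into DATA, which has room for SIZE
bytes, trying to obtain at least NEED bytes. The number of bytes
actually produced is stored in *GOT. */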
131 compress_buffer_input (void *closure, char *data, size_t need, size_t size,
134 struct compress_buffer *cb = closure;
135 struct buffer_data *bd;
137 assert (cb->buf->input);
139 /* We use a single buffer_data structure to buffer up data which
140 the z_stream structure won't use yet. We can safely store this
141 on cb->buf->data, because we never call the buffer routines on
142 cb->buf; we only call the buffer input routine, since that
143 gives us the semantics we want. As noted in
144 compress_buffer_initialize, the buffer_data structure may
145 already exist, and hold data which was already read and
146 buffered before the decompression began. */
150 bd = xmalloc (sizeof (struct buffer_data));
153 bd->text = xmalloc (BUFFER_DATA_SIZE);
154 if (bd->text == NULL)
164 cb->zstr.avail_out = size;
165 cb->zstr.next_out = (Bytef *) data;
169 int zstatus, sofar, status;
172 /* First try to inflate any data we already have buffered up.
173 This is useful even if we don't have any buffered data,
174 because there may be data buffered inside the z_stream
structure. */
177 cb->zstr.avail_in = bd->size;
178 cb->zstr.next_in = (Bytef *) bd->bufp;
182 zstatus = inflate (&cb->zstr, Z_NO_FLUSH);
183 if (zstatus == Z_STREAM_END)
185 if (zstatus != Z_OK && zstatus != Z_BUF_ERROR)
187 compress_error (0, zstatus, &cb->zstr, "inflate");
190 } while (cb->zstr.avail_in > 0
191 && cb->zstr.avail_out > 0);
193 bd->size = cb->zstr.avail_in;
194 bd->bufp = (char *) cb->zstr.next_in;
196 if (zstatus == Z_STREAM_END)
199 /* If we have obtained NEED bytes, then return, unless NEED is
200 zero and we haven't obtained anything at all. If NEED is
201 zero, we will keep reading from the underlying buffer until
202 we either can't read anything, or we have managed to
203 inflate at least one byte. */
204 sofar = size - cb->zstr.avail_out;
205 if (sofar > 0 && sofar >= need)
208 /* All our buffered data should have been processed at this
point. */
210 assert (bd->size == 0);
212 /* This will work well in the server, because this call will
213 do an unblocked read and fetch all the available data. In
214 the client, this will read a single byte from the stdio
215 stream, which will cause us to call inflate once per byte.
216 It would be more efficient if we could make a call which
217 would fetch all the available bytes, and at least one byte. */
219 status = (*cb->buf->input) (cb->buf->closure, bd->text,
221 BUFFER_DATA_SIZE, &nread);
225 /* If we didn't read anything, then presumably the buffer is
226 in nonblocking mode, and we should just get out now with
227 whatever we've inflated. */
238 *got = size - cb->zstr.avail_out;
245 /* Output data to a compression buffer.
   CLOSURE is the struct compress_buffer; DATA and HAVE describe the
   uncompressed bytes to send. The bytes are deflated through a
   BUFFER_DATA_SIZE scratch array and every non-empty result is
   appended to the underlying buffer, after which the underlying
   buffer is asked to send its output and that call's result is
   returned.
   NOTE(review): the last parameter (a size_t * per the prototype) and
   the handling of a failed deflate call are not visible in this
   excerpt -- confirm against the full function. */
247 compress_buffer_output (void *closure, const char *data, size_t have,
250 struct compress_buffer *cb = closure;
/* Present the caller's data to zlib as the pending input. */
252 cb->zstr.avail_in = have;
253 cb->zstr.next_in = (unsigned char *) data;
/* Keep calling deflate until it has consumed all of the input. */
255 while (cb->zstr.avail_in > 0)
257 char buffer[BUFFER_DATA_SIZE];
/* Offer deflate the whole, empty scratch array on every pass. */
260 cb->zstr.avail_out = BUFFER_DATA_SIZE;
261 cb->zstr.next_out = (unsigned char *) buffer;
263 zstatus = deflate (&cb->zstr, Z_NO_FLUSH);
266 compress_error (0, zstatus, &cb->zstr, "deflate");
/* avail_out has dropped by the number of bytes deflate produced;
   pass them on only when there are some. */
270 if (cb->zstr.avail_out != BUFFER_DATA_SIZE)
271 buf_output (cb->buf, buffer,
272 BUFFER_DATA_SIZE - cb->zstr.avail_out);
277 /* We will only be here because buf_send_output was called on the
278 compression buffer. That means that we should now call
279 buf_send_output on the underlying buffer. */
280 return buf_send_output (cb->buf);
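285 /* Flush a compression buffer: push out everything deflate is still
holding (Z_SYNC_FLUSH), append it to the underlying buffer, and then
flush the underlying buffer. */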
287 compress_buffer_flush (void *closure)
289 struct compress_buffer *cb = closure;
291 cb->zstr.avail_in = 0;
292 cb->zstr.next_in = NULL;
296 char buffer[BUFFER_DATA_SIZE];
299 cb->zstr.avail_out = BUFFER_DATA_SIZE;
300 cb->zstr.next_out = (unsigned char *) buffer;
302 zstatus = deflate (&cb->zstr, Z_SYNC_FLUSH);
304 /* The deflate function will return Z_BUF_ERROR if it can't do
305 anything, which in this case means that all data has been
flushed. */
307 if (zstatus == Z_BUF_ERROR)
312 compress_error (0, zstatus, &cb->zstr, "deflate flush");
316 if (cb->zstr.avail_out != BUFFER_DATA_SIZE)
317 buf_output (cb->buf, buffer,
318 BUFFER_DATA_SIZE - cb->zstr.avail_out);
320 /* If the deflate function did not fill the output buffer,
321 then all data has been flushed. */
322 if (cb->zstr.avail_out > 0)
326 /* Now flush the underlying buffer. Note that if the original
327 call to buf_flush passed 1 for the BLOCK argument, then the
328 buffer will already have been set into blocking mode, so we
329 should always pass 0 here. */
330 return buf_flush (cb->buf, 0);
335 /* The block routine for a compression buffer.
   CLOSURE is the struct compress_buffer. The request is forwarded to
   the underlying buffer with set_block or set_nonblock, and that
   call's result is returned.
   NOTE(review): the test that picks between the two calls is not
   visible in this excerpt; presumably it is on BLOCK. */
337 compress_buffer_block (void *closure, bool block)
339 struct compress_buffer *cb = closure;
342 return set_block (cb->buf);
344 return set_nonblock (cb->buf);
349 /* Return the file descriptor underlying any child buffers.
   CLOSURE is the struct compress_buffer; the answer is whatever
   buf_get_fd reports for the underlying buffer. */
351 compress_buffer_get_fd (void *closure)
353 struct compress_buffer *cb = closure;
354 return buf_get_fd (cb->buf);
359 /* Shut down an input buffer.
   BUF is the compression buffer itself; its closure is the struct
   compress_buffer. Remaining input is drained through
   compress_buffer_input, the inflate state is released with
   inflateEnd, and finally the underlying buffer is shut down, whose
   result is returned. */
361 compress_buffer_shutdown_input (struct buffer *buf)
363 struct compress_buffer *cb = buf->closure;
366 /* Pick up any trailing data, such as the checksum. */
/* NEED is 0 here, so the input routine keeps reading until it either
   cannot read anything or has inflated at least one byte.
   NOTE(review): `buf' is used below as the char * destination with
   `sizeof buf' as its capacity, so it must be a local array that
   shadows the struct buffer * parameter; that declaration is not
   visible in this excerpt -- confirm. */
373 status = compress_buffer_input (cb, buf, 0, sizeof buf, &nread);
380 zstatus = inflateEnd (&cb->zstr);
383 compress_error (0, zstatus, &cb->zstr, "inflateEnd");
387 return buf_shutdown (cb->buf);
392 /* Shut down an output buffer.
   BUF is the compression buffer itself; its closure is the struct
   compress_buffer. The deflate stream is finished and whatever it
   emits is appended to the underlying buffer; the deflate state is
   then released, the underlying buffer is flushed, and finally it is
   shut down, whose result is returned. */
394 compress_buffer_shutdown_output (struct buffer *buf)
396 struct compress_buffer *cb = buf->closure;
/* Scratch space that receives one round of deflate output. */
401 char buffer[BUFFER_DATA_SIZE];
403 cb->zstr.avail_out = BUFFER_DATA_SIZE;
404 cb->zstr.next_out = (unsigned char *) buffer;
/* Z_FINISH asks deflate to emit everything that is pending and end
   the stream. Z_OK means more output remains, so the loop goes
   round again; Z_STREAM_END means the stream is complete. */
406 zstatus = deflate (&cb->zstr, Z_FINISH);
407 if (zstatus != Z_OK && zstatus != Z_STREAM_END)
409 compress_error (0, zstatus, &cb->zstr, "deflate finish");
/* Forward whatever this round produced, if anything. */
413 if (cb->zstr.avail_out != BUFFER_DATA_SIZE)
414 buf_output (cb->buf, buffer,
415 BUFFER_DATA_SIZE - cb->zstr.avail_out);
416 } while (zstatus != Z_STREAM_END);
/* Release the deflate state now that the stream has ended. */
418 zstatus = deflateEnd (&cb->zstr);
421 compress_error (0, zstatus, &cb->zstr, "deflateEnd");
/* 1 is the BLOCK argument of buf_flush: flush in blocking mode
   before the underlying buffer is shut down. */
425 status = buf_flush (cb->buf, 1);
429 return buf_shutdown (cb->buf);
434 /* Here is our librarified gzip implementation. It is very minimal
435 but attempts to be RFC1952 compliant. */
437 /* GZIP ID byte values: header bytes 0 and 1, which gunzip_and_write
   compares against GZIP_ID1 and GZIP_ID2 (their definitions are not
   visible in this excerpt). */
441 /* Compression methods: header byte 2. Deflate is the only method
   gunzip_and_write accepts and the one read_and_gzip writes. */
442 #define GZIP_CDEFLATE 8
/* Bits of the flags byte, header byte 3. */
/* An "extra field" follows the fixed header, preceded by its two-byte
   length (XLEN), least significant byte first. */
447 #define GZIP_FEXTRA 4
/* A NUL-terminated comment field is present. */
449 #define GZIP_FCOMMENT 16
451 /* BUF should contain SIZE bytes of gzipped data (RFC1952/RFC1951).
452 We are to uncompress the data and write the result to the file
453 descriptor FD. If something goes wrong, give a nonfatal error message
454 mentioning FULLNAME as the name of the file for FD. Return 1 if
455 it is an error we can't recover from. */
458 gunzip_and_write (int fd, const char *fullname, unsigned char *buf,
464 unsigned char outbuf[32768];
469 error (0, 0, "gzipped data too small - lacks complete header");
472 if (buf[0] != GZIP_ID1 || buf[1] != GZIP_ID2)
474 error (0, 0, "gzipped data does not start with gzip identification");
477 if (buf[2] != GZIP_CDEFLATE)
479 error (0, 0, "only the deflate compression method is supported");
483 /* Skip over the fixed header, and then skip any of the variable-length
484 fields. As we skip each field, we keep pos <= size. The checks
485 on positions and lengths are really checks for malformed or
486 incomplete gzip data. */
488 if (buf[3] & GZIP_FEXTRA)
492 error (0, 0, "%s lacks proper gzip XLEN field", fullname);
495 pos += buf[pos] + (buf[pos + 1] << 8) + 2;
498 error (0, 0, "%s lacks proper gzip \"extra field\"", fullname);
503 if (buf[3] & GZIP_FNAME)
505 unsigned char *p = memchr(buf + pos, '\0', size - pos);
508 error (0, 0, "%s has bad gzip filename field", fullname);
513 if (buf[3] & GZIP_FCOMMENT)
515 unsigned char *p = memchr(buf + pos, '\0', size - pos);
518 error (0, 0, "%s has bad gzip comment field", fullname);
523 if (buf[3] & GZIP_FHCRC)
528 error (0, 0, "%s has bad gzip CRC16 field", fullname);
533 /* There could be no data to decompress - check and short circuit. */
536 error (0, 0, "gzip data incomplete for %s (no data)", fullname);
540 memset (&zstr, 0, sizeof zstr);
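541 /* Passing a negative argument tells zlib not to look for a zlib
542 (RFC1950) header. This was an undocumented feature when this was
543 written (zlib.h now documents negative windowBits as selecting raw
deflate data); I suppose if we wanted to be pedantic we could
synthesize a header instead, but why bother? */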
545 zstatus = inflateInit2 (&zstr, -15);
548 compress_error (1, zstatus, &zstr, fullname);
550 /* I don't see why we should have to include the 8 byte trailer in
551 avail_in. But I see that zlib/gzio.c does, and it seemed to fix
552 a fairly rare bug in which we'd get a Z_BUF_ERROR for no obvious
reason. */
554 zstr.avail_in = size - pos;
555 zstr.next_in = buf + pos;
557 crc = crc32 (0, NULL, 0);
561 zstr.avail_out = sizeof (outbuf);
562 zstr.next_out = outbuf;
563 zstatus = inflate (&zstr, Z_NO_FLUSH);
564 if (zstatus != Z_STREAM_END && zstatus != Z_OK)
566 compress_error (0, zstatus, &zstr, fullname);
569 if (write (fd, outbuf, sizeof (outbuf) - zstr.avail_out) < 0)
571 error (0, errno, "writing decompressed file %s", fullname);
574 crc = crc32 (crc, outbuf, sizeof (outbuf) - zstr.avail_out);
575 } while (zstatus != Z_STREAM_END);
576 zstatus = inflateEnd (&zstr);
578 compress_error (0, zstatus, &zstr, fullname);
580 /* Check that there are still 8 trailer bytes remaining (CRC32
581 and ISIZE). Add the compressed bytes consumed by inflate (total_in)
582 to the header length (pos) and check it against the input buffer total size. */
583 pos += zstr.total_in;
586 error (0, 0, "gzip data incomplete for %s (no trailer)", fullname);
590 if (crc != ((unsigned long)buf[pos]
591 + ((unsigned long)buf[pos + 1] << 8)
592 + ((unsigned long)buf[pos + 2] << 16)
593 + ((unsigned long)buf[pos + 3] << 24)))
595 error (0, 0, "CRC error uncompressing %s", fullname);
599 if (zstr.total_out != ((unsigned long)buf[pos + 4]
600 + ((unsigned long)buf[pos + 5] << 8)
601 + ((unsigned long)buf[pos + 6] << 16)
602 + ((unsigned long)buf[pos + 7] << 24)))
604 error (0, 0, "invalid length uncompressing %s", fullname);
611 /* Read all of FD and put the gzipped data (RFC1952/RFC1951) into *BUF,
612 replacing previous contents of *BUF. *BUF is xmalloc'd and *SIZE is
613 its allocated size. Put the actual number of bytes of data in
614 *LEN. If something goes wrong, give a nonfatal error mentioning
615 FULLNAME as the name of the file for FD, and return 1 if we can't
616 recover from it. LEVEL is the compression level (1-9). */
619 read_and_gzip (int fd, const char *fullname, unsigned char **buf, size_t *size,
620 size_t *len, int level)
624 unsigned char inbuf[8192];
630 unsigned char *newbuf;
633 newbuf = xrealloc (*buf, *size);
636 error (0, 0, "out of memory");
641 (*buf)[0] = GZIP_ID1;
642 (*buf)[1] = GZIP_ID2;
643 (*buf)[2] = GZIP_CDEFLATE;
645 (*buf)[4] = (*buf)[5] = (*buf)[6] = (*buf)[7] = 0;
646 /* Could set this based on level, but why bother? */
650 memset (&zstr, 0, sizeof zstr);
651 zstatus = deflateInit2 (&zstr, level, Z_DEFLATED, -15, 8,
653 crc = crc32 (0, NULL, 0);
656 compress_error (0, zstatus, &zstr, fullname);
660 /* Adjust for 10-byte output header (filled in above) */
662 zstr.avail_out = *size - 10;
663 zstr.next_out = *buf + 10;
669 nread = read (fd, inbuf, sizeof inbuf);
672 error (0, errno, "cannot read %s", fullname);
678 crc = crc32 (crc, inbuf, nread);
679 zstr.next_in = inbuf;
680 zstr.avail_in = nread;
684 /* I don't see this documented anywhere, but deflate seems
685 to tend to dump core sometimes if we pass it Z_FINISH and
686 a small (e.g. 2147 byte) avail_out. So we insist on at
687 least 4096 bytes (that is what zlib/gzio.c uses). */
689 if (zstr.avail_out < 4096)
691 unsigned char *newbuf;
693 assert(zstr.avail_out + zstr.total_out == *size);
694 assert(zstr.next_out == *buf + zstr.total_out);
696 newbuf = xrealloc (*buf, *size);
699 error (0, 0, "out of memory");
703 zstr.next_out = *buf + zstr.total_out;
704 zstr.avail_out = *size - zstr.total_out;
705 assert(zstr.avail_out + zstr.total_out == *size);
706 assert(zstr.next_out == *buf + zstr.total_out);
709 zstatus = deflate (&zstr, finish ? Z_FINISH : 0);
710 if (zstatus == Z_STREAM_END)
712 else if (zstatus != Z_OK)
713 compress_error (0, zstatus, &zstr, fullname);
714 } while (zstr.avail_out == 0);
717 /* Need to add the gzip trailer (CRC32 and ISIZE, 8 bytes)
718 to the end of the gzip'd output.
719 Ensure there is enough space in the output buffer
to do so. */
721 if (zstr.avail_out < 8)
723 unsigned char *newbuf;
725 assert(zstr.avail_out + zstr.total_out == *size);
726 assert(zstr.next_out == *buf + zstr.total_out);
727 *size += 8 - zstr.avail_out;
728 newbuf = realloc (*buf, *size);
731 error (0, 0, "out of memory");
735 zstr.next_out = *buf + zstr.total_out;
736 zstr.avail_out = *size - zstr.total_out;
737 assert(zstr.avail_out + zstr.total_out == *size);
738 assert(zstr.next_out == *buf + zstr.total_out);
740 *zstr.next_out++ = (unsigned char)(crc & 0xff);
741 *zstr.next_out++ = (unsigned char)((crc >> 8) & 0xff);
742 *zstr.next_out++ = (unsigned char)((crc >> 16) & 0xff);
743 *zstr.next_out++ = (unsigned char)((crc >> 24) & 0xff);
745 *zstr.next_out++ = (unsigned char)(zstr.total_in & 0xff);
746 *zstr.next_out++ = (unsigned char)((zstr.total_in >> 8) & 0xff);
747 *zstr.next_out++ = (unsigned char)((zstr.total_in >> 16) & 0xff);
748 *zstr.next_out++ = (unsigned char)((zstr.total_in >> 24) & 0xff);
752 assert(zstr.avail_out + zstr.total_out == *size);
753 assert(zstr.next_out == *buf + zstr.total_out);
755 *len = zstr.total_out;
757 zstatus = deflateEnd (&zstr);
759 compress_error (0, zstatus, &zstr, fullname);
763 #endif /* defined (SERVER_SUPPORT) || defined (CLIENT_SUPPORT) */