/* zlib.c --- interface to the zlib compression library
   Ian Lance Taylor

   This file is part of GNU CVS.

   GNU CVS is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2, or (at your option) any
   later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.  */

/* The routines in this file are the interface between the CVS
   client/server support and the zlib compression library.  */

#include "cvs.h"
#include "buffer.h"
#include "pagealign_alloc.h"

#if defined (SERVER_SUPPORT) || defined (CLIENT_SUPPORT)

#if HAVE_ZLIB_H
# include <zlib.h>
#else
# include "zlib.h"
#endif

/* OS/2 doesn't have EIO.  FIXME: this whole notion of turning
   a different error into EIO strikes me as pretty dubious.  */
#if !defined (EIO)
#define EIO EBADPOS
#endif

/* The compression interface is built upon the buffer data structure.
   We provide a buffer type which compresses or decompresses the data
   which passes through it.  An input buffer decompresses the data
   read from an underlying buffer, and an output buffer compresses the
   data before writing it to an underlying buffer.  */

/* This structure is the closure field of the buffer.  */

struct compress_buffer
{
    /* The underlying buffer.  */
    struct buffer *buf;

    /* The compression information.  */
    z_stream zstr;

    /* The compression level most recently applied to ZSTR; compared
       against the global gzip_level by compress_buffer_output.  */
    int level;
};

static void compress_error (int, int, z_stream *, const char *);
static int compress_buffer_input (void *, char *, size_t, size_t, size_t *);
static int compress_buffer_output (void *, const char *, size_t, size_t *);
static int compress_buffer_flush (void *);
static int compress_buffer_block (void *, bool);
static int compress_buffer_get_fd (void *);
static int compress_buffer_shutdown_input (struct buffer *);
static int compress_buffer_shutdown_output (struct buffer *);



/* Report an error from one of the zlib functions.

   STATUS is passed through unchanged as the first argument of error ().
   ZSTATUS is the zlib return code; when it is Z_ERRNO the errno value
   seen on entry is reported as well.  ZSTR supplies zlib's own message
   when it has one, otherwise "error <ZSTATUS>" is used.  MSG is a prefix
   identifying the failed operation (or file).  */

static void
compress_error (int status, int zstatus, z_stream *zstr, const char *msg)
{
    int hold_errno;
    const char *zmsg;
    char buf[100];

    /* Capture errno first, before any library call can overwrite it.  */
    hold_errno = errno;

    zmsg = zstr->msg;
    if (zmsg == NULL)
    {
        snprintf (buf, sizeof buf, "error %d", zstatus);
        zmsg = buf;
    }

    error (status,
           zstatus == Z_ERRNO ? hold_errno : 0,
           "%s: %s", msg, zmsg);
}



/* Create a compression buffer layered on top of BUF.

   INPUT is nonzero for a decompressing input buffer and zero for a
   compressing output buffer.  LEVEL is the deflate level (only used
   to initialize the stream for output buffers, but always recorded).
   MEMORY is handed to buf_initialize unchanged.

   Returns the new buffer created by buf_initialize.  A zlib
   initialization failure is reported through compress_error with a
   status of 1.  */

struct buffer *
compress_buffer_initialize (struct buffer *buf, int input, int level,
                            void (*memory) (struct buffer *))
{
    struct compress_buffer *n;
    int zstatus;

    n = xmalloc (sizeof *n);
    memset (n, 0, sizeof *n);

    n->buf = buf;
    n->level = level;

    if (input)
        zstatus = inflateInit (&n->zstr);
    else
        zstatus = deflateInit (&n->zstr, level);
    if (zstatus != Z_OK)
        compress_error (1, zstatus, &n->zstr, "compression initialization");

    /* There may already be data buffered on BUF.  For an output
       buffer, this is OK, because these routines will just use the
       buffer routines to append data to the (uncompressed) data
       already on BUF.  An input buffer expects to handle a single
       buffer_data of buffered input to be uncompressed, so that is OK
       provided there is only one buffer.  At present that is all
       there ever will be; if this changes, compress_buffer_input must
       be modified to handle multiple input buffers.  */
    assert (! input || buf->data == NULL || buf->data->next == NULL);

    return buf_initialize (input ? compress_buffer_input : NULL,
                           input ? NULL : compress_buffer_output,
                           input ? NULL : compress_buffer_flush,
                           compress_buffer_block, compress_buffer_get_fd,
                           (input
                            ? compress_buffer_shutdown_input
                            : compress_buffer_shutdown_output),
                           memory,
                           n);
}



/* Input data from a compression buffer.

   CLOSURE is the struct compress_buffer.  Up to SIZE bytes of inflated
   data are stored at DATA, and the number actually stored is returned
   in *GOT.  NEED is the minimum number of bytes wanted; when it is zero
   at most one read of the underlying buffer is attempted.

   Returns 0 on success, -1 at the end of the compressed stream, -2 on
   a memory allocation failure, EIO when inflate reports an error, or
   the status returned by the underlying buffer's input routine.  */

static int
compress_buffer_input (void *closure, char *data, size_t need, size_t size,
                       size_t *got)
{
    struct compress_buffer *cb = closure;
    struct buffer_data *bd;

    assert (cb->buf->input);

    /* We use a single buffer_data structure to buffer up data which
       the z_stream structure won't use yet.  We can safely store this
       on cb->buf->data, because we never call the buffer routines on
       cb->buf; we only call the buffer input routine, since that
       gives us the semantics we want.  As noted in
       compress_buffer_initialize, the buffer_data structure may
       already exist, and hold data which was already read and
       buffered before the decompression began.  */
    bd = cb->buf->data;
    if (bd == NULL)
    {
        bd = xmalloc (sizeof (struct buffer_data));
        if (bd == NULL)
            return -2;
        bd->text = pagealign_xalloc (BUFFER_DATA_SIZE);
        if (bd->text == NULL)
        {
            free (bd);
            return -2;
        }
        /* The node is the only element of the list; do not leave the
           link indeterminate for whoever walks cb->buf->data later.  */
        bd->next = NULL;
        bd->bufp = bd->text;
        bd->size = 0;
        cb->buf->data = bd;
    }

    cb->zstr.avail_out = size;
    cb->zstr.next_out = (Bytef *) data;

    while (1)
    {
        int zstatus, status;
        size_t sofar;
        size_t nread;

        /* First try to inflate any data we already have buffered up.
           This is useful even if we don't have any buffered data,
           because there may be data buffered inside the z_stream
           structure.  */

        cb->zstr.avail_in = bd->size;
        cb->zstr.next_in = (Bytef *) bd->bufp;

        do
        {
            zstatus = inflate (&cb->zstr, Z_NO_FLUSH);
            if (zstatus == Z_STREAM_END)
                break;
            if (zstatus != Z_OK && zstatus != Z_BUF_ERROR)
            {
                compress_error (0, zstatus, &cb->zstr, "inflate");
                return EIO;
            }
        } while (cb->zstr.avail_in > 0
                 && cb->zstr.avail_out > 0);

        /* Remember whatever input zlib has not consumed yet.  */
        bd->size = cb->zstr.avail_in;
        bd->bufp = (char *) cb->zstr.next_in;

        sofar = size - cb->zstr.avail_out;

        if (zstatus == Z_STREAM_END)
        {
            /* If we read any data, then return it, relying on the fact
               that we will get Z_STREAM_END on the next read too.  */
            if (sofar > 0)
                break;

            /* Otherwise, return EOF.  */
            return -1;
        }

        /* If we have obtained NEED bytes, then return, unless NEED is
           zero and we haven't obtained anything at all.  If NEED is
           zero, we will attempt at least one nonblocking read and see if
           we can inflate anything then.  */
        if (sofar > 0 && sofar >= need)
            break;

        /* All our buffered data should have been processed at this
           point.  */
        assert (bd->size == 0);

        /* This will work well in the server, because this call will
           do an unblocked read and fetch all the available data.  In
           the client, this will read a single byte from the stdio
           stream, which will cause us to call inflate once per byte.
           It would be more efficient if we could make a call which
           would fetch all the available bytes, and at least one byte.  */

        status = (*cb->buf->input) (cb->buf->closure, bd->text,
                                    need, BUFFER_DATA_SIZE, &nread);

        if (status == -2)
            /* Don't try to recover from memory allocation errors.  */
            return status;

        if (status != 0)
        {
            /* If we read any data, then return it, relying on the fact
               that we will get the same error reading the underlying
               buffer on the next read too.  */
            if (sofar > 0)
                break;

            /* Otherwise, hand the underlying buffer's status (EOF or an
               error code) straight back to the caller.  */
            return status;
        }

        /* If we didn't read anything, then presumably the buffer is
           in nonblocking mode, and we should just get out now with
           whatever we've inflated.  */
        if (nread == 0)
        {
            assert (need == 0);
            break;
        }

        bd->bufp = bd->text;
        bd->size = nread;
    }

    *got = size - cb->zstr.avail_out;

    return 0;
}



extern int gzip_level;

/* Output data to a compression buffer.

   CLOSURE is the struct compress_buffer.  The HAVE bytes at DATA are
   deflated and the compressed result is appended to the underlying
   buffer, which is then sent with buf_send_output.  *WROTE is set to
   HAVE.

   Returns EIO if zlib reports an error, otherwise the result of
   buf_send_output on the underlying buffer.

   GLOBALS
     gzip_level    If GZIP_LEVEL has changed to a value different from
                   CLOSURE->level, then set the compression level on the
                   stream to the new value.  */

static int
compress_buffer_output (void *closure, const char *data, size_t have,
                        size_t *wrote)
{
    struct compress_buffer *cb = closure;
    /* Scratch area for compressed output.  Allocated once and kept for
       the life of the process, for efficiency.  */
    static char *buffer = NULL;

    if (!buffer)
        buffer = pagealign_xalloc (BUFFER_DATA_SIZE);

    if (cb->level != gzip_level)
    {
        int zstatus;
        size_t flushed;

        cb->level = gzip_level;

        /* zlib requires the stream to be set up as for a call to
           deflate before deflateParams, because pending input may have
           to be compressed and flushed.  Give it a real output area
           and forward whatever it emits, so those bytes are not lost
           from the compressed stream.  Z_BUF_ERROR only means more
           output space is wanted, so retry while progress is made.  */
        cb->zstr.avail_in = 0;
        cb->zstr.next_in = (unsigned char *) data;
        do
        {
            cb->zstr.avail_out = BUFFER_DATA_SIZE;
            cb->zstr.next_out = (unsigned char *) buffer;

            zstatus = deflateParams (&cb->zstr, gzip_level,
                                     Z_DEFAULT_STRATEGY);

            flushed = BUFFER_DATA_SIZE - cb->zstr.avail_out;
            if (flushed > 0)
                buf_output (cb->buf, buffer, flushed);
        } while (zstatus == Z_BUF_ERROR && flushed > 0);

        if (zstatus != Z_OK && zstatus != Z_BUF_ERROR)
        {
            compress_error (0, zstatus, &cb->zstr, "deflateParams");
            return EIO;
        }
    }

    cb->zstr.avail_in = have;
    cb->zstr.next_in = (unsigned char *) data;

    while (cb->zstr.avail_in > 0)
    {
        int zstatus;

        cb->zstr.avail_out = BUFFER_DATA_SIZE;
        cb->zstr.next_out = (unsigned char *) buffer;

        zstatus = deflate (&cb->zstr, Z_NO_FLUSH);
        if (zstatus != Z_OK)
        {
            compress_error (0, zstatus, &cb->zstr, "deflate");
            return EIO;
        }

        if (cb->zstr.avail_out != BUFFER_DATA_SIZE)
            buf_output (cb->buf, buffer,
                        BUFFER_DATA_SIZE - cb->zstr.avail_out);
    }

    *wrote = have;

    /* We will only be here because buf_send_output was called on the
       compression buffer.  That means that we should now call
       buf_send_output on the underlying buffer.  */
    return buf_send_output (cb->buf);
}



/* Flush a compression buffer.

   Forces zlib to emit everything it is holding (Z_SYNC_FLUSH), appends
   that to the underlying buffer, and flushes the underlying buffer.
   Returns EIO on a zlib error, otherwise the result of buf_flush.  */

static int
compress_buffer_flush (void *closure)
{
    struct compress_buffer *cb = closure;
    /* Scratch area for compressed output.  Allocated once and kept for
       the life of the process, for efficiency.  */
    static char *buffer = NULL;

    if (!buffer)
        buffer = pagealign_xalloc (BUFFER_DATA_SIZE);

    cb->zstr.avail_in = 0;
    cb->zstr.next_in = NULL;

    while (1)
    {
        int zstatus;

        cb->zstr.avail_out = BUFFER_DATA_SIZE;
        cb->zstr.next_out = (unsigned char *) buffer;

        zstatus = deflate (&cb->zstr, Z_SYNC_FLUSH);

        /* The deflate function will return Z_BUF_ERROR if it can't do
           anything, which in this case means that all data has been
           flushed.  */
        if (zstatus == Z_BUF_ERROR)
            break;

        if (zstatus != Z_OK)
        {
            compress_error (0, zstatus, &cb->zstr, "deflate flush");
            return EIO;
        }

        if (cb->zstr.avail_out != BUFFER_DATA_SIZE)
            buf_output (cb->buf, buffer,
                        BUFFER_DATA_SIZE - cb->zstr.avail_out);

        /* If the deflate function did not fill the output buffer,
           then all data has been flushed.  */
        if (cb->zstr.avail_out > 0)
            break;
    }

    /* Now flush the underlying buffer.  Note that if the original
       call to buf_flush passed 1 for the BLOCK argument, then the
       buffer will already have been set into blocking mode, so we
       should always pass 0 here.  */
    return buf_flush (cb->buf, 0);
}



/* The block routine for a compression buffer.  Forwards the request
   to the underlying buffer: set_block when BLOCK is true, set_nonblock
   otherwise.  */

static int
compress_buffer_block (void *closure, bool block)
{
    struct compress_buffer *cb = closure;

    if (block)
        return set_block (cb->buf);
    else
        return set_nonblock (cb->buf);
}



/* Return the file descriptor underlying any child buffers.  */

static int
compress_buffer_get_fd (void *closure)
{
    struct compress_buffer *cb = closure;

    return buf_get_fd (cb->buf);
}



/* Shut down an input buffer.  Releases the inflate state and then
   shuts down the underlying buffer.  Returns EIO if inflateEnd fails,
   otherwise the result of buf_shutdown.  */

static int
compress_buffer_shutdown_input (struct buffer *buf)
{
    struct compress_buffer *cb = buf->closure;
    int zstatus;

    /* Don't make any attempt to pick up trailing data since we are
       shutting down.  If the client doesn't know we are shutting down,
       we might not see the EOF we are expecting.  */

    zstatus = inflateEnd (&cb->zstr);
    if (zstatus != Z_OK)
    {
        compress_error (0, zstatus, &cb->zstr, "inflateEnd");
        return EIO;
    }

    return buf_shutdown (cb->buf);
}



/* Shut down an output buffer.  Finishes the deflate stream (Z_FINISH),
   appends the remaining compressed bytes to the underlying buffer,
   releases the deflate state, then flushes and shuts down the
   underlying buffer.  Returns EIO on a zlib error, otherwise the first
   nonzero status from buf_flush or the result of buf_shutdown.  */

static int
compress_buffer_shutdown_output (struct buffer *buf)
{
    struct compress_buffer *cb = buf->closure;
    int zstatus, status;
    /* Scratch area for compressed output.  Allocated once and kept for
       the life of the process, for efficiency.  */
    static char *buffer = NULL;

    if (!buffer)
        buffer = pagealign_xalloc (BUFFER_DATA_SIZE);

    do
    {
        cb->zstr.avail_out = BUFFER_DATA_SIZE;
        cb->zstr.next_out = (unsigned char *) buffer;

        zstatus = deflate (&cb->zstr, Z_FINISH);
        if (zstatus != Z_OK && zstatus != Z_STREAM_END)
        {
            compress_error (0, zstatus, &cb->zstr, "deflate finish");
            return EIO;
        }

        if (cb->zstr.avail_out != BUFFER_DATA_SIZE)
            buf_output (cb->buf, buffer,
                        BUFFER_DATA_SIZE - cb->zstr.avail_out);
    } while (zstatus != Z_STREAM_END);

    zstatus = deflateEnd (&cb->zstr);
    if (zstatus != Z_OK)
    {
        compress_error (0, zstatus, &cb->zstr, "deflateEnd");
        return EIO;
    }

    status = buf_flush (cb->buf, 1);
    if (status != 0)
        return status;

    return buf_shutdown (cb->buf);
}



/* Here is our librarified gzip implementation.  It is very minimal
   but attempts to be RFC1952 compliant.  */

/* GZIP ID byte values */
#define GZIP_ID1        31
#define GZIP_ID2        139

/* Compression methods */
#define GZIP_CDEFLATE   8

/* Flags */
#define GZIP_FTEXT      1
#define GZIP_FHCRC      2
#define GZIP_FEXTRA     4
#define GZIP_FNAME      8
#define GZIP_FCOMMENT   16

/* BUF should contain SIZE bytes of gzipped data (RFC1952/RFC1951).
   We are to uncompress the data and write the result to the file
   descriptor FD.  If something goes wrong, give a nonfatal error message
   mentioning FULLNAME as the name of the file for FD.  Return 1 if
   it is an error we can't recover from, 0 on success.

   The whole of each inflated chunk is written, retrying after short
   writes, and the inflate state is released on every return path once
   it has been initialized.  */

int
gunzip_and_write (int fd, const char *fullname, unsigned char *buf,
                  size_t size)
{
    size_t pos;
    z_stream zstr;
    int zstatus;
    unsigned char outbuf[32768];
    unsigned long crc;

    if (size < 10)
    {
        error (0, 0, "gzipped data too small - lacks complete header");
        return 1;
    }
    if (buf[0] != GZIP_ID1 || buf[1] != GZIP_ID2)
    {
        error (0, 0, "gzipped data does not start with gzip identification");
        return 1;
    }
    if (buf[2] != GZIP_CDEFLATE)
    {
        error (0, 0, "only the deflate compression method is supported");
        return 1;
    }

    /* Skip over the fixed header, and then skip any of the variable-length
       fields.  As we skip each field, we keep pos <= size.  The checks
       on positions and lengths are really checks for malformed or
       incomplete gzip data.  */
    pos = 10;
    if (buf[3] & GZIP_FEXTRA)
    {
        if (pos + 2 >= size)
        {
            error (0, 0, "%s lacks proper gzip XLEN field", fullname);
            return 1;
        }
        /* XLEN is a little-endian 16-bit count of the bytes that follow
           it; the extra 2 steps over XLEN itself.  */
        pos += buf[pos] + (buf[pos + 1] << 8) + 2;
        if (pos > size)
        {
            error (0, 0, "%s lacks proper gzip \"extra field\"", fullname);
            return 1;
        }
    }
    if (buf[3] & GZIP_FNAME)
    {
        unsigned char *p = memchr (buf + pos, '\0', size - pos);
        if (p == NULL)
        {
            error (0, 0, "%s has bad gzip filename field", fullname);
            return 1;
        }
        pos = p - buf + 1;
    }
    if (buf[3] & GZIP_FCOMMENT)
    {
        unsigned char *p = memchr (buf + pos, '\0', size - pos);
        if (p == NULL)
        {
            error (0, 0, "%s has bad gzip comment field", fullname);
            return 1;
        }
        pos = p - buf + 1;
    }
    if (buf[3] & GZIP_FHCRC)
    {
        pos += 2;
        if (pos > size)
        {
            error (0, 0, "%s has bad gzip CRC16 field", fullname);
            return 1;
        }
    }

    /* There could be no data to decompress - check and short circuit.  */
    if (pos >= size)
    {
        error (0, 0, "gzip data incomplete for %s (no data)", fullname);
        return 1;
    }

    memset (&zstr, 0, sizeof zstr);
    /* Passing a negative argument tells zlib not to look for a zlib
       (RFC1950) header.  This is an undocumented feature; I suppose if
       we wanted to be anal we could synthesize a header instead,
       but why bother?  */
    zstatus = inflateInit2 (&zstr, -15);

    if (zstatus != Z_OK)
        compress_error (1, zstatus, &zstr, fullname);

    /* I don't see why we should have to include the 8 byte trailer in
       avail_in.  But I see that zlib/gzio.c does, and it seemed to fix
       a fairly rare bug in which we'd get a Z_BUF_ERROR for no obvious
       reason.  */
    zstr.avail_in = size - pos;
    zstr.next_in = buf + pos;

    crc = crc32 (0, NULL, 0);

    do
    {
        size_t produced;
        const unsigned char *p;

        zstr.avail_out = sizeof (outbuf);
        zstr.next_out = outbuf;
        zstatus = inflate (&zstr, Z_NO_FLUSH);
        if (zstatus != Z_STREAM_END && zstatus != Z_OK)
        {
            /* Report first (the message lives in ZSTR), then release
               the inflate state so it is not leaked.  */
            compress_error (0, zstatus, &zstr, fullname);
            inflateEnd (&zstr);
            return 1;
        }

        /* write may accept fewer bytes than requested; keep going until
           the whole chunk is out, otherwise data is silently dropped.
           A zero return for a nonzero request is treated as a failure
           so that we cannot spin forever.  */
        produced = sizeof (outbuf) - zstr.avail_out;
        for (p = outbuf; p < outbuf + produced; )
        {
            int nwritten = write (fd, p, outbuf + produced - p);
            if (nwritten <= 0)
            {
                error (0, nwritten < 0 ? errno : 0,
                       "writing decompressed file %s", fullname);
                inflateEnd (&zstr);
                return 1;
            }
            p += nwritten;
        }

        crc = crc32 (crc, outbuf, produced);
    } while (zstatus != Z_STREAM_END);

    zstatus = inflateEnd (&zstr);
    if (zstatus != Z_OK)
        compress_error (0, zstatus, &zstr, fullname);

    /* Check that there is still 8 trailer bytes remaining (CRC32
       and ISIZE).  Check total decomp. data, plus header len (pos)
       against input buffer total size.  */
    pos += zstr.total_in;
    if (size - pos != 8)
    {
        error (0, 0, "gzip data incomplete for %s (no trailer)", fullname);
        return 1;
    }

    /* Both trailer fields are stored little-endian.  */
    if (crc != ((unsigned long)buf[pos]
                + ((unsigned long)buf[pos + 1] << 8)
                + ((unsigned long)buf[pos + 2] << 16)
                + ((unsigned long)buf[pos + 3] << 24)))
    {
        error (0, 0, "CRC error uncompressing %s", fullname);
        return 1;
    }

    /* ISIZE holds the uncompressed size modulo 2^32, so reduce our
       count the same way before comparing.  */
    if ((zstr.total_out & 0xffffffffUL)
        != ((unsigned long)buf[pos + 4]
            + ((unsigned long)buf[pos + 5] << 8)
            + ((unsigned long)buf[pos + 6] << 16)
            + ((unsigned long)buf[pos + 7] << 24)))
    {
        error (0, 0, "invalid length uncompressing %s", fullname);
        return 1;
    }

    return 0;
}



/* Read all of FD and put the gzipped data (RFC1952/RFC1951) into *BUF,
   replacing previous contents of *BUF.  *BUF is xmalloc'd and *SIZE is
   its allocated size.  Put the actual number of bytes of data in
   *LEN.  If something goes wrong, give a nonfatal error mentioning
   FULLNAME as the name of the file for FD, and return 1 if we can't
   recover from it).  LEVEL is the compression level (1-9).

   Returns 0 on success.  The deflate state is released on every return
   path once it has been initialized.  */

int
read_and_gzip (int fd, const char *fullname, unsigned char **buf,
               size_t *size, size_t *len, int level)
{
    z_stream zstr;
    int zstatus;
    unsigned char inbuf[8192];
    int nread;
    unsigned long crc;

    if (*size < 1024)
    {
        unsigned char *newbuf;

        *size = 1024;
        newbuf = xrealloc (*buf, *size);
        if (newbuf == NULL)
        {
            error (0, 0, "out of memory");
            return 1;
        }
        *buf = newbuf;
    }

    /* The fixed 10-byte gzip header: magic, method, no flags, zero
       modification time, zero extra flags, OS byte 255.  */
    (*buf)[0] = GZIP_ID1;
    (*buf)[1] = GZIP_ID2;
    (*buf)[2] = GZIP_CDEFLATE;
    (*buf)[3] = 0;
    (*buf)[4] = (*buf)[5] = (*buf)[6] = (*buf)[7] = 0;
    /* Could set this based on level, but why bother?  */
    (*buf)[8] = 0;
    (*buf)[9] = 255;

    memset (&zstr, 0, sizeof zstr);
    /* A negative window size asks for a raw deflate stream, without the
       zlib (RFC1950) wrapper.  */
    zstatus = deflateInit2 (&zstr, level, Z_DEFLATED, -15, 8,
                            Z_DEFAULT_STRATEGY);
    crc = crc32 (0, NULL, 0);
    if (zstatus != Z_OK)
    {
        compress_error (0, zstatus, &zstr, fullname);
        return 1;
    }

    /* Adjust for 10-byte output header (filled in above) */
    zstr.total_out = 10;
    zstr.avail_out = *size - 10;
    zstr.next_out = *buf + 10;

    while (1)
    {
        int finish = 0;

        nread = read (fd, inbuf, sizeof inbuf);
        if (nread < 0)
        {
            error (0, errno, "cannot read %s", fullname);
            deflateEnd (&zstr);
            return 1;
        }
        else if (nread == 0)
            /* End of file.  */
            finish = 1;
        crc = crc32 (crc, inbuf, nread);
        zstr.next_in = inbuf;
        zstr.avail_in = nread;

        do
        {
            /* I don't see this documented anywhere, but deflate seems
               to tend to dump core sometimes if we pass it Z_FINISH and
               a small (e.g. 2147 byte) avail_out.  So we insist on at
               least 4096 bytes (that is what zlib/gzio.c uses).  */

            if (zstr.avail_out < 4096)
            {
                unsigned char *newbuf;

                assert (zstr.avail_out + zstr.total_out == *size);
                assert (zstr.next_out == *buf + zstr.total_out);
                *size *= 2;
                newbuf = xrealloc (*buf, *size);
                if (newbuf == NULL)
                {
                    error (0, 0, "out of memory");
                    deflateEnd (&zstr);
                    return 1;
                }
                *buf = newbuf;
                zstr.next_out = *buf + zstr.total_out;
                zstr.avail_out = *size - zstr.total_out;
                assert (zstr.avail_out + zstr.total_out == *size);
                assert (zstr.next_out == *buf + zstr.total_out);
            }

            zstatus = deflate (&zstr, finish ? Z_FINISH : Z_NO_FLUSH);
            if (zstatus == Z_STREAM_END)
                goto done;
            else if (zstatus != Z_OK && zstatus != Z_BUF_ERROR)
            {
                /* A hard error will not go away; carrying on would only
                   repeat it on every pass (forever, once at EOF).
                   Z_BUF_ERROR is excluded: it merely says no progress
                   was possible on this call, which happens when the
                   previous call consumed all input while exactly
                   filling the output.  */
                compress_error (0, zstatus, &zstr, fullname);
                deflateEnd (&zstr);
                return 1;
            }
        } while (zstr.avail_out == 0);
    }
 done:

    /* Need to add the CRC information (8 bytes)
       to the end of the gzip'd output.
       Ensure there is enough space in the output buffer
       to do so.  */
    if (zstr.avail_out < 8)
    {
        unsigned char *newbuf;

        assert (zstr.avail_out + zstr.total_out == *size);
        assert (zstr.next_out == *buf + zstr.total_out);
        *size += 8 - zstr.avail_out;
        newbuf = realloc (*buf, *size);
        if (newbuf == NULL)
        {
            error (0, 0, "out of memory");
            deflateEnd (&zstr);
            return 1;
        }
        *buf = newbuf;
        zstr.next_out = *buf + zstr.total_out;
        zstr.avail_out = *size - zstr.total_out;
        assert (zstr.avail_out + zstr.total_out == *size);
        assert (zstr.next_out == *buf + zstr.total_out);
    }

    /* Trailer: CRC32 of the uncompressed data, then its length, each as
       four little-endian bytes.  */
    *zstr.next_out++ = (unsigned char)(crc & 0xff);
    *zstr.next_out++ = (unsigned char)((crc >> 8) & 0xff);
    *zstr.next_out++ = (unsigned char)((crc >> 16) & 0xff);
    *zstr.next_out++ = (unsigned char)((crc >> 24) & 0xff);

    *zstr.next_out++ = (unsigned char)(zstr.total_in & 0xff);
    *zstr.next_out++ = (unsigned char)((zstr.total_in >> 8) & 0xff);
    *zstr.next_out++ = (unsigned char)((zstr.total_in >> 16) & 0xff);
    *zstr.next_out++ = (unsigned char)((zstr.total_in >> 24) & 0xff);

    zstr.total_out += 8;
    zstr.avail_out -= 8;
    assert (zstr.avail_out + zstr.total_out == *size);
    assert (zstr.next_out == *buf + zstr.total_out);

    *len = zstr.total_out;

    zstatus = deflateEnd (&zstr);
    if (zstatus != Z_OK)
        compress_error (0, zstatus, &zstr, fullname);

    return 0;
}
#endif /* defined (SERVER_SUPPORT) || defined (CLIENT_SUPPORT) */