2 * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
30 #include <sys/types.h>
32 #include <sys/endian.h>
33 #include <sys/param.h>
34 #include <sys/sysctl.h>
37 #include <netinet/in.h>
51 #include "mkuz_cloop.h"
52 #include "mkuz_blockcache.h"
53 #include "mkuz_zlib.h"
54 #include "mkuz_lzma.h"
57 #include "mkuz_conveyor.h"
58 #include "mkuz_format.h"
59 #include "mkuz_fqueue.h"
60 #include "mkuz_time.h"
/* Default cluster size in bytes; main() stores it in cfs.blksz before parsing options. */
62 #define DEFAULT_CLSTSIZE 16384
/*
 * Format descriptor for the zlib flavour: the cloop magic string that
 * main() copies into the header, the suffix main() appends to the input
 * name when building an output name, and the init/compress callbacks.
 * main() installs this descriptor as cfs.handler before option parsing.
 */
64 static struct mkuz_format uzip_fmt = {
65 .magic = CLOOP_MAGIC_ZLIB,
66 .default_sufx = DEFAULT_SUFX_ZLIB,
67 .f_init = &mkuz_zlib_init,
68 .f_compress = &mkuz_zlib_compress
/*
 * Format descriptor for the LZMA flavour; same layout as uzip_fmt
 * (magic string, default output suffix, init/compress callbacks).
 * main() switches cfs.handler to this descriptor; the condition that
 * triggers the switch is not visible in this view.
 */
71 static struct mkuz_format ulzma_fmt = {
72 .magic = CLOOP_MAGIC_LZMA,
73 .default_sufx = DEFAULT_SUFX_LZMA,
74 .f_init = &mkuz_lzma_init,
75 .f_compress = &mkuz_lzma_compress
/* Forward declarations of file-local helpers. */
78 static struct mkuz_blk *readblock(int, u_int32_t);
79 static void usage(void);
80 static void cleanup(void);
/*
 * Tested against NULL near the end of the file, presumably inside
 * cleanup(). NOTE(review): presumably the path of a file to remove on
 * exit -- confirm, the code that sets and consumes it is not visible.
 */
82 static char *cleanfile = NULL;
/*
 * Block-number predicate; main() passes it to mkuz_fqueue_deq_when()
 * together with a pointer to the block number it is waiting for.
 * Returns non-zero when bp->info.blkno equals the value ap points to.
 * NOTE(review): the declaration of ap is not visible here; presumably
 * it is derived from the opaque argument p -- confirm.
 */
85 cmp_blkno(const struct mkuz_blk *bp, void *p)
91 return (bp->info.blkno == *ap);
/*
 * Program entry point. From the visible code it: determines a CPU count,
 * parses command-line options, opens the input (regular file or
 * character device) and the output, reserves room for the cloop header
 * and block index, feeds input clusters to the conveyor's work queue,
 * writes finished blocks in block-number order while filling the index,
 * pads the data to a DEV_BSIZE multiple and finally writes header and
 * index at offset 0.
 * NOTE(review): many lines of this function are absent from this view;
 * the comments below describe only what is visible.
 */
94 int main(int argc, char **argv)
106 uint64_t offset, last_offset;
107 struct cloop_header hdr;
108 struct mkuz_conveyor *cvp;
110 struct mkuz_blk_info *chit;
/* Query the CPU count via sysctl and cap it at MAX_WORKERS_AUTO. */
116 ncpusz = sizeof(size_t);
117 if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz, NULL, 0) < 0) {
119 } else if (ncpu > MAX_WORKERS_AUTO) {
120 ncpu = MAX_WORKERS_AUTO;
/* Start from a zeroed header, the default cluster size and the zlib handler. */
123 memset(&hdr, 0, sizeof(hdr));
124 cfs.blksz = DEFAULT_CLSTSIZE;
131 cfs.handler = &uzip_fmt;
133 struct mkuz_blk *iblk, *oblk;
/* Parse command-line options. */
135 while((opt = getopt(argc, argv, "o:s:vZdLSj:")) != -1) {
144 errx(1, "invalid cluster size specified: %s",
164 cfs.handler = &ulzma_fmt;
175 errx(1, "invalid number of compression threads"
176 " specified: %s", optarg);
/*
 * Copy the handler's magic string into the header.
 * NOTE(review): strcpy() is unbounded; presumably every magic string
 * fits into hdr.magic -- confirm against the cloop header definition.
 */
195 strcpy(hdr.magic, cfs.handler->magic);
/* With dedup enabled, stamp CLOOP_MAJVER_3 and lower-case the byte at CLOOP_OFS_COMPR. */
197 if (cfs.en_dedup != 0) {
198 hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3;
199 hdr.magic[CLOOP_OFS_COMPR] =
200 tolower(hdr.magic[CLOOP_OFS_COMPR]);
/* Call the handler's init callback with the cluster size; the result is kept in c_ctx. */
203 c_ctx = cfs.handler->f_init(cfs.blksz);
/*
 * Build an output name from the input name plus the handler's default
 * suffix. NOTE(review): presumably done only when no output name was
 * given on the command line -- the guarding condition is not visible.
 */
207 asprintf(&oname, "%s%s", iname, cfs.handler->default_sufx);
209 err(1, "can't allocate memory");
/*
 * Install exit() as the handler for these signals.
 * NOTE(review): exit() is not async-signal-safe; presumably this is done
 * so that exit-time cleanup runs -- confirm.
 */
214 signal(SIGHUP, exit);
215 signal(SIGINT, exit);
216 signal(SIGTERM, exit);
217 signal(SIGXCPU, exit);
218 signal(SIGXFSZ, exit);
/* Open the input read-only and fetch its metadata. */
221 cfs.fdr = open(iname, O_RDONLY);
223 err(1, "open(%s)", iname);
226 if (fstat(cfs.fdr, &sb) != 0) {
227 err(1, "fstat(%s)", iname);
/* Character devices are queried with DIOCGMEDIASIZE; anything else must be a regular file. */
230 if (S_ISCHR(sb.st_mode)) {
233 if (ioctl(cfs.fdr, DIOCGMEDIASIZE, &ms) < 0) {
234 err(1, "ioctl(DIOCGMEDIASIZE)");
238 } else if (!S_ISREG(sb.st_mode)) {
239 fprintf(stderr, "%s: not a character device or regular file\n",
/*
 * Number of whole clusters in the input. A trailing partial cluster is
 * announced in verbose mode. NOTE(review): the statement that accounts
 * for the partial cluster is not visible here.
 */
243 hdr.nblocks = sb.st_size / cfs.blksz;
244 if ((sb.st_size % cfs.blksz) != 0) {
245 if (cfs.verbose != 0)
246 fprintf(stderr, "file size is not multiple "
247 "of %d, padding data\n", cfs.blksz);
/* Block index: one entry per block plus one extra entry. */
250 toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc));
/* Create or truncate the output; it is opened read-write when dedup is enabled. */
252 cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT,
253 S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
255 err(1, "open(%s)", oname);
260 /* Prepare header that we will write later when we have index ready. */
261 iov[0].iov_base = (char *)&hdr;
262 iov[0].iov_len = sizeof(hdr);
263 iov[1].iov_base = (char *)toc;
264 iov[1].iov_len = (hdr.nblocks + 1) * sizeof(*toc);
/* Data starts right after header + index; offset tracks the output position from here on. */
265 offset = iov[0].iov_len + iov[1].iov_len;
267 /* Reserve space for header */
/* NOTE(review): the lseek() result is not checked. */
268 lseek(cfs.fdw, offset, SEEK_SET);
270 if (cfs.verbose != 0) {
271 fprintf(stderr, "data size %ju bytes, number of clusters "
272 "%u, index length %zu bytes\n", sb.st_size,
273 hdr.nblocks, iov[1].iov_len);
/* Construct the conveyor; its wrk_queue and results queues are used below. */
276 cvp = mkuz_conveyor_ctor(&cfs);
/* Main loop: read a block, hand it to the work queue, then collect results in block order. */
280 for(i = io = 0; iblk != MKUZ_BLK_EOF; i++) {
281 iblk = readblock(cfs.fdr, cfs.blksz);
282 mkuz_fqueue_enq(cvp->wrk_queue, iblk);
/*
 * NOTE(review): presumably keeps reading ahead until
 * nworkers * ITEMS_PER_WORKER blocks have been queued -- the guarded
 * statement is not visible here.
 */
283 if (iblk != MKUZ_BLK_EOF &&
284 (i < (cfs.nworkers * ITEMS_PER_WORKER))) {
/* Dequeue the result whose block number equals io, so output is produced in order. */
288 oblk = mkuz_fqueue_deq_when(cvp->results, cmp_blkno, &io);
289 assert(oblk->info.blkno == (unsigned)io);
290 oblk->info.offset = offset;
/* Dedup path: pass non-empty blocks to the block cache; the result is kept in chit. */
292 if (cfs.en_dedup != 0 && oblk->info.len > 0) {
293 chit = mkuz_blkcache_regblock(cfs.fdw, oblk);
295 * There should be at least one non-empty block
296 * between us and the backref'ed offset, otherwise
297 * we won't be able to parse that sequence correctly
298 * as it would be indistinguishible from another
301 if (chit != NULL && chit->offset == last_offset) {
/* Index entry stores the offset reported by the cache, in big-endian form. */
306 toc[io] = htobe64(chit->offset);
/*
 * Write the block's data (skipped for zero-length blocks).
 * NOTE(review): only a negative return is treated as an error; a short
 * write would go unnoticed.
 */
309 if (oblk->info.len > 0 && write(cfs.fdw, oblk->data,
310 oblk->info.len) < 0) {
311 err(1, "write(%s)", oname);
/* Record this block's offset in the index (big-endian) and advance the running offset. */
314 toc[io] = htobe64(offset);
315 last_offset = offset;
316 offset += oblk->info.len;
318 if (cfs.verbose != 0) {
319 fprintf(stderr, "cluster #%d, in %u bytes, "
320 "out len=%lu offset=%lu", io, cfs.blksz,
321 (u_long)oblk->info.len, (u_long)be64toh(toc[io]));
323 fprintf(stderr, " (backref'ed to #%d)",
326 fprintf(stderr, "\n");
330 if (iblk == MKUZ_BLK_EOF) {
333 /* Last block, see if we need to add some padding */
334 if ((offset % DEV_BSIZE) == 0)
/* Build a padding block sized to reach the next DEV_BSIZE multiple and push it onto the results queue. */
336 oblk = mkuz_blk_ctor(DEV_BSIZE - (offset % DEV_BSIZE));
337 oblk->info.blkno = io;
338 oblk->info.len = oblk->alen;
339 if (cfs.verbose != 0) {
340 fprintf(stderr, "padding data with %lu bytes "
341 "so that file size is multiple of %d\n",
342 (u_long)oblk->alen, DEV_BSIZE);
344 mkuz_fqueue_enq(cvp->results, oblk);
/* Print a summary (compressed size, bytes saved, percent decrease, throughput) when verbose or summary output is enabled. */
351 if (cfs.verbose != 0 || summary.en != 0) {
353 fprintf(summary.f, "compressed data to %ju bytes, saved %lld "
354 "bytes, %.2f%% decrease, %.2f bytes/sec.\n", offset,
355 (long long)(sb.st_size - offset),
356 100.0 * (long long)(sb.st_size - offset) /
357 (float)sb.st_size, (float)sb.st_size / (et - st));
360 /* Convert to big endian */
361 hdr.blksz = htonl(cfs.blksz);
362 hdr.nblocks = htonl(hdr.nblocks);
363 /* Write headers into pre-allocated space */
364 lseek(cfs.fdw, 0, SEEK_SET);
365 if (writev(cfs.fdw, iov, 2) < 0) {
366 err(1, "writev(%s)", oname);
/*
 * Read one cluster of input from fd into a freshly constructed block.
 *
 * fd       - descriptor to read from
 * clstsize - size handed to mkuz_blk_ctor() and to read()
 *
 * lseek() and read() failures are reported through err(1, ...).
 * NOTE(review): the end-of-file / short-read handling and the return
 * statement are not visible in this view; main() compares the result
 * against MKUZ_BLK_EOF.
 */
375 static struct mkuz_blk *
376 readblock(int fd, u_int32_t clstsize)
379 struct mkuz_blk *rval;
383 rval = mkuz_blk_ctor(clstsize);
/* blockcnt is declared outside the visible lines; presumably a running block counter -- confirm. */
385 rval->info.blkno = blockcnt;
/* Remember the current input position as this block's offset. */
387 cpos = lseek(fd, 0, SEEK_CUR);
389 err(1, "readblock: lseek() failed");
392 rval->info.offset = cpos;
394 numread = read(fd, rval->data, clstsize);
396 err(1, "readblock: read() failed");
/* The block length is whatever read() returned. */
403 rval->info.len = numread;
/* Print the command-line synopsis to stderr. */
411 fprintf(stderr, "usage: mkuzip [-vZdLS] [-o outfile] [-s cluster_size] "
412 "[-j ncompr] infile\n");
/*
 * malloc() wrapper: requests size bytes and, if malloc() returns NULL,
 * reports the failure through err(1, ...), which does not return.
 * NOTE(review): the return statement is not visible here; presumably
 * the allocated pointer (retval) is returned -- confirm.
 */
417 mkuz_safe_malloc(size_t size)
421 retval = malloc(size);
422 if (retval == NULL) {
423 err(1, "can't allocate memory");
/*
 * Allocate size bytes through mkuz_safe_malloc().
 * NOTE(review): the name suggests the memory is zero-filled afterwards,
 * but the zeroing step and the return are not visible here -- confirm.
 */
430 mkuz_safe_zmalloc(size_t size)
434 retval = mkuz_safe_malloc(size);
/*
 * Acts only when cleanfile has been set.
 * NOTE(review): the guarded statement is not visible in this view;
 * presumably it removes the named file -- confirm.
 */
443 if (cleanfile != NULL)
/*
 * Test whether every byte of a memory region equals val.
 *
 * The first byte is compared with val directly; memcmp() of the region
 * against itself shifted by one byte then verifies that each byte equals
 * its successor, so all size bytes must equal val.
 * Returns non-zero when the whole region matches, zero otherwise.
 * NOTE(review): with size == 0 this would still read *mm and pass
 * size - 1 (which wraps around) to memcmp(); callers presumably
 * guarantee size >= 1 -- confirm.
 */
448 mkuz_memvcmp(const void *memory, unsigned char val, size_t size)
452 mm = (const u_char *)memory;
453 return (*mm == val) && memcmp(mm, mm + 1, size - 1) == 0;