2 * Copyright (c) 2011-2015 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression)
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
19 * 3. Neither the name of The DragonFly Project nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific, prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
33 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * This module handles low level logical file I/O (strategy) which backs
38 * the logical buffer cache.
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/kernel.h>
44 #include <sys/fcntl.h>
47 #include <sys/namei.h>
48 #include <sys/mount.h>
49 #include <sys/vnode.h>
50 #include <sys/mountctl.h>
51 #include <sys/dirent.h>
53 #include <sys/objcache.h>
54 #include <sys/event.h>
56 #include <vfs/fifofs/fifo.h>
59 #include "hammer2_lz4.h"
61 #include "zlib/hammer2_zlib.h"
/*
 * Object caches supplying scratch buffers for the compressed-read path
 * (cache_buffer_read, used by the LZ4/ZLIB decompress callbacks) and the
 * compression write path (cache_buffer_write, used by
 * hammer2_compress_and_write()).
 * NOTE(review): presumably created/destroyed by VFS init code outside this
 * view — confirm.  Leading numbers on code lines are extraction artifacts.
 */
63 struct objcache *cache_buffer_read;
64 struct objcache *cache_buffer_write;
67 * Strategy code (async logical file buffer I/O from system)
69 * WARNING: The strategy code cannot safely use hammer2 transactions
70 * as this can deadlock against vfs_sync's vfsync() call
71 * if multiple flushes are queued. All H2 structures must
72 * already be present and ready for the DIO.
74 * Reads can be initiated asynchronously, writes have to be
75 * spooled to a separate thread for action to avoid deadlocks.
77 static int hammer2_strategy_read(struct vop_strategy_args *ap);
78 static int hammer2_strategy_write(struct vop_strategy_args *ap);
79 static void hammer2_strategy_read_callback(hammer2_iocb_t *iocb);
/*
 * VOP strategy entry point: dispatch a logical buffer I/O to the read or
 * write path and bump the matching per-operation statistics counter.
 * Unknown buffer commands fail the buffer with EINVAL.
 * NOTE(review): this span is truncated by extraction (the switch on the
 * buffer command, local declarations, biodone and the return are missing).
 */
82 hammer2_vop_strategy(struct vop_strategy_args *ap)
/* read command: asynchronous logical read */
93 error = hammer2_strategy_read(ap);
94 ++hammer2_iod_file_read;
/* write command: spooled to the write thread (see hammer2_strategy_write) */
97 error = hammer2_strategy_write(ap);
98 ++hammer2_iod_file_write;
/* unrecognized command: report EINVAL on the buffer */
101 bp->b_error = error = EINVAL;
102 bp->b_flags |= B_ERROR;
110 * Return the largest contiguous physical disk range for the logical
113 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb)
115 * Basically disabled, the logical buffer write thread has to deal with
116 * buffers one-at-a-time.
/*
 * VOP bmap: logical-to-physical translation is intentionally disabled
 * (always reports NOOFFSET) so the logical buffer write thread deals with
 * buffers one at a time — see the comment block above.
 * NOTE(review): truncated span; run-length outputs and return are missing.
 */
119 hammer2_vop_bmap(struct vop_bmap_args *ap)
121 *ap->a_doffsetp = NOOFFSET;
129 /****************************************************************************
131 ****************************************************************************/
133 * Callback used in read path in case that a block is compressed with LZ4.
/*
 * Read-path callback: decompress an LZ4-compressed device block into the
 * logical buffer (bio->bio_buf).  The on-media format stores the compressed
 * payload size in a leading int, followed by the LZ4 data.
 * NOTE(review): span truncated by extraction — locals (bp, compressed_size,
 * result), the error-branch braces and the completion path are missing.
 */
137 hammer2_decompress_LZ4_callback(const char *data, u_int bytes, struct bio *bio)
140 char *compressed_buffer;
/*
 * NOTE(review): the next three lines appear to be a design note that lost
 * its comment markers during extraction; comment delimiters restored here.
 *
147 if bio->bio_caller_info2.index &&
148 bio->bio_caller_info1.uvalue32 !=
149 crc32(bp->b_data, bp->b_bufsize) --- return error
 */
152 KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE);
/* leading int on media is the compressed payload size */
153 compressed_size = *(const int *)data;
154 KKASSERT(compressed_size <= bytes - sizeof(int));
156 compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT);
157 result = LZ4_decompress_safe(__DECONST(char *, &data[sizeof(int)]),
/*
 * Decompression failure: log it and zero the scratch buffer so stale
 * kernel memory cannot leak into the logical buffer.
 */
162 kprintf("READ PATH: Error during decompression."
164 (intmax_t)bio->bio_offset, bytes);
165 /* make sure it isn't random garbage */
166 bzero(compressed_buffer, bp->b_bufsize);
168 KKASSERT(result <= bp->b_bufsize);
169 bcopy(compressed_buffer, bp->b_data, bp->b_bufsize);
/* zero-fill the tail when the decompressed size is short of the buffer */
170 if (result < bp->b_bufsize)
171 bzero(bp->b_data + result, bp->b_bufsize - result);
172 objcache_put(cache_buffer_read, compressed_buffer);
/* B_AGE: mark the logical buffer as an early-reuse candidate */
174 bp->b_flags |= B_AGE;
178 * Callback used in read path in case that a block is compressed with ZLIB.
179 * It is almost identical to LZ4 callback, so in theory they can be unified,
180 * but we didn't want to make changes in bio structure for that.
/*
 * Read-path callback: decompress a ZLIB-compressed device block into the
 * logical buffer.  Structure parallels the LZ4 callback above (see the
 * comment block preceding this function).
 * NOTE(review): span truncated by extraction — locals (bp, ret, result),
 * braces and the completion path are missing.
 * NOTE(review): the inflate-failure message below says "Fatar" (sic);
 * this is a runtime string and is left untouched in this comment-only
 * pass — fix the typo to "Fatal" in a code change.
 */
184 hammer2_decompress_ZLIB_callback(const char *data, u_int bytes, struct bio *bio)
187 char *compressed_buffer;
188 z_stream strm_decompress;
194 KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE);
/* initialize the zlib stream before inflateInit() */
195 strm_decompress.avail_in = 0;
196 strm_decompress.next_in = Z_NULL;
198 ret = inflateInit(&strm_decompress);
201 kprintf("HAMMER2 ZLIB: Fatal error in inflateInit.\n");
203 compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT);
204 strm_decompress.next_in = __DECONST(char *, data);
206 /* XXX supply proper size, subset of device bp */
207 strm_decompress.avail_in = bytes;
208 strm_decompress.next_out = compressed_buffer;
209 strm_decompress.avail_out = bp->b_bufsize;
/* single-shot inflate; anything but Z_STREAM_END is a failure */
211 ret = inflate(&strm_decompress, Z_FINISH);
212 if (ret != Z_STREAM_END) {
213 kprintf("HAMMER2 ZLIB: Fatar error during decompression.\n");
/* zero scratch so no stale kernel memory reaches the logical buffer */
214 bzero(compressed_buffer, bp->b_bufsize);
216 bcopy(compressed_buffer, bp->b_data, bp->b_bufsize);
/* bytes actually produced; zero-fill any remainder of the buffer */
217 result = bp->b_bufsize - strm_decompress.avail_out;
218 if (result < bp->b_bufsize)
219 bzero(bp->b_data + result, strm_decompress.avail_out);
220 objcache_put(cache_buffer_read, compressed_buffer);
221 ret = inflateEnd(&strm_decompress);
/* B_AGE: mark the logical buffer as an early-reuse candidate */
224 bp->b_flags |= B_AGE;
228 * Logical buffer I/O, async read.
/*
 * Async logical read: resolve the file offset to a cluster (shared lock,
 * no data instantiation) and kick off the asynchronous cluster load whose
 * completion lands in hammer2_strategy_read_callback().
 * NOTE(review): span truncated by extraction — several locals (bp, bio,
 * nbio, ip, lbase, btype), braces and returns are missing.
 */
232 hammer2_strategy_read(struct vop_strategy_args *ap)
238 hammer2_cluster_t *cparent;
239 hammer2_cluster_t *cluster;
240 hammer2_key_t key_dummy;
247 nbio = push_bio(bio);
/* logical offset must be physical-buffer aligned */
249 lbase = bio->bio_offset;
250 KKASSERT(((int)lbase & HAMMER2_PBUFMASK) == 0);
253 * Lookup the file offset.
255 hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS |
256 HAMMER2_RESOLVE_SHARED);
257 cparent = hammer2_inode_cluster(ip, HAMMER2_RESOLVE_ALWAYS |
258 HAMMER2_RESOLVE_SHARED);
/* NODATA: avoid instantiating device buffers for the lookup */
259 cluster = hammer2_cluster_lookup(cparent, &key_dummy,
261 HAMMER2_LOOKUP_NODATA |
262 HAMMER2_LOOKUP_SHARED);
263 hammer2_inode_unlock(ip, cparent);
266 * Data is zero-fill if no cluster could be found
267 * (XXX or EIO on a cluster failure).
269 if (cluster == NULL) {
272 bzero(bp->b_data, bp->b_bcount);
278 * Cluster elements must be type INODE or type DATA, but the
279 * compression mode (or not) for DATA chains can be different for
280 * each chain. This will be handled by the callback.
282 * If the cluster already has valid data the callback will be made
283 * immediately/synchronously.
285 btype = hammer2_cluster_type(cluster);
286 if (btype != HAMMER2_BREF_TYPE_INODE &&
287 btype != HAMMER2_BREF_TYPE_DATA) {
288 panic("READ PATH: hammer2_strategy_read: unknown bref type");
/* hand off: completion continues in hammer2_strategy_read_callback() */
290 hammer2_cluster_load_async(cluster, hammer2_strategy_read_callback,
296 * Read callback for hammer2_cluster_load_async(). The load function may
297 * start several actual I/Os but will only make one callback, typically with
298 * the first valid I/O XXX
/*
 * Completion callback for hammer2_cluster_load_async(): issue the device
 * read if not yet started, iterate to the next cluster element on I/O
 * error, then copy/decompress the data into the original logical buffer
 * and finish it with biodone().
 * NOTE(review): span truncated by extraction — locals (dio, data, i),
 * many braces, break/return statements and the INPROG re-entry path are
 * missing; read with the full source at hand.
 */
302 hammer2_strategy_read_callback(hammer2_iocb_t *iocb)
304 struct bio *bio = iocb->ptr; /* original logical buffer */
305 struct buf *bp = bio->bio_buf; /* original logical buffer */
306 hammer2_chain_t *chain;
307 hammer2_cluster_t *cluster;
313 * Extract data and handle iteration on I/O failure. iocb->off
314 * is the cluster index for iteration.
316 cluster = iocb->cluster;
317 dio = iocb->dio; /* can be NULL if iocb not in progress */
320 * Work to do if INPROG set, else dio is already good or dio is
321 * NULL (which is the shortcut case if chain->data is already good).
323 if (iocb->flags & HAMMER2_IOCB_INPROG) {
325 * Read attempt not yet made. Issue an asynchronous read
326 * if necessary and return, operation will chain back to
329 if ((iocb->flags & HAMMER2_IOCB_READ) == 0) {
330 if (dio->bp == NULL ||
331 (dio->bp->b_flags & B_CACHE) == 0) {
/* mark the read as issued, then start the async device read;
 * completion re-enters via hammer2_io_callback */
336 iocb->flags |= HAMMER2_IOCB_READ;
337 breadcb(dio->hmp->devvp,
338 dio->pbase, dio->psize,
339 hammer2_io_callback, iocb);
346 * If we have a DIO it is now done, check for an error and
347 * calculate the data.
349 * If there is no DIO it is an optimization by
350 * hammer2_cluster_load_async(), the data is available in
/* device error: advance to the next cluster element, or give up and
 * propagate the error to the logical buffer when none remain */
354 if (dio->bp->b_flags & B_ERROR) {
355 i = (int)iocb->lbase + 1;
356 if (i >= cluster->nchains) {
357 bp->b_flags |= B_ERROR;
358 bp->b_error = dio->bp->b_error;
359 hammer2_io_complete(iocb);
361 hammer2_cluster_unlock(cluster);
362 hammer2_cluster_drop(cluster);
/* retry on the next chain in the cluster */
364 hammer2_io_complete(iocb); /* XXX */
365 chain = cluster->array[i].chain;
366 kprintf("hammer2: IO CHAIN-%d %p\n", i, chain);
367 hammer2_adjreadcounter(&chain->bref,
370 iocb->lbase = (off_t)i;
373 hammer2_io_getblk(chain->hmp,
374 chain->bref.data_off,
381 data = hammer2_io_data(dio, chain->bref.data_off);
384 * Special synchronous case, data present in chain->data.
387 data = (void *)chain->data;
390 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
392 * Data is embedded in the inode (copy from inode).
394 bcopy(((hammer2_inode_data_t *)data)->u.data,
395 bp->b_data, HAMMER2_EMBEDDED_BYTES);
396 bzero(bp->b_data + HAMMER2_EMBEDDED_BYTES,
397 bp->b_bcount - HAMMER2_EMBEDDED_BYTES);
400 } else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
402 * Data is on-media, issue device I/O and copy.
404 * XXX direct-IO shortcut could go here XXX.
/* dispatch on the chain's compression method */
406 switch (HAMMER2_DEC_COMP(chain->bref.methods)) {
407 case HAMMER2_COMP_LZ4:
408 hammer2_decompress_LZ4_callback(data, chain->bytes,
411 case HAMMER2_COMP_ZLIB:
412 hammer2_decompress_ZLIB_callback(data, chain->bytes,
415 case HAMMER2_COMP_NONE:
416 KKASSERT(chain->bytes <= bp->b_bcount);
417 bcopy(data, bp->b_data, chain->bytes);
418 if (chain->bytes < bp->b_bcount) {
419 bzero(bp->b_data + chain->bytes,
420 bp->b_bcount - chain->bytes);
422 bp->b_flags |= B_NOTMETA;
427 panic("hammer2_strategy_read: "
428 "unknown compression type");
431 /* bqrelse the dio to help stabilize the call to panic() */
433 hammer2_io_bqrelse(&dio);
434 panic("hammer2_strategy_read: unknown bref type");
438 * Once the iocb is cleaned up the DIO (if any) will no longer be
439 * in-progress but will still have a ref. Be sure to release
442 hammer2_io_complete(iocb); /* physical management */
443 if (dio) /* physical dio & buffer */
444 hammer2_io_bqrelse(&dio);
445 hammer2_cluster_unlock(cluster); /* cluster management */
446 hammer2_cluster_drop(cluster); /* cluster management */
447 biodone(bio); /* logical buffer */
450 /****************************************************************************
452 ****************************************************************************/
455 * Functions for compression in threads,
456 * from hammer2_vnops.c
458 static void hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans,
460 hammer2_cluster_t *cparent,
461 hammer2_key_t lbase, int ioflag, int pblksize,
463 static void hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans,
465 hammer2_cluster_t *cparent,
466 hammer2_key_t lbase, int ioflag,
467 int pblksize, int *errorp,
468 int comp_algo, int check_algo);
469 static void hammer2_zero_check_and_write(struct buf *bp,
470 hammer2_trans_t *trans, hammer2_inode_t *ip,
471 hammer2_cluster_t *cparent,
473 int ioflag, int pblksize, int *errorp,
475 static int test_block_zeros(const char *buf, size_t bytes);
476 static void zero_write(struct buf *bp, hammer2_trans_t *trans,
478 hammer2_cluster_t *cparent,
481 static void hammer2_write_bp(hammer2_cluster_t *cluster, struct buf *bp,
482 int ioflag, int pblksize, int *errorp,
/*
 * Async logical write: queue the bio to the per-PFS write thread's bioq
 * (writes must be spooled to avoid strategy-path deadlocks — see the
 * WARNING at the top of this file) and throttle via lwinprog.  The thread
 * is woken only when the queue transitions from empty to non-empty.
 * NOTE(review): span truncated by extraction — locals (bp, pmp), braces
 * and the return are missing.
 */
488 hammer2_strategy_write(struct vop_strategy_args *ap)
500 hammer2_lwinprog_ref(pmp);
501 hammer2_trans_assert_strategy(pmp);
502 hammer2_mtx_ex(&pmp->wthread_mtx);
503 if (TAILQ_EMPTY(&pmp->wthread_bioq.queue)) {
/* queue was empty: insert and wake the write thread */
504 bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio);
505 hammer2_mtx_unlock(&pmp->wthread_mtx);
506 wakeup(&pmp->wthread_bioq);
/* thread already busy: just append */
508 bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio);
509 hammer2_mtx_unlock(&pmp->wthread_mtx);
/* throttle callers if too many logical writes are in progress */
511 hammer2_lwinprog_wait(pmp);
517 * Thread to handle bioq for strategy write (started from hammer2_vfsops.c)
/*
 * Dedicated write thread (started from hammer2_vfsops.c): drains the
 * per-PFS bioq under a BUFCACHE transaction, performing the actual file
 * writes via hammer2_write_file_core().  A queued bio with a NULL
 * bio_buf is a synchronization dummy (see hammer2_bioq_sync()).
 * NOTE(review): span truncated by extraction — locals (pmp, bio, bp, ip,
 * lbase, lblksize, pblksize, error), the main for(;;) loop header, braces
 * and break/continue statements are missing.
 */
520 hammer2_write_thread(void *arg)
525 hammer2_trans_t trans;
528 hammer2_cluster_t *cparent;
536 hammer2_mtx_ex(&pmp->wthread_mtx);
539 * Wait for work. Break out and destroy the thread only if
540 * requested and no work remains.
542 if (bioq_first(&pmp->wthread_bioq) == NULL) {
543 if (pmp->wthread_destroy)
545 mtxsleep(&pmp->wthread_bioq, &pmp->wthread_mtx,
551 * Special transaction for logical buffer cache writes.
553 hammer2_trans_init(&trans, pmp, HAMMER2_TRANS_BUFCACHE);
555 while ((bio = bioq_takefirst(&pmp->wthread_bioq)) != NULL) {
557 * dummy bio for synchronization. The transaction
558 * must be terminated.
560 if (bio->bio_buf == NULL) {
561 bio->bio_flags |= BIO_DONE;
562 /* bio will become invalid after DONE set */
568 * else normal bio processing
/* drop the queue mutex while doing the actual write work */
570 hammer2_mtx_unlock(&pmp->wthread_mtx);
572 hammer2_lwinprog_drop(pmp);
580 * Inode is modified, flush size and mtime changes
581 * to ensure that the file size remains consistent
582 * with the buffers being flushed.
584 * NOTE: The inode_fsync() call only flushes the
585 * inode's meta-data state, it doesn't try
586 * to flush underlying buffers or chains.
588 * NOTE: hammer2_write_file_core() may indirectly
589 * modify and modsync the inode.
591 hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
592 cparent = hammer2_inode_cluster(ip,
593 HAMMER2_RESOLVE_ALWAYS);
594 if (ip->flags & HAMMER2_INODE_RESIZED)
595 hammer2_inode_fsync(&trans, ip, cparent);
596 lblksize = hammer2_calc_logical(ip, bio->bio_offset,
598 pblksize = hammer2_calc_physical(ip, lbase);
599 hammer2_write_file_core(bp, &trans, ip,
603 hammer2_inode_unlock(ip, cparent);
/* write failure: flag the logical buffer */
605 kprintf("hammer2: error in buffer write\n");
606 bp->b_flags |= B_ERROR;
/* reacquire the queue mutex for the next iteration */
610 hammer2_mtx_ex(&pmp->wthread_mtx);
612 hammer2_trans_done(&trans);
/* shutdown handshake with the destroyer */
614 pmp->wthread_destroy = -1;
615 wakeup(&pmp->wthread_destroy);
617 hammer2_mtx_unlock(&pmp->wthread_mtx);
621 * Wait for pending I/O to complete
/*
 * Wait for all pending write-thread I/O to complete by queueing a dummy
 * bio (bio_buf == NULL) and sleeping until the thread marks it BIO_DONE.
 * Skipped when the thread is being destroyed or the queue is empty.
 * NOTE(review): span truncated by extraction — the sync_bio local
 * declaration and closing braces are missing.
 */
624 hammer2_bioq_sync(hammer2_pfs_t *pmp)
628 bzero(&sync_bio, sizeof(sync_bio)); /* dummy with no bio_buf */
629 hammer2_mtx_ex(&pmp->wthread_mtx);
630 if (pmp->wthread_destroy == 0 &&
631 TAILQ_FIRST(&pmp->wthread_bioq.queue)) {
632 bioq_insert_tail(&pmp->wthread_bioq, &sync_bio);
/* the write thread sets BIO_DONE and wakes us (see hammer2_write_thread) */
633 while ((sync_bio.bio_flags & BIO_DONE) == 0)
634 mtxsleep(&sync_bio, &pmp->wthread_mtx, 0, "h2bioq", 0);
636 hammer2_mtx_unlock(&pmp->wthread_mtx);
640 * Create a new cluster at (cparent, lbase) and assign physical storage,
641 * returning a cluster suitable for I/O. The cluster will be in a modified
644 * cparent can wind up being anything.
646 * NOTE: Special case for data embedded in inode.
/*
 * Create or locate the cluster at (cparent, lbase) and assign physical
 * storage of pblksize bytes, returning a modified, locked cluster suitable
 * for direct logical-buffer I/O.  Handles both inode-embedded data
 * (DIRECTDATA) and regular DATA chains (resizing as needed).
 * NOTE(review): span truncated by extraction — braces, break statements
 * and the return of the cluster are missing.
 */
650 hammer2_assign_physical(hammer2_trans_t *trans,
651 hammer2_inode_t *ip, hammer2_cluster_t *cparent,
652 hammer2_key_t lbase, int pblksize, int *errorp)
654 hammer2_cluster_t *cluster;
655 hammer2_cluster_t *dparent;
656 hammer2_key_t key_dummy;
657 int pradix = hammer2_getradix(pblksize);
660 * Locate the chain associated with lbase, return a locked chain.
661 * However, do not instantiate any data reference (which utilizes a
662 * device buffer) because we will be using direct IO via the
663 * logical buffer cache buffer.
666 KKASSERT(pblksize >= HAMMER2_ALLOC_MIN);
668 dparent = hammer2_cluster_lookup_init(cparent, 0);
669 cluster = hammer2_cluster_lookup(dparent, &key_dummy,
671 HAMMER2_LOOKUP_NODATA);
673 if (cluster == NULL) {
675 * We found a hole, create a new chain entry.
677 * NOTE: DATA chains are created without device backing
678 * store (nor do we want any).
680 *errorp = hammer2_cluster_create(trans, dparent, &cluster,
681 lbase, HAMMER2_PBUFRADIX,
682 HAMMER2_BREF_TYPE_DATA,
684 if (cluster == NULL) {
685 hammer2_cluster_lookup_done(dparent);
686 panic("hammer2_cluster_create: par=%p error=%d\n",
687 dparent->focus, *errorp);
690 /*ip->delta_dcount += pblksize;*/
/* existing cluster: prepare it for modification by type */
692 switch (hammer2_cluster_type(cluster)) {
693 case HAMMER2_BREF_TYPE_INODE:
695 * The data is embedded in the inode, which requires
698 hammer2_cluster_modify_ip(trans, ip, cluster, 0);
700 case HAMMER2_BREF_TYPE_DATA:
/* resize to pblksize first if the chain's current size differs */
701 if (hammer2_cluster_need_resize(cluster, pblksize)) {
702 hammer2_cluster_resize(trans, ip,
705 HAMMER2_MODIFY_OPTDATA);
709 * DATA buffers must be marked modified whether the
710 * data is in a logical buffer or not. We also have
711 * to make this call to fixup the chain data pointers
712 * after resizing in case this is an encrypted or
715 hammer2_cluster_modify(trans, cluster,
716 HAMMER2_MODIFY_OPTDATA);
719 panic("hammer2_assign_physical: bad type");
726 * Cleanup. If cluster wound up being the inode itself, i.e.
727 * the DIRECTDATA case for offset 0, then we need to update cparent.
728 * The caller expects cparent to not become stale.
730 hammer2_cluster_lookup_done(dparent);
731 /* dparent = NULL; safety */
736 * hammer2_write_file_core() - hammer2_write_thread() helper
738 * The core write function which determines which path to take
739 * depending on compression settings. We also have to locate the
740 * related clusters so we can calculate and set the check data for
/*
 * Core write dispatch for the write thread: choose the write path based
 * on the inode's compression setting — plain write (COMP_NONE, with a
 * DIRECTDATA fast path for inode-embedded data), zero-check only
 * (AUTOZERO), or zero-check plus compression (LZ4/ZLIB).
 * NOTE(review): span truncated by extraction — braces, break statements
 * and some call arguments are missing.
 */
745 hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans,
747 hammer2_cluster_t *cparent,
748 hammer2_key_t lbase, int ioflag, int pblksize,
751 hammer2_cluster_t *cluster;
753 switch(HAMMER2_DEC_ALGO(ip->meta.comp_algo)) {
754 case HAMMER2_COMP_NONE:
756 * We have to assign physical storage to the buffer
757 * we intend to dirty or write now to avoid deadlocks
758 * in the strategy code later.
760 * This can return NOOFFSET for inode-embedded data.
761 * The strategy code will take care of it in that case.
763 cluster = hammer2_assign_physical(trans, ip, cparent,
/* DIRECTDATA: data lives in the inode itself — copy and modsync */
766 if (cluster->ddflag) {
767 hammer2_inode_data_t *wipdata;
769 wipdata = hammer2_cluster_modify_ip(trans, ip,
771 KKASSERT(wipdata->meta.op_flags &
772 HAMMER2_OPFLAG_DIRECTDATA);
773 KKASSERT(bp->b_loffset == 0);
774 bcopy(bp->b_data, wipdata->u.data,
775 HAMMER2_EMBEDDED_BYTES);
776 hammer2_cluster_modsync(cluster);
/* regular DATA chain: write via hammer2_write_bp() */
778 hammer2_write_bp(cluster, bp, ioflag, pblksize,
779 errorp, ip->meta.check_algo);
782 hammer2_cluster_unlock(cluster);
783 hammer2_cluster_drop(cluster);
786 case HAMMER2_COMP_AUTOZERO:
788 * Check for zero-fill only
790 hammer2_zero_check_and_write(bp, trans, ip,
792 ioflag, pblksize, errorp,
793 ip->meta.check_algo);
795 case HAMMER2_COMP_LZ4:
796 case HAMMER2_COMP_ZLIB:
799 * Check for zero-fill and attempt compression.
801 hammer2_compress_and_write(bp, trans, ip,
806 ip->meta.check_algo);
814 * Generic function that will perform the compression in compression
815 * write path. The compression algorithm is determined by the settings
816 * obtained from inode.
/*
 * Compression write path: short-circuit all-zero blocks to zero_write(),
 * otherwise attempt LZ4 or ZLIB compression (gated by a per-inode
 * heuristic that backs off after repeated failures), round the result up
 * to a power-of-two physical block size, assign storage and write each
 * modified chain of the cluster.
 * NOTE(review): span truncated by extraction — locals (comp_buffer,
 * comp_size, comp_block_size, comp_level, ret, i, dio, bdata), many
 * braces, break/return statements and some call arguments are missing.
 */
820 hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans,
822 hammer2_cluster_t *cparent,
823 hammer2_key_t lbase, int ioflag, int pblksize,
824 int *errorp, int comp_algo, int check_algo)
826 hammer2_cluster_t *cluster;
827 hammer2_chain_t *chain;
/* all-zero block: record as a hole instead of writing data */
833 if (test_block_zeros(bp->b_data, pblksize)) {
834 zero_write(bp, trans, ip, cparent, lbase, errorp);
/* compressed output is capped at half the physical block size */
841 KKASSERT(pblksize / 2 <= 32768);
/* heuristic: only try compression when recent attempts succeeded */
843 if (ip->comp_heuristic < 8 || (ip->comp_heuristic & 7) == 0) {
844 z_stream strm_compress;
848 switch(HAMMER2_DEC_ALGO(comp_algo)) {
849 case HAMMER2_COMP_LZ4:
850 comp_buffer = objcache_get(cache_buffer_write,
852 comp_size = LZ4_compress_limitedOutput(
854 &comp_buffer[sizeof(int)],
856 pblksize / 2 - sizeof(int));
858 * We need to prefix with the size, LZ4
859 * doesn't do it for us. Add the related
862 *(int *)comp_buffer = comp_size;
864 comp_size += sizeof(int);
866 case HAMMER2_COMP_ZLIB:
/* clamp requested zlib level into the supported 6..9 range */
867 comp_level = HAMMER2_DEC_LEVEL(comp_algo);
869 comp_level = 6; /* default zlib compression */
870 else if (comp_level < 6)
872 else if (comp_level > 9)
874 ret = deflateInit(&strm_compress, comp_level);
876 kprintf("HAMMER2 ZLIB: fatal error "
877 "on deflateInit.\n");
880 comp_buffer = objcache_get(cache_buffer_write,
882 strm_compress.next_in = bp->b_data;
883 strm_compress.avail_in = pblksize;
884 strm_compress.next_out = comp_buffer;
885 strm_compress.avail_out = pblksize / 2;
886 ret = deflate(&strm_compress, Z_FINISH);
/* only a completed stream counts as successful compression */
887 if (ret == Z_STREAM_END) {
888 comp_size = pblksize / 2 -
889 strm_compress.avail_out;
893 ret = deflateEnd(&strm_compress);
896 kprintf("Error: Unknown compression method.\n");
897 kprintf("Comp_method = %d.\n", comp_algo);
/* comp_size == 0 means compression failed or was not attempted */
902 if (comp_size == 0) {
904 * compression failed or turned off
906 comp_block_size = pblksize; /* safety */
907 if (++ip->comp_heuristic > 128)
908 ip->comp_heuristic = 8;
911 * compression succeeded
913 ip->comp_heuristic = 0;
/* round the compressed size up to a power-of-two block size */
914 if (comp_size <= 1024) {
915 comp_block_size = 1024;
916 } else if (comp_size <= 2048) {
917 comp_block_size = 2048;
918 } else if (comp_size <= 4096) {
919 comp_block_size = 4096;
920 } else if (comp_size <= 8192) {
921 comp_block_size = 8192;
922 } else if (comp_size <= 16384) {
923 comp_block_size = 16384;
924 } else if (comp_size <= 32768) {
925 comp_block_size = 32768;
927 panic("hammer2: WRITE PATH: "
928 "Weird comp_size value.");
930 comp_block_size = pblksize;
934 cluster = hammer2_assign_physical(trans, ip, cparent,
935 lbase, comp_block_size,
938 kprintf("WRITE PATH: An error occurred while "
939 "assigning physical space.\n");
940 KKASSERT(cluster == NULL);
/* DIRECTDATA: inode-embedded data, no device I/O required */
944 if (cluster->ddflag) {
945 hammer2_inode_data_t *wipdata;
947 wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
948 KKASSERT(wipdata->meta.op_flags & HAMMER2_OPFLAG_DIRECTDATA);
949 KKASSERT(bp->b_loffset == 0);
950 bcopy(bp->b_data, wipdata->u.data, HAMMER2_EMBEDDED_BYTES);
951 hammer2_cluster_modsync(cluster);
/* write each modified (FEMOD) chain in the cluster */
953 for (i = 0; i < cluster->nchains; ++i) {
959 if ((cluster->array[i].flags & HAMMER2_CITEM_FEMOD) == 0)
961 chain = cluster->array[i].chain; /* XXX */
964 KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED);
966 switch(chain->bref.type) {
967 case HAMMER2_BREF_TYPE_INODE:
968 panic("hammer2_write_bp: unexpected inode\n");
970 case HAMMER2_BREF_TYPE_DATA:
972 * Optimize out the read-before-write
975 *errorp = hammer2_io_newnz(chain->hmp,
976 chain->bref.data_off,
980 hammer2_io_brelse(&dio);
981 kprintf("hammer2: WRITE PATH: "
982 "dbp bread error\n");
985 bdata = hammer2_io_data(dio, chain->bref.data_off);
988 * When loading the block make sure we don't
989 * leave garbage after the compressed data.
/* record compression+check methods in the blockref */
992 chain->bref.methods =
993 HAMMER2_ENC_COMP(comp_algo) +
994 HAMMER2_ENC_CHECK(check_algo);
995 bcopy(comp_buffer, bdata, comp_size);
996 if (comp_size != comp_block_size) {
997 bzero(bdata + comp_size,
998 comp_block_size - comp_size);
/* uncompressed fallback: copy the raw logical buffer */
1001 chain->bref.methods =
1003 HAMMER2_COMP_NONE) +
1004 HAMMER2_ENC_CHECK(check_algo);
1005 bcopy(bp->b_data, bdata, pblksize);
1009 * The flush code doesn't calculate check codes for
1010 * file data (doing so can result in excessive I/O),
1013 hammer2_chain_setcheck(chain, bdata);
1016 * Device buffer is now valid, chain is no longer in
1017 * the initial state.
1019 * (No blockref table worries with file data)
1021 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
1023 /* Now write the related bdp. */
1024 if (ioflag & IO_SYNC) {
1026 * Synchronous I/O requested.
1028 hammer2_io_bwrite(&dio);
1030 } else if ((ioflag & IO_DIRECT) &&
1031 loff + n == pblksize) {
1032 hammer2_io_bdwrite(&dio);
1034 } else if (ioflag & IO_ASYNC) {
1035 hammer2_io_bawrite(&dio);
1037 hammer2_io_bdwrite(&dio);
1041 panic("hammer2_write_bp: bad chain type %d\n",
1049 hammer2_cluster_unlock(cluster);
1050 hammer2_cluster_drop(cluster);
/* return the scratch compression buffer if one was allocated */
1053 objcache_put(cache_buffer_write, comp_buffer);
1059 * Function that performs zero-checking and writing without compression,
1060 * it corresponds to default zero-checking path.
/*
 * Default (no-compression) write path with zero detection: all-zero
 * blocks become holes via zero_write(); otherwise assign physical
 * storage and write the buffer uncompressed via hammer2_write_bp().
 * NOTE(review): span truncated by extraction — braces and the final
 * check_algo argument line are missing.
 */
1064 hammer2_zero_check_and_write(struct buf *bp, hammer2_trans_t *trans,
1065 hammer2_inode_t *ip,
1066 hammer2_cluster_t *cparent,
1067 hammer2_key_t lbase, int ioflag, int pblksize, int *errorp,
1070 hammer2_cluster_t *cluster;
1072 if (test_block_zeros(bp->b_data, pblksize)) {
1073 zero_write(bp, trans, ip, cparent, lbase, errorp);
1075 cluster = hammer2_assign_physical(trans, ip, cparent,
1076 lbase, pblksize, errorp);
1077 hammer2_write_bp(cluster, bp, ioflag, pblksize, errorp,
1080 hammer2_cluster_unlock(cluster);
1081 hammer2_cluster_drop(cluster);
1089 * A function to test whether a block of data contains only zeros,
1090 * returns TRUE (non-zero) if the block is all zeros.
/*
 * Test whether a block of data contains only zero bytes.
 *
 * buf   - block to scan (long-aligned in practice: callers pass
 *         physical-block-sized logical buffer data)
 * bytes - number of bytes to scan
 *
 * Returns non-zero (TRUE) if the block is all zeros, 0 otherwise.
 *
 * Scans a long at a time for speed; any trailing bytes that do not fill
 * a whole long are checked individually (the original assumed 'bytes'
 * was an exact multiple of sizeof(long) — this version also handles the
 * general case, with identical results for aligned multiples).
 */
static
int
test_block_zeros(const char *buf, size_t bytes)
{
	size_t i = 0;

	while (i + sizeof(long) <= bytes) {
		if (*(const long *)(buf + i) != 0)
			return (0);
		i += sizeof(long);
	}
	while (i < bytes) {
		if (buf[i] != 0)
			return (0);
		++i;
	}
	return (1);
}
1108 * Function to "write" a block that contains only zeros.
/*
 * "Write" an all-zero block: for inode-embedded (DIRECTDATA) storage
 * the embedded area is zeroed and modsynced; for a regular DATA chain
 * the existing cluster is permanently deleted, turning the block into a
 * hole.  No storage is assigned for zero blocks.
 * NOTE(review): span truncated by extraction — braces and the
 * completion path are missing.
 */
1112 zero_write(struct buf *bp, hammer2_trans_t *trans,
1113 hammer2_inode_t *ip,
1114 hammer2_cluster_t *cparent,
1115 hammer2_key_t lbase, int *errorp __unused)
1117 hammer2_cluster_t *cluster;
1118 hammer2_key_t key_dummy;
1120 cparent = hammer2_cluster_lookup_init(cparent, 0);
1121 cluster = hammer2_cluster_lookup(cparent, &key_dummy, lbase, lbase,
1122 HAMMER2_LOOKUP_NODATA);
1124 if (cluster->ddflag) {
/* DIRECTDATA case: zero the inode-embedded data area */
1125 hammer2_inode_data_t *wipdata;
1127 wipdata = hammer2_cluster_modify_ip(trans, ip,
1129 KKASSERT(wipdata->meta.op_flags &
1130 HAMMER2_OPFLAG_DIRECTDATA);
1131 KKASSERT(bp->b_loffset == 0);
1132 bzero(wipdata->u.data, HAMMER2_EMBEDDED_BYTES);
1133 hammer2_cluster_modsync(cluster);
/* regular DATA chain: delete it, leaving a hole */
1135 hammer2_cluster_delete(trans, cparent, cluster,
1136 HAMMER2_DELETE_PERMANENT);
1138 hammer2_cluster_unlock(cluster);
1139 hammer2_cluster_drop(cluster);
1141 hammer2_cluster_lookup_done(cparent);
1147 * Function to write the data as it is, without performing any sort of
1148 * compression. This function is used in path without compression and
1149 * default zero-checking path.
/*
 * Write the logical buffer as-is (no compression) to each modified
 * (FEMOD) chain of the cluster: inode-embedded data is copied into the
 * inode, DATA chains get a fresh device buffer (read-before-write
 * elided via hammer2_io_newnz), the check code is set, and the device
 * buffer is pushed per the ioflag (sync/direct/async/delayed).
 * NOTE(review): span truncated by extraction — locals (error, i, dio,
 * bdata), braces, continue/break statements and the *errorp store are
 * missing.
 */
1153 hammer2_write_bp(hammer2_cluster_t *cluster, struct buf *bp, int ioflag,
1154 int pblksize, int *errorp, int check_algo)
1156 hammer2_chain_t *chain;
1157 hammer2_inode_data_t *wipdata;
1163 error = 0; /* XXX TODO below */
/* iterate cluster elements, skipping non-FEMOD entries */
1165 for (i = 0; i < cluster->nchains; ++i) {
1166 if ((cluster->array[i].flags & HAMMER2_CITEM_FEMOD) == 0)
1168 chain = cluster->array[i].chain; /* XXX */
1171 KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED);
1173 switch(chain->bref.type) {
1174 case HAMMER2_BREF_TYPE_INODE:
/* inode-embedded data: copy straight into the inode */
1175 wipdata = &hammer2_chain_wdata(chain)->ipdata;
1176 KKASSERT(wipdata->meta.op_flags &
1177 HAMMER2_OPFLAG_DIRECTDATA);
1178 KKASSERT(bp->b_loffset == 0);
1179 bcopy(bp->b_data, wipdata->u.data,
1180 HAMMER2_EMBEDDED_BYTES);
1183 case HAMMER2_BREF_TYPE_DATA:
/* newnz: get a device buffer without reading old contents */
1184 error = hammer2_io_newnz(chain->hmp,
1185 chain->bref.data_off,
1186 chain->bytes, &dio);
1188 hammer2_io_bqrelse(&dio);
1189 kprintf("hammer2: WRITE PATH: "
1190 "dbp bread error\n");
1193 bdata = hammer2_io_data(dio, chain->bref.data_off);
/* record COMP_NONE + check method in the blockref */
1195 chain->bref.methods = HAMMER2_ENC_COMP(
1196 HAMMER2_COMP_NONE) +
1197 HAMMER2_ENC_CHECK(check_algo);
1198 bcopy(bp->b_data, bdata, chain->bytes);
1201 * The flush code doesn't calculate check codes for
1202 * file data (doing so can result in excessive I/O),
1205 hammer2_chain_setcheck(chain, bdata);
1208 * Device buffer is now valid, chain is no longer in
1209 * the initial state.
1211 * (No blockref table worries with file data)
1213 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
/* push the device buffer per the requested I/O mode */
1215 if (ioflag & IO_SYNC) {
1217 * Synchronous I/O requested.
1219 hammer2_io_bwrite(&dio);
1221 } else if ((ioflag & IO_DIRECT) &&
1222 loff + n == pblksize) {
1223 hammer2_io_bdwrite(&dio);
1225 } else if (ioflag & IO_ASYNC) {
1226 hammer2_io_bawrite(&dio);
1228 hammer2_io_bdwrite(&dio);
1232 panic("hammer2_write_bp: bad chain type %d\n",
1238 KKASSERT(error == 0); /* XXX TODO */