2 * Copyright (c) 2011-2015 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression)
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
19 * 3. Neither the name of The DragonFly Project nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific, prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
33 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * This module handles low level logical file I/O (strategy) which backs
38 * the logical buffer cache.
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/kernel.h>
44 #include <sys/fcntl.h>
47 #include <sys/namei.h>
48 #include <sys/mount.h>
49 #include <sys/vnode.h>
50 #include <sys/mountctl.h>
51 #include <sys/dirent.h>
53 #include <sys/objcache.h>
54 #include <sys/event.h>
56 #include <vfs/fifofs/fifo.h>
59 #include "hammer2_lz4.h"
61 #include "zlib/hammer2_zlib.h"
/*
 * Object caches backing the scratch buffers used by the compression /
 * decompression read and write paths (initialized elsewhere in the file).
 */
struct objcache *cache_buffer_read;
struct objcache *cache_buffer_write;
/*
 * Callback used in read path in case that a block is compressed with LZ4.
 *
 * Decompresses the LZ4-framed on-media data into the logical buffer and
 * zero-fills any tail the decompressor did not produce.  On failure the
 * scratch buffer is zeroed first so random garbage is never copied out.
 */
hammer2_decompress_LZ4_callback(const char *data, u_int bytes, struct bio *bio)
	char *compressed_buffer;
	/*
	 * NOTE(review): the next three lines are pseudo-code sketching a
	 * CRC check of the logical buffer (presumably inside #if 0 in the
	 * full source) -- not valid C as written; confirm against the
	 * unelided file.
	 */
	if bio->bio_caller_info2.index &&
	    bio->bio_caller_info1.uvalue32 !=
	    crc32(bp->b_data, bp->b_bufsize) --- return error

	/* Logical buffer cannot exceed the filesystem physical block size */
	KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE);
	/* First int of the on-media data is the compressed payload length */
	compressed_size = *(const int *)data;
	KKASSERT(compressed_size <= bytes - sizeof(int));

	/* Scratch buffer for the decompressed result (M_INTWAIT may block) */
	compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT);
	result = LZ4_decompress_safe(__DECONST(char *, &data[sizeof(int)]),
		kprintf("READ PATH: Error during decompression."
			(intmax_t)bio->bio_offset, bytes);
		/* make sure it isn't random garbage */
		bzero(compressed_buffer, bp->b_bufsize);
	KKASSERT(result <= bp->b_bufsize);
	/* Copy decompressed data out; zero-fill the unproduced remainder */
	bcopy(compressed_buffer, bp->b_data, bp->b_bufsize);
	if (result < bp->b_bufsize)
		bzero(bp->b_data + result, bp->b_bufsize - result);
	objcache_put(cache_buffer_read, compressed_buffer);

	/* Hint to the buffer cache that this buffer can be recycled early */
	bp->b_flags |= B_AGE;
112 * Callback used in read path in case that a block is compressed with ZLIB.
113 * It is almost identical to LZ4 callback, so in theory they can be unified,
114 * but we didn't want to make changes in bio structure for that.
118 hammer2_decompress_ZLIB_callback(const char *data, u_int bytes, struct bio *bio)
121 char *compressed_buffer;
122 z_stream strm_decompress;
128 KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE);
129 strm_decompress.avail_in = 0;
130 strm_decompress.next_in = Z_NULL;
132 ret = inflateInit(&strm_decompress);
135 kprintf("HAMMER2 ZLIB: Fatal error in inflateInit.\n");
137 compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT);
138 strm_decompress.next_in = __DECONST(char *, data);
140 /* XXX supply proper size, subset of device bp */
141 strm_decompress.avail_in = bytes;
142 strm_decompress.next_out = compressed_buffer;
143 strm_decompress.avail_out = bp->b_bufsize;
145 ret = inflate(&strm_decompress, Z_FINISH);
146 if (ret != Z_STREAM_END) {
147 kprintf("HAMMER2 ZLIB: Fatar error during decompression.\n");
148 bzero(compressed_buffer, bp->b_bufsize);
150 bcopy(compressed_buffer, bp->b_data, bp->b_bufsize);
151 result = bp->b_bufsize - strm_decompress.avail_out;
152 if (result < bp->b_bufsize)
153 bzero(bp->b_data + result, strm_decompress.avail_out);
154 objcache_put(cache_buffer_read, compressed_buffer);
155 ret = inflateEnd(&strm_decompress);
158 bp->b_flags |= B_AGE;
/*
 * Return the largest contiguous physical disk range for the logical
 * request, if any.
 *
 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb)
 *
 * Basically disabled, the logical buffer write thread has to deal with
 * buffers one-at-a-time.
 */
hammer2_vop_bmap(struct vop_bmap_args *ap)
	/* Report "no physical mapping" so clustered I/O is not attempted */
	*ap->a_doffsetp = NOOFFSET;
/*
 * Strategy code (async logical file buffer I/O from system)
 *
 * WARNING: The strategy code cannot safely use hammer2 transactions
 *	    as this can deadlock against vfs_sync's vfsync() call
 *	    if multiple flushes are queued.  All H2 structures must
 *	    already be present and ready for the DIO.
 *
 *	    Reads can be initiated asynchronously, writes have to be
 *	    spooled to a separate thread for action to avoid deadlocks.
 */

/* Forward declarations for the split read/write strategy helpers below */
static int hammer2_strategy_read(struct vop_strategy_args *ap);
static int hammer2_strategy_write(struct vop_strategy_args *ap);
static void hammer2_strategy_read_callback(hammer2_iocb_t *iocb);
/*
 * VOP entry point: dispatch a logical-buffer strategy request to the
 * read or write path based on the buffer command (switch elided in this
 * excerpt); unknown commands fail the buffer with EINVAL.
 */
hammer2_vop_strategy(struct vop_strategy_args *ap)
		/* Read command: async read path, bump read statistic */
		error = hammer2_strategy_read(ap);
		++hammer2_iod_file_read;
		/* Write command: spool to write thread, bump write statistic */
		error = hammer2_strategy_write(ap);
		++hammer2_iod_file_write;
		/* Unsupported buffer command: fail the logical buffer */
		bp->b_error = error = EINVAL;
		bp->b_flags |= B_ERROR;
/*
 * Logical buffer I/O, async read.
 *
 * Looks up the cluster backing the logical offset and kicks off an
 * asynchronous load; completion runs hammer2_strategy_read_callback().
 */
hammer2_strategy_read(struct vop_strategy_args *ap)
	hammer2_cluster_t *cparent;
	hammer2_cluster_t *cluster;
	hammer2_key_t key_dummy;

	/* Push to the next (device-relative) bio translation layer */
	nbio = push_bio(bio);

	/* Logical offset must be physical-buffer aligned */
	lbase = bio->bio_offset;
	KKASSERT(((int)lbase & HAMMER2_PBUFMASK) == 0);

	/*
	 * Lookup the file offset.  Shared locks: this is the read path.
	 */
	cparent = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS |
					 HAMMER2_RESOLVE_SHARED);
	cluster = hammer2_cluster_lookup(cparent, &key_dummy,
					 HAMMER2_LOOKUP_NODATA |
					 HAMMER2_LOOKUP_SHARED);
	hammer2_inode_unlock(ip, cparent);

	/*
	 * Data is zero-fill if no cluster could be found
	 * (XXX or EIO on a cluster failure).
	 */
	if (cluster == NULL) {
		bzero(bp->b_data, bp->b_bcount);

	/*
	 * Cluster elements must be type INODE or type DATA, but the
	 * compression mode (or not) for DATA chains can be different for
	 * each chain.  This will be handled by the callback.
	 *
	 * If the cluster already has valid data the callback will be made
	 * immediately/synchronously.
	 */
	btype = hammer2_cluster_type(cluster);
	if (btype != HAMMER2_BREF_TYPE_INODE &&
	    btype != HAMMER2_BREF_TYPE_DATA) {
		panic("READ PATH: hammer2_strategy_read: unknown bref type");
	hammer2_cluster_load_async(cluster, hammer2_strategy_read_callback,
/*
 * Read callback for hammer2_cluster_load_async().  The load function may
 * start several actual I/Os but will only make one callback, typically with
 * the first valid I/O XXX
 *
 * Drives the iocb state machine: issues the device read if it has not been
 * made yet, iterates to the next cluster element on device I/O error, and
 * finally copies/decompresses the data into the logical buffer.
 */
hammer2_strategy_read_callback(hammer2_iocb_t *iocb)
	struct bio *bio = iocb->ptr;	/* original logical buffer */
	struct buf *bp = bio->bio_buf;	/* original logical buffer */
	hammer2_chain_t *chain;
	hammer2_cluster_t *cluster;

	/*
	 * Extract data and handle iteration on I/O failure.  iocb->off
	 * is the cluster index for iteration.
	 */
	cluster = iocb->cluster;
	dio = iocb->dio;	/* can be NULL if iocb not in progress */

	/*
	 * Work to do if INPROG set, else dio is already good or dio is
	 * NULL (which is the shortcut case if chain->data is already good).
	 */
	if (iocb->flags & HAMMER2_IOCB_INPROG) {
		/*
		 * Read attempt not yet made.  Issue an asynchronous read
		 * if necessary and return, operation will chain back to
		 * this callback on completion.
		 */
		if ((iocb->flags & HAMMER2_IOCB_READ) == 0) {
			if (dio->bp == NULL ||
			    (dio->bp->b_flags & B_CACHE) == 0) {
				/* Not cached: issue the device read */
				iocb->flags |= HAMMER2_IOCB_READ;
				breadcb(dio->hmp->devvp,
					dio->pbase, dio->psize,
					hammer2_io_callback, iocb);

	/*
	 * If we have a DIO it is now done, check for an error and
	 * calculate the data.
	 *
	 * If there is no DIO it is an optimization by
	 * hammer2_cluster_load_async(), the data is available in
	 * chain->data (see below).
	 */
	if (dio->bp->b_flags & B_ERROR) {
		/* Device error: advance to the next cluster element */
		i = (int)iocb->lbase + 1;
		if (i >= cluster->nchains) {
			/* All elements failed: fail the logical buffer */
			bp->b_flags |= B_ERROR;
			bp->b_error = dio->bp->b_error;
			hammer2_io_complete(iocb);
			hammer2_cluster_unlock(cluster);
			hammer2_cluster_drop(cluster);
			/* Retry path (elided): switch to element i */
			hammer2_io_complete(iocb); /* XXX */
			chain = cluster->array[i].chain;
			kprintf("hammer2: IO CHAIN-%d %p\n", i, chain);
			hammer2_adjreadcounter(&chain->bref,
			iocb->lbase = (off_t)i;
			hammer2_io_getblk(chain->hmp,
					  chain->bref.data_off,
		/* DIO good: locate the data within the device buffer */
		data = hammer2_io_data(dio, chain->bref.data_off);

		/*
		 * Special synchronous case, data present in chain->data.
		 */
		data = (void *)chain->data;

	if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
		/*
		 * Data is embedded in the inode (copy from inode).
		 */
		bcopy(((hammer2_inode_data_t *)data)->u.data,
		      bp->b_data, HAMMER2_EMBEDDED_BYTES);
		bzero(bp->b_data + HAMMER2_EMBEDDED_BYTES,
		      bp->b_bcount - HAMMER2_EMBEDDED_BYTES);
	} else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
		/*
		 * Data is on-media, issue device I/O and copy.
		 *
		 * XXX direct-IO shortcut could go here XXX.
		 */
		switch (HAMMER2_DEC_COMP(chain->bref.methods)) {
		case HAMMER2_COMP_LZ4:
			hammer2_decompress_LZ4_callback(data, chain->bytes,
		case HAMMER2_COMP_ZLIB:
			hammer2_decompress_ZLIB_callback(data, chain->bytes,
		case HAMMER2_COMP_NONE:
			/* Uncompressed: straight copy plus tail zero-fill */
			KKASSERT(chain->bytes <= bp->b_bcount);
			bcopy(data, bp->b_data, chain->bytes);
			if (chain->bytes < bp->b_bcount) {
				bzero(bp->b_data + chain->bytes,
				      bp->b_bcount - chain->bytes);
			bp->b_flags |= B_NOTMETA;
			/* default (elided): unknown compression method */
			panic("hammer2_strategy_read: "
			      "unknown compression type");
		/* bqrelse the dio to help stabilize the call to panic() */
		hammer2_io_bqrelse(&dio);
		panic("hammer2_strategy_read: unknown bref type");

	/*
	 * Once the iocb is cleaned up the DIO (if any) will no longer be
	 * in-progress but will still have a ref.  Be sure to release
	 * all resources in the correct order.
	 */
	hammer2_io_complete(iocb);		/* physical management */
	if (dio)				/* physical dio & buffer */
		hammer2_io_bqrelse(&dio);
	hammer2_cluster_unlock(cluster);	/* cluster management */
	hammer2_cluster_drop(cluster);		/* cluster management */
	biodone(bio);				/* logical buffer */
/*
 * Logical buffer write path: queue the bio to the per-mount write thread
 * (the strategy code itself may not start hammer2 transactions).
 */
hammer2_strategy_write(struct vop_strategy_args *ap)
	/* Account for one more logical write in progress before queueing */
	hammer2_lwinprog_ref(pmp);
	hammer2_trans_assert_strategy(pmp);
	/* Queue the bio; only wake the thread when the queue was empty */
	hammer2_mtx_ex(&pmp->wthread_mtx);
	if (TAILQ_EMPTY(&pmp->wthread_bioq.queue)) {
		bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio);
		hammer2_mtx_unlock(&pmp->wthread_mtx);
		wakeup(&pmp->wthread_bioq);
		/* else-branch (elided): thread already running, just queue */
		bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio);
		hammer2_mtx_unlock(&pmp->wthread_mtx);
	/* Throttle the caller if too many logical writes are in flight */
	hammer2_lwinprog_wait(pmp);