hammer2 - Refactor frontend part 1/many
[dragonfly.git] / sys / vfs / hammer2 / hammer2_strategy.c
1 /*
2  * Copyright (c) 2011-2015 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression) 
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in
17  *    the documentation and/or other materials provided with the
18  *    distribution.
19  * 3. Neither the name of The DragonFly Project nor the names of its
20  *    contributors may be used to endorse or promote products derived
21  *    from this software without specific, prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
27  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
29  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
33  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 /*
37  * This module handles low level logical file I/O (strategy) which backs
38  * the logical buffer cache.
39  */
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/kernel.h>
44 #include <sys/fcntl.h>
45 #include <sys/buf.h>
46 #include <sys/proc.h>
47 #include <sys/namei.h>
48 #include <sys/mount.h>
49 #include <sys/vnode.h>
50 #include <sys/mountctl.h>
51 #include <sys/dirent.h>
52 #include <sys/uio.h>
53 #include <sys/objcache.h>
54 #include <sys/event.h>
55 #include <sys/file.h>
56 #include <vfs/fifofs/fifo.h>
57
58 #include "hammer2.h"
59 #include "hammer2_lz4.h"
60
61 #include "zlib/hammer2_zlib.h"
62
/*
 * Object caches supplying HAMMER2_PBUFSIZE-sized scratch buffers for the
 * compressed read (decompression) and write (compression) paths.
 */
struct objcache *cache_buffer_read;
struct objcache *cache_buffer_write;
65
66 /* 
67  * Callback used in read path in case that a block is compressed with LZ4.
68  */
69 static
70 void
71 hammer2_decompress_LZ4_callback(const char *data, u_int bytes, struct bio *bio)
72 {
73         struct buf *bp;
74         char *compressed_buffer;
75         int compressed_size;
76         int result;
77
78         bp = bio->bio_buf;
79
80 #if 0
81         if bio->bio_caller_info2.index &&
82               bio->bio_caller_info1.uvalue32 !=
83               crc32(bp->b_data, bp->b_bufsize) --- return error
84 #endif
85
86         KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE);
87         compressed_size = *(const int *)data;
88         KKASSERT(compressed_size <= bytes - sizeof(int));
89
90         compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT);
91         result = LZ4_decompress_safe(__DECONST(char *, &data[sizeof(int)]),
92                                      compressed_buffer,
93                                      compressed_size,
94                                      bp->b_bufsize);
95         if (result < 0) {
96                 kprintf("READ PATH: Error during decompression."
97                         "bio %016jx/%d\n",
98                         (intmax_t)bio->bio_offset, bytes);
99                 /* make sure it isn't random garbage */
100                 bzero(compressed_buffer, bp->b_bufsize);
101         }
102         KKASSERT(result <= bp->b_bufsize);
103         bcopy(compressed_buffer, bp->b_data, bp->b_bufsize);
104         if (result < bp->b_bufsize)
105                 bzero(bp->b_data + result, bp->b_bufsize - result);
106         objcache_put(cache_buffer_read, compressed_buffer);
107         bp->b_resid = 0;
108         bp->b_flags |= B_AGE;
109 }
110
111 /*
112  * Callback used in read path in case that a block is compressed with ZLIB.
113  * It is almost identical to LZ4 callback, so in theory they can be unified,
114  * but we didn't want to make changes in bio structure for that.
115  */
116 static
117 void
118 hammer2_decompress_ZLIB_callback(const char *data, u_int bytes, struct bio *bio)
119 {
120         struct buf *bp;
121         char *compressed_buffer;
122         z_stream strm_decompress;
123         int result;
124         int ret;
125
126         bp = bio->bio_buf;
127
128         KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE);
129         strm_decompress.avail_in = 0;
130         strm_decompress.next_in = Z_NULL;
131
132         ret = inflateInit(&strm_decompress);
133
134         if (ret != Z_OK)
135                 kprintf("HAMMER2 ZLIB: Fatal error in inflateInit.\n");
136
137         compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT);
138         strm_decompress.next_in = __DECONST(char *, data);
139
140         /* XXX supply proper size, subset of device bp */
141         strm_decompress.avail_in = bytes;
142         strm_decompress.next_out = compressed_buffer;
143         strm_decompress.avail_out = bp->b_bufsize;
144
145         ret = inflate(&strm_decompress, Z_FINISH);
146         if (ret != Z_STREAM_END) {
147                 kprintf("HAMMER2 ZLIB: Fatar error during decompression.\n");
148                 bzero(compressed_buffer, bp->b_bufsize);
149         }
150         bcopy(compressed_buffer, bp->b_data, bp->b_bufsize);
151         result = bp->b_bufsize - strm_decompress.avail_out;
152         if (result < bp->b_bufsize)
153                 bzero(bp->b_data + result, strm_decompress.avail_out);
154         objcache_put(cache_buffer_read, compressed_buffer);
155         ret = inflateEnd(&strm_decompress);
156
157         bp->b_resid = 0;
158         bp->b_flags |= B_AGE;
159 }
160
161 /*
162  * Return the largest contiguous physical disk range for the logical
163  * request, in bytes.
164  *
165  * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb)
166  *
167  * Basically disabled, the logical buffer write thread has to deal with
168  * buffers one-at-a-time.
169  */
170 int
171 hammer2_vop_bmap(struct vop_bmap_args *ap)
172 {
173         *ap->a_doffsetp = NOOFFSET;
174         if (ap->a_runp)
175                 *ap->a_runp = 0;
176         if (ap->a_runb)
177                 *ap->a_runb = 0;
178         return (EOPNOTSUPP);
179 }
180
181 /*
182  * Strategy code (async logical file buffer I/O from system)
183  *
184  * WARNING: The strategy code cannot safely use hammer2 transactions
185  *          as this can deadlock against vfs_sync's vfsync() call
186  *          if multiple flushes are queued.  All H2 structures must
187  *          already be present and ready for the DIO.
188  *
189  *          Reads can be initiated asynchronously, writes have to be
190  *          spooled to a separate thread for action to avoid deadlocks.
191  */
/* Strategy dispatch helpers, used by hammer2_vop_strategy() below */
static int hammer2_strategy_read(struct vop_strategy_args *ap);
static int hammer2_strategy_write(struct vop_strategy_args *ap);
static void hammer2_strategy_read_callback(hammer2_iocb_t *iocb);
195
196 int
197 hammer2_vop_strategy(struct vop_strategy_args *ap)
198 {
199         struct bio *biop;
200         struct buf *bp;
201         int error;
202
203         biop = ap->a_bio;
204         bp = biop->bio_buf;
205
206         switch(bp->b_cmd) {
207         case BUF_CMD_READ:
208                 error = hammer2_strategy_read(ap);
209                 ++hammer2_iod_file_read;
210                 break;
211         case BUF_CMD_WRITE:
212                 error = hammer2_strategy_write(ap);
213                 ++hammer2_iod_file_write;
214                 break;
215         default:
216                 bp->b_error = error = EINVAL;
217                 bp->b_flags |= B_ERROR;
218                 biodone(biop);
219                 break;
220         }
221         return (error);
222 }
223
224 /*
225  * Logical buffer I/O, async read.
226  */
227 static
228 int
229 hammer2_strategy_read(struct vop_strategy_args *ap)
230 {
231         struct buf *bp;
232         struct bio *bio;
233         struct bio *nbio;
234         hammer2_inode_t *ip;
235         hammer2_cluster_t *cparent;
236         hammer2_cluster_t *cluster;
237         hammer2_key_t key_dummy;
238         hammer2_key_t lbase;
239         uint8_t btype;
240
241         bio = ap->a_bio;
242         bp = bio->bio_buf;
243         ip = VTOI(ap->a_vp);
244         nbio = push_bio(bio);
245
246         lbase = bio->bio_offset;
247         KKASSERT(((int)lbase & HAMMER2_PBUFMASK) == 0);
248
249         /*
250          * Lookup the file offset.
251          */
252         cparent = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS |
253                                          HAMMER2_RESOLVE_SHARED);
254         cluster = hammer2_cluster_lookup(cparent, &key_dummy,
255                                        lbase, lbase,
256                                        HAMMER2_LOOKUP_NODATA |
257                                        HAMMER2_LOOKUP_SHARED);
258         hammer2_inode_unlock(ip, cparent);
259
260         /*
261          * Data is zero-fill if no cluster could be found
262          * (XXX or EIO on a cluster failure).
263          */
264         if (cluster == NULL) {
265                 bp->b_resid = 0;
266                 bp->b_error = 0;
267                 bzero(bp->b_data, bp->b_bcount);
268                 biodone(nbio);
269                 return(0);
270         }
271
272         /*
273          * Cluster elements must be type INODE or type DATA, but the
274          * compression mode (or not) for DATA chains can be different for
275          * each chain.  This will be handled by the callback.
276          *
277          * If the cluster already has valid data the callback will be made
278          * immediately/synchronously.
279          */
280         btype = hammer2_cluster_type(cluster);
281         if (btype != HAMMER2_BREF_TYPE_INODE &&
282             btype != HAMMER2_BREF_TYPE_DATA) {
283                 panic("READ PATH: hammer2_strategy_read: unknown bref type");
284         }
285         hammer2_cluster_load_async(cluster, hammer2_strategy_read_callback,
286                                    nbio);
287         return(0);
288 }
289
/*
 * Read callback for hammer2_cluster_load_async().  The load function may
 * start several actual I/Os but will only make one callback, typically with
 * the first valid I/O XXX
 *
 * This routine is re-entered (via breadcb/hammer2_io_getblk) until a device
 * read succeeds or all cluster elements have been attempted.  On success it
 * copies/decompresses the media data into the original logical buffer and
 * completes it; on total failure it completes the logical buffer with
 * B_ERROR.  In all terminal paths the iocb is completed, the dio (if any)
 * released, and the cluster unlocked and dropped.
 */
static
void
hammer2_strategy_read_callback(hammer2_iocb_t *iocb)
{
	struct bio *bio = iocb->ptr;	/* original logical buffer */
	struct buf *bp = bio->bio_buf;	/* original logical buffer */
	hammer2_chain_t *chain;
	hammer2_cluster_t *cluster;
	hammer2_io_t *dio;
	char *data;
	int i;

	/*
	 * Extract data and handle iteration on I/O failure.  iocb->off
	 * is the cluster index for iteration.
	 */
	cluster = iocb->cluster;
	dio = iocb->dio;	/* can be NULL if iocb not in progress */

	/*
	 * Work to do if INPROG set, else dio is already good or dio is
	 * NULL (which is the shortcut case if chain->data is already good).
	 */
	if (iocb->flags & HAMMER2_IOCB_INPROG) {
		/*
		 * Read attempt not yet made.  Issue an asynchronous read
		 * if necessary and return, operation will chain back to
		 * this function.
		 */
		if ((iocb->flags & HAMMER2_IOCB_READ) == 0) {
			if (dio->bp == NULL ||
			    (dio->bp->b_flags & B_CACHE) == 0) {
				/* discard any stale non-cached buffer */
				if (dio->bp) {
					bqrelse(dio->bp);
					dio->bp = NULL;
				}
				iocb->flags |= HAMMER2_IOCB_READ;
				breadcb(dio->hmp->devvp,
					dio->pbase, dio->psize,
					hammer2_io_callback, iocb);
				return;
			}
		}
	}

	/*
	 * If we have a DIO it is now done, check for an error and
	 * calculate the data.
	 *
	 * If there is no DIO it is an optimization by
	 * hammer2_cluster_load_async(), the data is available in
	 * chain->data.
	 */
	if (dio) {
		if (dio->bp->b_flags & B_ERROR) {
			/* iocb->lbase doubles as the cluster element index */
			i = (int)iocb->lbase + 1;
			if (i >= cluster->nchains) {
				/* all cluster elements failed; report error */
				bp->b_flags |= B_ERROR;
				bp->b_error = dio->bp->b_error;
				hammer2_io_complete(iocb);
				biodone(bio);
				hammer2_cluster_unlock(cluster);
				hammer2_cluster_drop(cluster);
			} else {
				/* retry the read against the next element */
				hammer2_io_complete(iocb); /* XXX */
				chain = cluster->array[i].chain;
				kprintf("hammer2: IO CHAIN-%d %p\n", i, chain);
				hammer2_adjreadcounter(&chain->bref,
						       chain->bytes);
				iocb->chain = chain;
				iocb->lbase = (off_t)i;
				iocb->flags = 0;
				iocb->error = 0;
				hammer2_io_getblk(chain->hmp,
						  chain->bref.data_off,
						  chain->bytes,
						  iocb);
			}
			return;
		}
		chain = iocb->chain;
		data = hammer2_io_data(dio, chain->bref.data_off);
	} else {
		/*
		 * Special synchronous case, data present in chain->data.
		 */
		chain = iocb->chain;
		data = (void *)chain->data;
	}

	if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
		/*
		 * Data is embedded in the inode (copy from inode).
		 */
		bcopy(((hammer2_inode_data_t *)data)->u.data,
		      bp->b_data, HAMMER2_EMBEDDED_BYTES);
		bzero(bp->b_data + HAMMER2_EMBEDDED_BYTES,
		      bp->b_bcount - HAMMER2_EMBEDDED_BYTES);
		bp->b_resid = 0;
		bp->b_error = 0;
	} else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
		/*
		 * Data is on-media, issue device I/O and copy.
		 *
		 * XXX direct-IO shortcut could go here XXX.
		 */
		switch (HAMMER2_DEC_COMP(chain->bref.methods)) {
		case HAMMER2_COMP_LZ4:
			hammer2_decompress_LZ4_callback(data, chain->bytes,
							bio);
			break;
		case HAMMER2_COMP_ZLIB:
			hammer2_decompress_ZLIB_callback(data, chain->bytes,
							 bio);
			break;
		case HAMMER2_COMP_NONE:
			KKASSERT(chain->bytes <= bp->b_bcount);
			bcopy(data, bp->b_data, chain->bytes);
			if (chain->bytes < bp->b_bcount) {
				bzero(bp->b_data + chain->bytes,
				      bp->b_bcount - chain->bytes);
			}
			bp->b_flags |= B_NOTMETA;
			bp->b_resid = 0;
			bp->b_error = 0;
			break;
		default:
			panic("hammer2_strategy_read: "
			      "unknown compression type");
		}
	} else {
		/* bqrelse the dio to help stabilize the call to panic() */
		if (dio)
			hammer2_io_bqrelse(&dio);
		panic("hammer2_strategy_read: unknown bref type");
	}

	/*
	 * Once the iocb is cleaned up the DIO (if any) will no longer be
	 * in-progress but will still have a ref.  Be sure to release
	 * the ref.
	 */
	hammer2_io_complete(iocb);		/* physical management */
	if (dio)				/* physical dio & buffer */
		hammer2_io_bqrelse(&dio);
	hammer2_cluster_unlock(cluster);	/* cluster management */
	hammer2_cluster_drop(cluster);		/* cluster management */
	biodone(bio);				/* logical buffer */
}
444
445 static
446 int
447 hammer2_strategy_write(struct vop_strategy_args *ap)
448 {       
449         hammer2_pfs_t *pmp;
450         struct bio *bio;
451         struct buf *bp;
452         hammer2_inode_t *ip;
453         
454         bio = ap->a_bio;
455         bp = bio->bio_buf;
456         ip = VTOI(ap->a_vp);
457         pmp = ip->pmp;
458         
459         hammer2_lwinprog_ref(pmp);
460         hammer2_trans_assert_strategy(pmp);
461         hammer2_mtx_ex(&pmp->wthread_mtx);
462         if (TAILQ_EMPTY(&pmp->wthread_bioq.queue)) {
463                 bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio);
464                 hammer2_mtx_unlock(&pmp->wthread_mtx);
465                 wakeup(&pmp->wthread_bioq);
466         } else {
467                 bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio);
468                 hammer2_mtx_unlock(&pmp->wthread_mtx);
469         }
470         hammer2_lwinprog_wait(pmp);
471
472         return(0);
473 }