2 * Copyright (c) 2013-2014 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * Implements an abstraction layer for synchronous and asynchronous
39 * buffered device I/O. Can be used for OS-abstraction but the main
40 * purpose is to allow larger buffers to be used against hammer2_chain's
41 * using smaller allocations, without causing deadlocks.
/*
 * Forward declarations for file-local helpers defined later in this file:
 * the biodone callback used by hammer2_io_breadcb() and the RB_SCAN
 * callback used by hammer2_io_putblk() to reap idle dio's.
 */
44 static void hammer2_io_callback(struct bio *bio);
45 static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
/*
 * Red-black tree comparator for hammer2_io structures, keyed on the
 * physical buffer base offset (pbase).  Used by the RB_PROTOTYPE2/
 * RB_GENERATE2 expansions below.
 *
 * NOTE(review): the return statements for the two comparisons (and the
 * equal case) are elided in this excerpt; presumably the usual
 * -1 / 1 / 0 ordering convention — confirm against the full file.
 */
48 hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
50 if (io2->pbase < io1->pbase)
52 if (io2->pbase > io1->pbase)
/*
 * Generate the RB-tree lookup/insert/remove functions for the per-mount
 * dio tree (hmp->iotree), keyed by off_t pbase via hammer2_io_cmp so
 * RB_LOOKUP() can take a raw offset directly.
 */
57 RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
58 RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
/*
 * Scratch state passed to hammer2_io_cleanup_callback() during an
 * RB_SCAN: dio's selected for destruction are moved into tmptree so
 * they can be freed after the io_spin lock is dropped.
 * NOTE(review): the 'count' scan-limit field used by the callback is
 * elided from this excerpt.
 */
61 struct hammer2_cleanupcb_info {
62 struct hammer2_io_tree tmptree;
/*
 * dio->refs packs state flags into the high bits and the reference
 * count into the low bits, so both can be updated atomically together.
 */
67 #define HAMMER2_DIO_INPROG 0x80000000
68 #define HAMMER2_DIO_GOOD 0x40000000
69 #define HAMMER2_DIO_WAITING 0x20000000
70 #define HAMMER2_DIO_DIRTY 0x10000000
72 #define HAMMER2_DIO_MASK 0x0FFFFFFF
75 * Acquire the requested dio, set *ownerp based on state. If state is good
76 * *ownerp is set to 0, otherwise *ownerp is set to DIO_INPROG and the
77 * caller must resolve the buffer.
80 hammer2_io_getblk(hammer2_mount_t *hmp, off_t lbase, int lsize, int *ownerp)
/* Convert the logical request to a physical device-block range. */
86 int psize = hammer2_devblksize(lsize);
89 pmask = ~(hammer2_off_t)(psize - 1);
/* lsize must be a power of two encoded in the low radix bits of lbase. */
91 KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
92 lbase &= ~HAMMER2_OFF_MASK_RADIX;
93 pbase = lbase & pmask;
/* The logical range must not straddle a physical buffer boundary. */
94 KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
97 * Access/Allocate the DIO
/* Fast path: shared spinlock lookup of an existing dio. */
99 spin_lock_shared(&hmp->io_spin);
100 dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
102 if ((atomic_fetchadd_int(&dio->refs, 1) &
103 HAMMER2_DIO_MASK) == 0) {
/* 0->1 ref transition: dio leaves the free pool. */
104 atomic_add_int(&dio->hmp->iofree_count, -1);
106 spin_unlock_shared(&hmp->io_spin);
108 spin_unlock_shared(&hmp->io_spin);
/* Slow path: allocate a new dio and try to insert it exclusively. */
109 dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
114 spin_lock(&hmp->io_spin);
115 xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
117 atomic_add_int(&hammer2_dio_count, 1);
118 spin_unlock(&hmp->io_spin);
/*
 * Insert collided (xio != NULL): another thread raced us in.  Ref the
 * winner and discard our allocation.
 */
120 if ((atomic_fetchadd_int(&xio->refs, 1) &
121 HAMMER2_DIO_MASK) == 0) {
122 atomic_add_int(&xio->hmp->iofree_count, -1);
124 spin_unlock(&hmp->io_spin);
125 kfree(dio, M_HAMMER2);
131 * Obtain/Validate the buffer.
138 * Stop if the buffer is good. Once set GOOD the flag cannot
139 * be cleared until refs drops to 0.
141 if (refs & HAMMER2_DIO_GOOD) {
147 * We need to acquire the in-progress lock on the buffer
149 if (refs & HAMMER2_DIO_INPROG) {
/*
 * Someone else owns INPROG; interlocked sleep until they complete.
 * The cmpset of WAITING guards against a lost wakeup — if it fails,
 * refs changed and the loop (elided here) retries.
 */
150 tsleep_interlock(dio, 0);
151 if (atomic_cmpset_int(&dio->refs, refs,
152 refs | HAMMER2_DIO_WAITING)) {
153 tsleep(dio, PINTERLOCKED, "h2dio", 0);
/* Attempt to take ownership of buffer instantiation ourselves. */
157 if (atomic_cmpset_int(&dio->refs, refs,
158 refs | HAMMER2_DIO_INPROG)) {
166 * We need to do more work before the buffer is usable
/* Caller now owns INPROG and must call hammer2_io_complete(). */
168 *ownerp = HAMMER2_DIO_INPROG;
176 * If part of an asynchronous I/O the asynchronous I/O is biodone()'d.
178 * If the caller owned INPROG then the dio will be set GOOD or not
179 * depending on whether the caller disposed of dio->bp or not.
183 hammer2_io_complete(hammer2_io_t *dio, int owner)
/*
 * Clear INPROG (and WAITING), setting GOOD iff a buffer is attached.
 * cmpset loop: retries (loop body partially elided in this excerpt)
 * until the transition lands atomically.
 */
188 while (owner & HAMMER2_DIO_INPROG) {
191 good = dio->bp ? HAMMER2_DIO_GOOD : 0;
192 if (atomic_cmpset_int(&dio->refs, refs,
193 (refs & ~(HAMMER2_DIO_WAITING |
194 HAMMER2_DIO_INPROG)) |
/* Wake any thread that slept in hammer2_io_getblk(). */
196 if (refs & HAMMER2_DIO_WAITING)
/* Disassociate the buffer lock from the (possibly async) owner. */
199 BUF_KERNPROC(dio->bp);
207 * Release our ref on *diop, dispose of the underlying buffer.
210 hammer2_io_putblk(hammer2_io_t **diop)
212 hammer2_mount_t *hmp;
/*
 * On the 1->0 ref transition we must take INPROG ourselves (nobody
 * else can hold it at ref 1) so the buffer can be disposed of safely.
 */
226 if ((refs & HAMMER2_DIO_MASK) == 1) {
227 KKASSERT((refs & HAMMER2_DIO_INPROG) == 0);
228 if (atomic_cmpset_int(&dio->refs, refs,
231 HAMMER2_DIO_DIRTY)) |
232 HAMMER2_DIO_INPROG)) {
/* Not the last ref: just decrement and we're done. */
237 if (atomic_cmpset_int(&dio->refs, refs, refs - 1))
245 * Locked INPROG on 1->0 transition and we cleared DIO_GOOD (which is
246 * legal only on the last ref). This allows us to dispose of the
247 * buffer. refs is now 0.
249 * The instant we call io_complete dio is a free agent again and
250 * can be ripped out from under us. Acquisition of the dio after
251 * this point will require a shared or exclusive spinlock.
258 atomic_add_int(&hmp->iofree_count, 1);
259 hammer2_io_complete(dio, HAMMER2_DIO_INPROG); /* clears INPROG */
260 dio = NULL; /* dio stale */
/*
 * Dispose of the buffer we detached.  Dirty buffers are clustered or
 * flagged B_CLUSTEROK (dispatch lines partially elided here); errored
 * or invalidated buffers take the B_RELBUF path.
 */
262 if (refs & HAMMER2_DIO_GOOD) {
263 KKASSERT(bp != NULL);
264 if (refs & HAMMER2_DIO_DIRTY) {
265 if (hammer2_cluster_enable) {
266 peof = (pbase + HAMMER2_SEGMASK64) &
268 cluster_write(bp, peof, psize, 4);
270 bp->b_flags |= B_CLUSTEROK;
273 } else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
281 * We cache free buffers so re-use cases can use a shared lock, but
282 * if too many build up we have to clean them out.
/* Double-checked threshold: recheck under the exclusive spinlock. */
284 if (hmp->iofree_count > 1000) {
285 struct hammer2_cleanupcb_info info;
287 RB_INIT(&info.tmptree);
288 spin_lock(&hmp->io_spin);
289 if (hmp->iofree_count > 1000) {
290 info.count = hmp->iofree_count / 2;
291 RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
292 hammer2_io_cleanup_callback, &info);
294 spin_unlock(&hmp->io_spin);
/* Actual kfree()s happen outside the spinlock. */
295 hammer2_io_cleanup(hmp, &info.tmptree);
300 * Cleanup any dio's with no references which are not in-progress.
/* RB_SCAN callback, runs with hmp->io_spin held exclusively. */
304 hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
306 struct hammer2_cleanupcb_info *info = arg;
/* Only reap dio's that are idle: zero refcount and no I/O in progress. */
309 if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
314 KKASSERT(dio->bp == NULL);
/* Move from the live tree to the caller's temporary tree for freeing. */
315 RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
316 xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
317 KKASSERT(xio == NULL);
318 if (--info->count <= 0) /* limit scan */
/*
 * Free every dio collected into 'tree' by the cleanup callback.
 * Called without the io_spin lock; the dio's are already unlinked
 * from the live iotree.
 */
325 hammer2_io_cleanup(hammer2_mount_t *hmp, struct hammer2_io_tree *tree)
329 while ((dio = RB_ROOT(tree)) != NULL) {
330 RB_REMOVE(hammer2_io_tree, tree, dio);
331 KKASSERT(dio->bp == NULL &&
332 (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
333 kfree(dio, M_HAMMER2);
334 atomic_add_int(&hammer2_dio_count, -1);
335 atomic_add_int(&hmp->iofree_count, -1);
/*
 * Return a pointer to the data for logical offset 'lbase' within the
 * dio's underlying buffer.  The buffer must already be instantiated
 * (GOOD); the radix bits of lbase are masked off before use.
 */
340 hammer2_io_data(hammer2_io_t *dio, off_t lbase)
346 KKASSERT(bp != NULL);
347 off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
348 KKASSERT(off >= 0 && off < bp->b_bufsize);
349 return(bp->b_data + off);
/*
 * Common backend for hammer2_io_new*(): acquire a dio for a new
 * (about-to-be-written) block.  'dozero' requests the logical range be
 * zeroed; 'quick' uses GETBLK_NOWAIT and may fail rather than block.
 */
354 _hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
355 hammer2_io_t **diop, int dozero, int quick)
361 dio = *diop = hammer2_io_getblk(hmp, lbase, lsize, &owner);
/*
 * Full-size request: getblk() suffices, no read needed since the
 * caller will overwrite the entire buffer.
 */
363 if (lsize == dio->psize) {
364 dio->bp = getblk(hmp->devvp,
365 dio->pbase, dio->psize,
366 (quick ? GETBLK_NOWAIT : 0),
369 vfs_bio_clrbuf(dio->bp);
371 dio->bp->b_flags |= B_CACHE;
/* Partial-buffer case: must read the surrounding physical block. */
381 error = bread(hmp->devvp, dio->pbase,
382 dio->psize, &dio->bp);
388 hammer2_io_complete(dio, owner);
/* Zero the logical range and mark the dio dirty for eventual write. */
394 bzero(hammer2_io_data(dio, lbase), lsize);
395 atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
/* Acquire a new block, zeroing its contents (dozero=1, blocking). */
401 hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
404 return(_hammer2_io_new(hmp, lbase, lsize, diop, 1, 0));
/* Acquire a new block without zeroing (caller overwrites it fully). */
408 hammer2_io_newnz(hammer2_mount_t *hmp, off_t lbase, int lsize,
411 return(_hammer2_io_new(hmp, lbase, lsize, diop, 0, 0));
/* Non-blocking variant: no zeroing, GETBLK_NOWAIT (quick=1). */
415 hammer2_io_newq(hammer2_mount_t *hmp, off_t lbase, int lsize,
418 return(_hammer2_io_new(hmp, lbase, lsize, diop, 0, 1));
/*
 * Acquire a dio and synchronously read its backing physical block,
 * using cluster_read() for read-ahead when clustering is enabled.
 * Returns the bread/cluster_read error code (per the elided return).
 */
422 hammer2_io_bread(hammer2_mount_t *hmp, off_t lbase, int lsize,
430 dio = *diop = hammer2_io_getblk(hmp, lbase, lsize, &owner);
432 if (hammer2_cluster_enable) {
/* Cluster read-ahead up to the end of the current segment. */
433 peof = (dio->pbase + HAMMER2_SEGMASK64) &
435 error = cluster_read(hmp->devvp, peof, dio->pbase,
437 dio->psize, HAMMER2_PBUFSIZE*4,
440 error = bread(hmp->devvp, dio->pbase,
441 dio->psize, &dio->bp);
/* Resolve INPROG if we owned it; sets GOOD when dio->bp survived. */
447 hammer2_io_complete(dio, owner);
/*
 * Asynchronous read: if the dio needs instantiation, issue breadcb()
 * and invoke 'callback' from the biodone path (hammer2_io_callback);
 * if the dio is already GOOD, invoke the callback synchronously and
 * release the dio here.
 */
455 hammer2_io_breadcb(hammer2_mount_t *hmp, off_t lbase, int lsize,
456 void (*callback)(hammer2_io_t *dio,
457 hammer2_cluster_t *arg_l,
458 hammer2_chain_t *arg_c,
459 void *arg_p, off_t arg_o),
460 hammer2_cluster_t *arg_l, hammer2_chain_t *arg_c,
461 void *arg_p, off_t arg_o)
467 dio = hammer2_io_getblk(hmp, lbase, lsize, &owner);
/* Stash the callback and args in the dio for the biodone handler. */
469 dio->callback = callback;
474 breadcb(hmp->devvp, dio->pbase, dio->psize,
475 hammer2_io_callback, dio);
/* Already-good path: call back immediately and drop our ref. */
478 callback(dio, arg_l, arg_c, arg_p, arg_o);
479 hammer2_io_bqrelse(&dio);
/*
 * biodone() handler for hammer2_io_breadcb()'s async read.  Attaches
 * the completed buffer to the dio, clears INPROG, then runs the
 * caller-supplied callback stored in the dio.
 */
484 hammer2_io_callback(struct bio *bio)
486 struct buf *dbp = bio->bio_buf;
487 hammer2_io_t *dio = bio->bio_caller_info1.ptr;
/* Guard against a spurious invocation before the bio completed. */
489 if ((bio->bio_flags & BIO_DONE) == 0)
491 bio->bio_flags &= ~(BIO_DONE | BIO_SYNC);
492 dio->bp = bio->bio_buf;
493 KKASSERT((dio->bp->b_flags & B_ERROR) == 0); /* XXX */
494 hammer2_io_complete(dio, HAMMER2_DIO_INPROG);
497 * We still have the ref and DIO_GOOD is now set so nothing else
498 * should mess with the callback fields until we release the dio.
500 dio->callback(dio, dio->arg_l, dio->arg_c, dio->arg_p, dio->arg_o);
501 hammer2_io_bqrelse(&dio);
502 /* TODO: async load meta-data and assign chain->dio */
/*
 * Mark the dio dirty and release it.  The actual write strategy is
 * chosen inside hammer2_io_putblk() on the final reference drop.
 */
506 hammer2_io_bawrite(hammer2_io_t **diop)
508 atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
509 hammer2_io_putblk(diop);
/* Delayed-write flavor: identical to bawrite here; putblk decides. */
513 hammer2_io_bdwrite(hammer2_io_t **diop)
515 atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
516 hammer2_io_putblk(diop);
/*
 * Synchronous-write flavor.  Currently defers to putblk like the
 * others and always reports success (the XXX marks the missing
 * synchronous error propagation).
 */
520 hammer2_io_bwrite(hammer2_io_t **diop)
522 atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
523 hammer2_io_putblk(diop);
524 return (0); /* XXX */
/* Flag the dio dirty without releasing it. */
528 hammer2_io_setdirty(hammer2_io_t *dio)
530 atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
/*
 * Invalidate the underlying buffer, but only when the invalidation
 * covers the entire physical buffer; partial invalidations are ignored
 * (the buffer may still back other logical blocks).
 */
534 hammer2_io_setinval(hammer2_io_t *dio, u_int bytes)
536 if ((u_int)dio->psize == bytes)
537 dio->bp->b_flags |= B_INVAL | B_RELBUF;
/* Release the dio; buffer disposal is centralized in putblk. */
541 hammer2_io_brelse(hammer2_io_t **diop)
543 hammer2_io_putblk(diop);
/* Queued-release flavor; currently identical to brelse via putblk. */
547 hammer2_io_bqrelse(hammer2_io_t **diop)
549 hammer2_io_putblk(diop);
/* Return non-zero if the dio has been marked dirty. */
553 hammer2_io_isdirty(hammer2_io_t *dio)
555 return((dio->refs & HAMMER2_DIO_DIRTY) != 0);