/*
 * Copyright (c) 2013-2014 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  Can be used for OS-abstraction but the main
 * purpose is to allow larger buffers to be used against hammer2_chain's
 * using smaller allocations, without causing deadlocks.
 */
#include "hammer2.h"
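
/*
 * Typical consumer pattern, as an illustrative sketch only (the example
 * function below is hypothetical and not part of the build): acquire a
 * dio-backed buffer, access its data, then release it, marking it dirty
 * so the final drop flushes it.
 */
#if 0
static int
example_modify_block(hammer2_mount_t *hmp, off_t lbase, int lsize)
{
        hammer2_io_t *dio;
        char *data;
        int error;

        error = hammer2_io_bread(hmp, lbase, lsize, &dio);
        if (error == 0) {
                data = hammer2_io_data(dio, lbase);
                data[0] ^= 1;                   /* modify one byte */
                hammer2_io_bdwrite(&dio);       /* sets DIRTY, drops ref */
        } else if (dio) {
                hammer2_io_brelse(&dio);
        }
        return (error);
}
#endif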

static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);

static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
        if (io1->pbase < io2->pbase)
                return(-1);
        if (io1->pbase > io2->pbase)
                return(1);
        return(0);
}

RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
                off_t, pbase);

struct hammer2_cleanupcb_info {
        struct hammer2_io_tree tmptree;
        int     count;
};

#define HAMMER2_DIO_INPROG      0x80000000
#define HAMMER2_DIO_GOOD        0x40000000      /* buf/bio is good */
#define HAMMER2_DIO_WAITING     0x20000000      /* iocb's queued */
#define HAMMER2_DIO_DIRTY       0x10000000      /* flush on last drop */

#define HAMMER2_DIO_MASK        0x0FFFFFFF

#define HAMMER2_GETBLK_GOOD     0
#define HAMMER2_GETBLK_QUEUED   1
#define HAMMER2_GETBLK_OWNED    2
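
/*
 * The DIO_* state bits share dio->refs with the reference count, which
 * occupies the low bits covered by HAMMER2_DIO_MASK, so a single atomic
 * op can adjust the count and observe or change state at the same time.
 * A minimal sketch of the idiom used throughout this file (hypothetical
 * example, not compiled):
 */
#if 0
static void
example_ref_and_own(hammer2_io_t *dio)
{
        int refs;

        /*
         * Bump the ref count.  If the old count was 0 this dio is
         * leaving the free pool.
         */
        if ((atomic_fetchadd_int(&dio->refs, 1) & HAMMER2_DIO_MASK) == 0)
                atomic_add_int(&dio->hmp->iofree_count, -1);

        /*
         * State transitions compare-and-set the whole word and retry
         * on collision, e.g. setting INPROG to own the buffer.
         */
        refs = dio->refs;
        if (atomic_cmpset_int(&dio->refs, refs, refs | HAMMER2_DIO_INPROG)) {
                /* we own the buffer until INPROG is handed off */
        }
}
#endif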

/*
 * Allocate/Locate the requested dio, reference it, issue or queue iocb.
 */
void
hammer2_io_getblk(hammer2_mount_t *hmp, off_t lbase, int lsize,
                  hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio;
        hammer2_io_t *xio;
        off_t pbase;
        off_t pmask;
        int psize = hammer2_devblksize(lsize);
        int refs;

        pmask = ~(hammer2_off_t)(psize - 1);

        KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
        lbase &= ~HAMMER2_OFF_MASK_RADIX;
        pbase = lbase & pmask;
        KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);

        /*
         * Access/Allocate the DIO, bump dio->refs to prevent destruction.
         */
        spin_lock_shared(&hmp->io_spin);
        dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
        if (dio) {
                if ((atomic_fetchadd_int(&dio->refs, 1) &
                     HAMMER2_DIO_MASK) == 0) {
                        atomic_add_int(&dio->hmp->iofree_count, -1);
                }
                spin_unlock_shared(&hmp->io_spin);
        } else {
                spin_unlock_shared(&hmp->io_spin);
                dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
                dio->hmp = hmp;
                dio->pbase = pbase;
                dio->psize = psize;
                dio->refs = 1;
                spin_init(&dio->spin, "h2dio");
                TAILQ_INIT(&dio->iocbq);
                spin_lock(&hmp->io_spin);
                xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
                if (xio == NULL) {
                        atomic_add_int(&hammer2_dio_count, 1);
                        spin_unlock(&hmp->io_spin);
                } else {
                        if ((atomic_fetchadd_int(&xio->refs, 1) &
                             HAMMER2_DIO_MASK) == 0) {
                                atomic_add_int(&xio->hmp->iofree_count, -1);
                        }
                        spin_unlock(&hmp->io_spin);
                        kfree(dio, M_HAMMER2);
                        dio = xio;
                }
        }

        /*
         * Obtain/Validate the buffer.
         */
        iocb->dio = dio;
        for (;;) {
                refs = dio->refs;
                cpu_ccfence();

                /*
                 * Issue the iocb immediately if the buffer is already good.
                 * Once set GOOD cannot be cleared until refs drops to 0.
                 */
                if (refs & HAMMER2_DIO_GOOD) {
                        iocb->callback(iocb);
                        break;
                }

                /*
                 * Try to own the buffer.  If we cannot we queue the iocb.
                 */
                if (refs & HAMMER2_DIO_INPROG) {
                        spin_lock(&dio->spin);
                        if (atomic_cmpset_int(&dio->refs, refs,
                                              refs | HAMMER2_DIO_WAITING)) {
                                iocb->flags |= HAMMER2_IOCB_ONQ |
                                               HAMMER2_IOCB_INPROG;
                                TAILQ_INSERT_TAIL(&dio->iocbq, iocb, entry);
                                spin_unlock(&dio->spin);
                                break;
                        }
                        spin_unlock(&dio->spin);
                        /* retry */
                } else if (atomic_cmpset_int(&dio->refs, refs,
                                             refs | HAMMER2_DIO_INPROG)) {
                        iocb->flags |= HAMMER2_IOCB_INPROG;
                        iocb->callback(iocb);
                        break;
                }
                /* retry */
        }
}
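
/*
 * hammer2_io_getblk() never sleeps itself; completion is delivered
 * through iocb->callback.  Callers wanting synchronous semantics wait
 * for IOCB_DONE, exactly as the helper functions later in this file do.
 * Illustrative sketch only (hypothetical function, not compiled):
 */
#if 0
static void
example_getblk_sync(hammer2_mount_t *hmp, off_t lbase, int lsize,
                    void (*callback)(hammer2_iocb_t *))
{
        hammer2_iocb_t iocb;

        bzero(&iocb, sizeof(iocb));
        iocb.callback = callback;
        iocb.lbase = lbase;
        iocb.lsize = lsize;
        hammer2_io_getblk(hmp, lbase, lsize, &iocb);
        if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
                hammer2_iocb_wait(&iocb);
}
#endif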

/*
 * The originator of the iocb is finished with it.
 */
void
hammer2_io_complete(hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio = iocb->dio;
        uint32_t orefs;
        uint32_t nrefs;
        uint32_t oflags;
        uint32_t nflags;

        /*
         * If IOCB_INPROG is not set then the completion was synchronous.
         * We can set IOCB_DONE safely without having to worry about waiters.
         */
        if ((iocb->flags & HAMMER2_IOCB_INPROG) == 0) {
                iocb->flags |= HAMMER2_IOCB_DONE;
                return;
        }

        /*
         * bp is held for all comers, make sure the lock is not owned by
         * a particular thread.
         */
        if (iocb->flags & HAMMER2_IOCB_DIDBP)
                BUF_KERNPROC(dio->bp);

        /*
         * Set the GOOD bit on completion with no error if dio->bp is
         * not NULL.  Only applicable if INPROG was set.
         */
        if (dio->bp && iocb->error == 0)
                atomic_set_int(&dio->refs, HAMMER2_DIO_GOOD);

        for (;;) {
                oflags = iocb->flags;
                cpu_ccfence();
                nflags = oflags;
                nflags &= ~(HAMMER2_IOCB_DIDBP |
                            HAMMER2_IOCB_WAKEUP |
                            HAMMER2_IOCB_INPROG);
                nflags |= HAMMER2_IOCB_DONE;

                if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
                        if (oflags & HAMMER2_IOCB_WAKEUP)
                                wakeup(iocb);
                        /* SMP: iocb is now stale */
                        break;
                }
                /* retry */
        }
        iocb = NULL;

        /*
         * Now finish up the dio.  If another iocb is pending chain to it,
         * otherwise clear INPROG (and WAITING).
         */
        for (;;) {
                orefs = dio->refs;
                nrefs = orefs & ~(HAMMER2_DIO_WAITING | HAMMER2_DIO_INPROG);

                if ((orefs & HAMMER2_DIO_WAITING) && TAILQ_FIRST(&dio->iocbq)) {
                        spin_lock(&dio->spin);
                        iocb = TAILQ_FIRST(&dio->iocbq);
                        if (iocb) {
                                TAILQ_REMOVE(&dio->iocbq, iocb, entry);
                                spin_unlock(&dio->spin);
                                iocb->callback(iocb);   /* chained */
                                break;
                        }
                        spin_unlock(&dio->spin);
                        /* retry */
                } else if (atomic_cmpset_int(&dio->refs, orefs, nrefs)) {
                        break;
                }
                /* retry */
        }
        /* SMP: dio is stale now */
}
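
/*
 * hammer2_io_complete() is thus the single point where DIO_INPROG
 * ownership is handed off: either a queued iocb's callback is chained
 * (inheriting INPROG), or INPROG and WAITING are cleared atomically.
 * Once either happens the dio may be ripped out from under the caller,
 * hence the 'stale' annotations above.
 */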

/*
 * Wait for an iocb's I/O to finish.
 */
void
hammer2_iocb_wait(hammer2_iocb_t *iocb)
{
        uint32_t oflags;
        uint32_t nflags;

        for (;;) {
                oflags = iocb->flags;
                cpu_ccfence();
                nflags = oflags | HAMMER2_IOCB_WAKEUP;
                if (oflags & HAMMER2_IOCB_DONE)
                        break;
                tsleep_interlock(iocb, 0);
                if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
                        tsleep(iocb, PINTERLOCKED, "h2iocb", hz);
                }
        }
}
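
/*
 * A note on the interlock pattern above: tsleep_interlock() registers
 * the thread on iocb's sleep queue before HAMMER2_IOCB_WAKEUP becomes
 * visible to the completion side, and tsleep() with PINTERLOCKED only
 * blocks if no wakeup snuck in between.  If the atomic_cmpset_int()
 * fails because the completion side changed iocb->flags concurrently,
 * the loop simply re-evaluates.  This closes the classic lost-wakeup
 * race without holding a lock across the sleep.
 */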

/*
 * Release our ref on *diop, dispose of the underlying buffer, and flush
 * on last drop if it was dirty.
 */
void
hammer2_io_putblk(hammer2_io_t **diop)
{
        hammer2_mount_t *hmp;
        hammer2_io_t *dio;
        hammer2_iocb_t iocb;
        struct buf *bp;
        off_t peof;
        off_t pbase;
        int psize;
        int refs;

        dio = *diop;
        *diop = NULL;

        /*
         * Drop refs, on 1->0 transition clear flags, set INPROG.
         */
        for (;;) {
                refs = dio->refs;

                if ((refs & HAMMER2_DIO_MASK) == 1) {
                        KKASSERT((refs & HAMMER2_DIO_INPROG) == 0);
                        if (atomic_cmpset_int(&dio->refs, refs,
                                              ((refs - 1) &
                                               ~(HAMMER2_DIO_GOOD |
                                                 HAMMER2_DIO_DIRTY)) |
                                              HAMMER2_DIO_INPROG)) {
                                break;
                        }
                        /* retry */
                } else {
                        if (atomic_cmpset_int(&dio->refs, refs, refs - 1))
                                return;
                        /* retry */
                }
        }

        /*
         * We have set DIO_INPROG to gain control of the buffer and we have
         * cleared DIO_GOOD to prevent other accessors from thinking it is
         * still good.
         *
         * We can now dispose of the buffer, and should do it before calling
         * io_complete() in case there's a race against a new reference
         * which causes io_complete() to chain and instantiate the bp again.
         */
        pbase = dio->pbase;
        psize = dio->psize;
        bp = dio->bp;
        dio->bp = NULL;

        if (refs & HAMMER2_DIO_GOOD) {
                KKASSERT(bp != NULL);
                if (refs & HAMMER2_DIO_DIRTY) {
                        if (hammer2_cluster_enable) {
                                peof = (pbase + HAMMER2_SEGMASK64) &
                                       ~HAMMER2_SEGMASK64;
                                cluster_write(bp, peof, psize, 4);
                        } else {
                                bp->b_flags |= B_CLUSTEROK;
                                bdwrite(bp);
                        }
                } else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
                        brelse(bp);
                } else {
                        bqrelse(bp);
                }
        } else if (bp) {
                if (refs & HAMMER2_DIO_DIRTY) {
                        bdwrite(bp);
                } else {
                        brelse(bp);
                }
        }

        /*
         * The instant we call io_complete dio is a free agent again and
         * can be ripped out from under us.
         *
         * We can cleanup our final DIO_INPROG by simulating an iocb
         * completion.
         */
        hmp = dio->hmp;                         /* extract fields */
        atomic_add_int(&hmp->iofree_count, 1);
        cpu_ccfence();

        iocb.dio = dio;
        iocb.flags = HAMMER2_IOCB_INPROG;
        hammer2_io_complete(&iocb);
        dio = NULL;                             /* dio stale */

        /*
         * We cache free buffers so re-use cases can use a shared lock, but
         * if too many build up we have to clean them out.
         */
        if (hmp->iofree_count > 1000) {
                struct hammer2_cleanupcb_info info;

                RB_INIT(&info.tmptree);
                spin_lock(&hmp->io_spin);
                if (hmp->iofree_count > 1000) {
                        info.count = hmp->iofree_count / 2;
                        RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
                                hammer2_io_cleanup_callback, &info);
                }
                spin_unlock(&hmp->io_spin);
                hammer2_io_cleanup(hmp, &info.tmptree);
        }
}
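
/*
 * Worked example of the 1->0 transition above, using illustrative
 * values: refs == (HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY | 1) ==
 * 0x50000001.  Then
 *
 *      ((refs - 1) & ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY)) |
 *          HAMMER2_DIO_INPROG
 *      == (0x50000000 & 0xAFFFFFFF) | 0x80000000
 *      == 0x80000000
 *
 * That is, the count drops to 0 and GOOD/DIRTY are cleared while INPROG
 * is set, all in one atomic step, so no other thread can observe a
 * half-torn-down dio.  The saved 'refs' still carries the old GOOD and
 * DIRTY bits, which the disposal code uses to decide whether to write
 * out or simply release bp.
 */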

/*
 * Cleanup any dio's with (INPROG | refs) == 0.
 *
 * Called with hmp->io_spin held.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
        struct hammer2_cleanupcb_info *info = arg;
        hammer2_io_t *xio;

        if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
                KKASSERT(dio->bp == NULL);
                RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
                xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
                KKASSERT(xio == NULL);
                if (--info->count <= 0) /* limit scan */
                        return(-1);
        }
        return(0);
}

void
hammer2_io_cleanup(hammer2_mount_t *hmp, struct hammer2_io_tree *tree)
{
        hammer2_io_t *dio;

        while ((dio = RB_ROOT(tree)) != NULL) {
                RB_REMOVE(hammer2_io_tree, tree, dio);
                KKASSERT(dio->bp == NULL &&
                    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
                kfree(dio, M_HAMMER2);
                atomic_add_int(&hammer2_dio_count, -1);
                atomic_add_int(&hmp->iofree_count, -1);
        }
}

/*
 * Returns a pointer to the requested data.
 */
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
        struct buf *bp;
        int off;

        bp = dio->bp;
        KKASSERT(bp != NULL);
        off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
        KKASSERT(off >= 0 && off < bp->b_bufsize);
        return(bp->b_data + off);
}
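
/*
 * Example of the offset math above, with illustrative numbers: lbase
 * 0x1040A encodes device offset 0x10400 with a size radix of 10 (a 1KB
 * block) in its low HAMMER2_OFF_MASK_RADIX bits.  If the backing buffer
 * begins at b_loffset 0x10000, the block's data starts at
 * bp->b_data + 0x400.  The KKASSERTs merely verify that the request
 * falls inside the instantiated buffer.
 */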

/*
 * Helpers for hammer2_io_new*() functions
 */
static
void
hammer2_iocb_new_callback(hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio = iocb->dio;
        int gbctl = (iocb->flags & HAMMER2_IOCB_QUICK) ? GETBLK_NOWAIT : 0;

        /*
         * If INPROG is not set the dio already has a good buffer and we
         * can't mess with it other than zero the requested range.
         *
         * If INPROG is set it gets a bit messy.
         */
        if (iocb->flags & HAMMER2_IOCB_INPROG) {
                if ((iocb->flags & HAMMER2_IOCB_READ) == 0) {
                        if (iocb->lsize == dio->psize) {
                                /*
                                 * Fully covered buffer, try to optimize to
                                 * avoid any I/O.
                                 */
                                if (dio->bp == NULL) {
                                        dio->bp = getblk(dio->hmp->devvp,
                                                dio->pbase, dio->psize,
                                                gbctl, 0);
                                }
                                if (dio->bp) {
                                        vfs_bio_clrbuf(dio->bp);
                                        if (iocb->flags & HAMMER2_IOCB_QUICK) {
                                                dio->bp->b_flags |= B_CACHE;
                                                bqrelse(dio->bp);
                                                dio->bp = NULL;
                                        }
                                }
                        } else if (iocb->flags & HAMMER2_IOCB_QUICK) {
                                /*
                                 * Partial buffer, quick mode.  Do nothing.
                                 */
                        } else if (dio->bp == NULL ||
                                   (dio->bp->b_flags & B_CACHE) == 0) {
                                /*
                                 * Partial buffer, normal mode, requires
                                 * read-before-write.  Chain the read.
                                 */
                                if (dio->bp) {
                                        if (dio->refs & HAMMER2_DIO_DIRTY)
                                                bdwrite(dio->bp);
                                        else
                                                bqrelse(dio->bp);
                                        dio->bp = NULL;
                                }
                                iocb->flags |= HAMMER2_IOCB_READ;
                                breadcb(dio->hmp->devvp,
                                        dio->pbase, dio->psize,
                                        hammer2_io_callback, iocb);
                                return;
                        } /* else buffer is good */
                }
        }

        /*
         * Normal completion: zero the requested range if asked to and
         * mark the buffer dirty.
         */
        if (iocb->flags & HAMMER2_IOCB_ZERO)
                bzero(hammer2_io_data(dio, iocb->lbase), iocb->lsize);
        atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_complete(iocb);
}

static
int
_hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
                hammer2_io_t **diop, int flags)
{
        hammer2_iocb_t iocb;
        hammer2_io_t *dio;

        iocb.callback = hammer2_iocb_new_callback;
        iocb.lbase = lbase;
        iocb.lsize = lsize;
        iocb.flags = flags;
        iocb.error = 0;
        hammer2_io_getblk(hmp, lbase, lsize, &iocb);
        if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
                hammer2_iocb_wait(&iocb);
        dio = *diop = iocb.dio;

        return (iocb.error);
}

int
hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
               hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, lbase, lsize, diop, HAMMER2_IOCB_ZERO));
}

int
hammer2_io_newnz(hammer2_mount_t *hmp, off_t lbase, int lsize,
                 hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, lbase, lsize, diop, 0));
}

int
hammer2_io_newq(hammer2_mount_t *hmp, off_t lbase, int lsize,
                hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, lbase, lsize, diop, HAMMER2_IOCB_QUICK));
}
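
/*
 * To summarize the three variants above: hammer2_io_new() zeroes the
 * requested range (IOCB_ZERO), hammer2_io_newnz() instantiates the
 * buffer without zeroing, and hammer2_io_newq() (IOCB_QUICK) avoids
 * blocking and read-before-write work where possible, which may leave
 * dio->bp NULL and the GOOD bit unset.
 */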

/*
 * Helper for hammer2_io_bread().
 */
static
void
hammer2_iocb_bread_callback(hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio = iocb->dio;
        off_t peof;
        int error;

        if (iocb->flags & HAMMER2_IOCB_INPROG) {
                if (hammer2_cluster_enable) {
                        peof = (dio->pbase + HAMMER2_SEGMASK64) &
                               ~HAMMER2_SEGMASK64;
                        error = cluster_read(dio->hmp->devvp, peof, dio->pbase,
                                             dio->psize,
                                             dio->psize, HAMMER2_PBUFSIZE*4,
                                             &dio->bp);
                } else {
                        error = bread(dio->hmp->devvp, dio->pbase,
                                      dio->psize, &dio->bp);
                }
                if (error) {
                        brelse(dio->bp);
                        dio->bp = NULL;
                }
                iocb->error = error;
        }
        hammer2_io_complete(iocb);
}

int
hammer2_io_bread(hammer2_mount_t *hmp, off_t lbase, int lsize,
                hammer2_io_t **diop)
{
        hammer2_iocb_t iocb;
        hammer2_io_t *dio;

        iocb.callback = hammer2_iocb_bread_callback;
        iocb.lbase = lbase;
        iocb.lsize = lsize;
        iocb.flags = 0;
        iocb.error = 0;
        hammer2_io_getblk(hmp, lbase, lsize, &iocb);
        if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
                hammer2_iocb_wait(&iocb);
        dio = *diop = iocb.dio;

        return (iocb.error);
}

/*
 * System buf/bio async callback extracts the iocb and chains
 * to the iocb callback.
 */
void
hammer2_io_callback(struct bio *bio)
{
        struct buf *dbp = bio->bio_buf;
        hammer2_iocb_t *iocb = bio->bio_caller_info1.ptr;
        hammer2_io_t *dio;

        dio = iocb->dio;
        if ((bio->bio_flags & BIO_DONE) == 0)
                bpdone(dbp, 0);
        bio->bio_flags &= ~(BIO_DONE | BIO_SYNC);
        dio->bp = bio->bio_buf;
        iocb->callback(iocb);
}
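
/*
 * hammer2_io_callback() is the function handed to breadcb() by
 * hammer2_iocb_new_callback(); the iocb passed as breadcb()'s argument
 * comes back via bio->bio_caller_info1.ptr.
 */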

void
hammer2_io_bawrite(hammer2_io_t **diop)
{
        atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
}

void
hammer2_io_bdwrite(hammer2_io_t **diop)
{
        atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
}

int
hammer2_io_bwrite(hammer2_io_t **diop)
{
        atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
        return (0);     /* XXX */
}

void
hammer2_io_setdirty(hammer2_io_t *dio)
{
        atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
}

void
hammer2_io_setinval(hammer2_io_t *dio, u_int bytes)
{
        if ((u_int)dio->psize == bytes)
                dio->bp->b_flags |= B_INVAL | B_RELBUF;
}

void
hammer2_io_brelse(hammer2_io_t **diop)
{
        hammer2_io_putblk(diop);
}

void
hammer2_io_bqrelse(hammer2_io_t **diop)
{
        hammer2_io_putblk(diop);
}

int
hammer2_io_isdirty(hammer2_io_t *dio)
{
        return((dio->refs & HAMMER2_DIO_DIRTY) != 0);
}