/* sys/vfs/hammer2/hammer2_io.c (dragonfly.git) */

/*
 * Copyright (c) 2013-2014 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "hammer2.h"

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  It can be used for OS abstraction, but its main
 * purpose is to allow larger device buffers to back hammer2_chain
 * structures that use smaller allocations, without causing deadlocks.
 */
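
/*
 * Illustrative sketch (not compiled): a typical synchronous read through
 * this layer.  The caller encodes the block-size radix in the low bits of
 * the 64-bit media offset (see the KKASSERT in hammer2_io_getblk()).  The
 * function and variable names here are hypothetical.
 */
#if 0
static int
example_read_block(hammer2_mount_t *hmp, hammer2_blockref_t *bref)
{
        hammer2_io_t *dio;
        char *data;
        int lsize;
        int error;

        lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
        error = hammer2_io_bread(hmp, bref->data_off, lsize, &dio);
        if (error == 0) {
                data = hammer2_io_data(dio, bref->data_off);
                /* ... consume lsize bytes at data ... */
        }
        if (dio)
                hammer2_io_bqrelse(&dio);       /* drop our ref, cache the dio */
        return (error);
}
#endif
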
static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);

static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
        if (io2->pbase < io1->pbase)
                return(-1);
        if (io2->pbase > io1->pbase)
                return(1);
        return(0);
}

RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
                off_t, pbase);
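
/*
 * dios are indexed in hmp->iotree by the base offset (pbase) of their
 * backing device buffer, letting hammer2_io_getblk() find an existing
 * buffer for any smaller logical block that falls inside it.
 */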

struct hammer2_cleanupcb_info {
        struct hammer2_io_tree tmptree;
        int     count;
};

#define HAMMER2_GETBLK_GOOD     0
#define HAMMER2_GETBLK_QUEUED   1
#define HAMMER2_GETBLK_OWNED    2

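/*
 * Orientation note: dio->refs is a single 32-bit word combining the
 * reference count in its low bits (HAMMER2_DIO_MASK) with state flags
 * (HAMMER2_DIO_INPROG, _GOOD, _WAITING, _DIRTY) in the high bits, all
 * defined in hammer2.h.  Count and flags are always updated together
 * via atomic_cmpset_int() retry loops, so ref transitions and state
 * changes are observed atomically.
 */
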
/*
 * Allocate/Locate the requested dio, reference it, issue or queue iocb.
 */
void
hammer2_io_getblk(hammer2_mount_t *hmp, off_t lbase, int lsize,
                  hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio;
        hammer2_io_t *xio;
        off_t pbase;
        off_t pmask;
        /*
         * XXX after a free, the buffer-reuse case with a different size
         * can clash with the dio cache.  Let's avoid it for now.
         * Ultimately we need to invalidate the dio cache when freeing
         * blocks, to allow a mix of 16KB and 64KB block sizes.
         */
        /*int psize = hammer2_devblksize(lsize);*/
        int psize = HAMMER2_PBUFSIZE;
        int refs;

        pmask = ~(hammer2_off_t)(psize - 1);

        KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
        lbase &= ~HAMMER2_OFF_MASK_RADIX;
        pbase = lbase & pmask;
        KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
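
        /*
         * Worked example (illustrative): for a 16KB logical block the low
         * bits of lbase encode radix 14 (1 << 14 == 16384 == lsize).  With
         * psize = HAMMER2_PBUFSIZE (64KB), pmask = ~0xFFFF.  An lbase of
         * 0x12345400e thus yields lbase 0x123454000 once the radix bits
         * are stripped, and pbase 0x123450000, the containing 64KB
         * device buffer.
         */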

        /*
         * Access/Allocate the DIO, bump dio->refs to prevent destruction.
         */
        spin_lock_shared(&hmp->io_spin);
        dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
        if (dio) {
                if ((atomic_fetchadd_int(&dio->refs, 1) &
                     HAMMER2_DIO_MASK) == 0) {
                        atomic_add_int(&dio->hmp->iofree_count, -1);
                }
                spin_unlock_shared(&hmp->io_spin);
        } else {
                spin_unlock_shared(&hmp->io_spin);
                dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
                dio->hmp = hmp;
                dio->pbase = pbase;
                dio->psize = psize;
                dio->refs = 1;
                spin_init(&dio->spin, "h2dio");
                TAILQ_INIT(&dio->iocbq);
                spin_lock(&hmp->io_spin);
                xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
                if (xio == NULL) {
                        atomic_add_int(&hammer2_dio_count, 1);
                        spin_unlock(&hmp->io_spin);
                } else {
                        if ((atomic_fetchadd_int(&xio->refs, 1) &
                             HAMMER2_DIO_MASK) == 0) {
                                atomic_add_int(&xio->hmp->iofree_count, -1);
                        }
                        spin_unlock(&hmp->io_spin);
                        kfree(dio, M_HAMMER2);
                        dio = xio;
                }
        }

        /*
         * Obtain/Validate the buffer.
         */
        iocb->dio = dio;

        if (dio->act < 5)       /* SMP race ok */
                ++dio->act;

        for (;;) {
                refs = dio->refs;
                cpu_ccfence();

                /*
                 * Issue the iocb immediately if the buffer is already good.
                 * Once set, GOOD cannot be cleared until refs drops to 0.
                 */
                if (refs & HAMMER2_DIO_GOOD) {
                        iocb->callback(iocb);
                        break;
                }

                /*
                 * Try to own the DIO by setting INPROG so we can issue
                 * I/O on it.
                 */
                if (refs & HAMMER2_DIO_INPROG) {
                        /*
                         * If DIO_INPROG is already set then set WAITING and
                         * queue the iocb.
                         */
                        spin_lock(&dio->spin);
                        if (atomic_cmpset_int(&dio->refs, refs,
                                              refs | HAMMER2_DIO_WAITING)) {
                                iocb->flags |= HAMMER2_IOCB_ONQ |
                                               HAMMER2_IOCB_INPROG;
                                TAILQ_INSERT_TAIL(&dio->iocbq, iocb, entry);
                                spin_unlock(&dio->spin);
                                break;
                        }
                        spin_unlock(&dio->spin);
                        /* retry */
                } else {
                        /*
                         * If DIO_INPROG is not set then set it and issue the
                         * callback immediately to start I/O.
                         */
                        if (atomic_cmpset_int(&dio->refs, refs,
                                              refs | HAMMER2_DIO_INPROG)) {
                                iocb->flags |= HAMMER2_IOCB_INPROG;
                                iocb->callback(iocb);
                                break;
                        }
                        /* retry */
                }
        }
}
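
/*
 * Note on the iocb contract implied above: hammer2_io_getblk() guarantees
 * the iocb's callback is invoked exactly once, either synchronously (the
 * buffer was already DIO_GOOD, or DIO_INPROG was just acquired) or later
 * when a chained completion dequeues it.  The callback must finish by
 * calling hammer2_io_complete(), which hands DIO_INPROG ownership to the
 * next queued iocb or clears it.
 */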

/*
 * The originator of the iocb is finished with it.
 */
void
hammer2_io_complete(hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio = iocb->dio;
        hammer2_iocb_t *cbtmp;
        uint32_t orefs;
        uint32_t nrefs;
        uint32_t oflags;
        uint32_t nflags;

        /*
         * If IOCB_INPROG was not set, completion is synchronous due to the
         * buffer already being good.  We can simply set IOCB_DONE and
         * return.  In this situation DIO_INPROG is not set and we have no
         * visibility on dio->bp.
         */
        if ((iocb->flags & HAMMER2_IOCB_INPROG) == 0) {
                atomic_set_int(&iocb->flags, HAMMER2_IOCB_DONE);
                return;
        }

        /*
         * The iocb was queued, obtained DIO_INPROG, and its callback was
         * made.  The callback is now complete.  We still own DIO_INPROG.
         *
         * We can set DIO_GOOD if no error occurred, which gives certain
         * stability guarantees to dio->bp and allows other accessors to
         * short-cut access.  DIO_GOOD cannot be cleared until the last
         * ref is dropped.
         */
        KKASSERT(dio->refs & HAMMER2_DIO_INPROG);
        if (dio->bp) {
                BUF_KERNPROC(dio->bp);
                if ((dio->bp->b_flags & B_ERROR) == 0) {
                        KKASSERT(dio->bp->b_flags & B_CACHE);
                        atomic_set_int(&dio->refs, HAMMER2_DIO_GOOD);
                }
        }

        /*
         * Clean up the dio before marking the iocb as being done.  If
         * another iocb is pending we chain to it while leaving DIO_INPROG
         * set (it will call io completion and presumably clear DIO_INPROG).
         *
         * Otherwise if no other iocbs are pending we clear DIO_INPROG
         * before finishing up the iocb.  This means that DIO_INPROG is
         * cleared at the end of the chain before ANY of the iocbs are
         * marked done.
         *
         * NOTE: The TAILQ is not stable until the spin-lock is held.
         */
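        /*
         * Chain shape, for orientation (illustrative): if iocbs B and C
         * were queued while A held DIO_INPROG, A's completion runs
         *
         *   io_complete(A) -> dequeue B -> B->callback -> io_complete(B)
         *       -> dequeue C -> C->callback -> io_complete(C)
         *       -> clear DIO_INPROG
         *
         * with each iocb marked DONE as the calls unwind, i.e. only after
         * DIO_INPROG has been cleared by the deepest completion.
         */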
        for (;;) {
                orefs = dio->refs;
                nrefs = orefs & ~(HAMMER2_DIO_WAITING | HAMMER2_DIO_INPROG);

                if (orefs & HAMMER2_DIO_WAITING) {
                        spin_lock(&dio->spin);
                        cbtmp = TAILQ_FIRST(&dio->iocbq);
                        if (cbtmp) {
                                /*
                                 * NOTE: flags not adjusted in this case.
                                 *       Flags will be adjusted by the last
                                 *       iocb.
                                 */
                                TAILQ_REMOVE(&dio->iocbq, cbtmp, entry);
                                spin_unlock(&dio->spin);
                                cbtmp->callback(cbtmp); /* chained */
                                break;
                        } else if (atomic_cmpset_int(&dio->refs,
                                                     orefs, nrefs)) {
                                spin_unlock(&dio->spin);
                                break;
                        }
                        spin_unlock(&dio->spin);
                        /* retry */
                } else if (atomic_cmpset_int(&dio->refs, orefs, nrefs)) {
                        break;
                } /* else retry */
        }

        /*
         * Mark the iocb as done and wakeup any waiters.  This is done after
         * all iocb chains have been called back and after DIO_INPROG has
         * been cleared.  This avoids races against ref count drops by the
         * waiting threads (a hard but not impossible SMP race) which might
         * result in a 1->0 transition of the refs while DIO_INPROG is still
         * set.
         */
        for (;;) {
                oflags = iocb->flags;
                cpu_ccfence();
                nflags = oflags;
                nflags &= ~(HAMMER2_IOCB_WAKEUP | HAMMER2_IOCB_INPROG);
                nflags |= HAMMER2_IOCB_DONE;

                if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
                        if (oflags & HAMMER2_IOCB_WAKEUP)
                                wakeup(iocb);
                        /* SMP: iocb is now stale */
                        break;
                }
                /* retry */
        }
        iocb = NULL;
}

/*
 * Wait for an iocb's I/O to finish.
 */
void
hammer2_iocb_wait(hammer2_iocb_t *iocb)
{
        uint32_t oflags;
        uint32_t nflags;

        for (;;) {
                oflags = iocb->flags;
                cpu_ccfence();
                nflags = oflags | HAMMER2_IOCB_WAKEUP;
                if (oflags & HAMMER2_IOCB_DONE)
                        break;
                tsleep_interlock(iocb, 0);
                if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
                        tsleep(iocb, PINTERLOCKED, "h2iocb", hz);
                }
        }
}
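
/*
 * Typical usage follows the pattern in _hammer2_io_new() below: issue
 * hammer2_io_getblk() with the iocb filled in, then call
 * hammer2_iocb_wait() if HAMMER2_IOCB_DONE is not yet set.
 */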

/*
 * Release our ref on *diop.
 *
 * On the last ref we must atomically clear DIO_GOOD and set DIO_INPROG,
 * then dispose of the underlying buffer.
 */
void
hammer2_io_putblk(hammer2_io_t **diop)
{
        hammer2_mount_t *hmp;
        hammer2_io_t *dio;
        hammer2_iocb_t iocb;
        struct buf *bp;
        off_t peof;
        off_t pbase;
        int psize;
        int refs;

        dio = *diop;
        *diop = NULL;

        /*
         * Drop refs; on the 1->0 transition clear flags and set INPROG.
         */
        for (;;) {
                refs = dio->refs;

                if ((refs & HAMMER2_DIO_MASK) == 1) {
                        if (refs & HAMMER2_DIO_INPROG) {
                                hammer2_iocb_t *xcb;

                                xcb = TAILQ_FIRST(&dio->iocbq);
                                kprintf("BAD REFS dio %p %08x/%08x, cbio %p\n",
                                        dio, refs, dio->refs, xcb);
                                if (xcb)
                                        kprintf("   IOCB: func=%p dio=%p cl=%p ch=%p ptr=%p\n",
                                                xcb->callback,
                                                xcb->dio,
                                                xcb->cluster,
                                                xcb->chain,
                                                xcb->ptr);
                        }
                        KKASSERT((refs & HAMMER2_DIO_INPROG) == 0);
                        if (atomic_cmpset_int(&dio->refs, refs,
                                              ((refs - 1) &
                                               ~(HAMMER2_DIO_GOOD |
                                                 HAMMER2_DIO_DIRTY)) |
                                              HAMMER2_DIO_INPROG)) {
                                break;
                        }
                        /* retry */
                } else {
                        if (atomic_cmpset_int(&dio->refs, refs, refs - 1))
                                return;
                        /* retry */
                }
        }

        /*
         * We have set DIO_INPROG to gain control of the buffer and we have
         * cleared DIO_GOOD to prevent other accessors from thinking it is
         * still good.
         *
         * We can now dispose of the buffer, and should do it before calling
         * io_complete() in case there's a race against a new reference
         * which causes io_complete() to chain and instantiate the bp again.
         */
        pbase = dio->pbase;
        psize = dio->psize;
        bp = dio->bp;
        dio->bp = NULL;

        if (refs & HAMMER2_DIO_GOOD) {
                KKASSERT(bp != NULL);
                if (refs & HAMMER2_DIO_DIRTY) {
                        if (hammer2_cluster_enable) {
                                peof = (pbase + HAMMER2_SEGMASK64) &
                                       ~HAMMER2_SEGMASK64;
                                cluster_write(bp, peof, psize, 4);
                        } else {
                                bp->b_flags |= B_CLUSTEROK;
                                bdwrite(bp);
                        }
                } else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
                        brelse(bp);
                } else {
                        bqrelse(bp);
                }
        } else if (bp) {
                if (refs & HAMMER2_DIO_DIRTY) {
                        bdwrite(bp);
                } else {
                        brelse(bp);
                }
        }

        /*
         * The instant we call io_complete() the dio is a free agent again
         * and can be ripped out from under us.
         *
         * We can clean up our final DIO_INPROG by simulating an iocb
         * completion.
         */
        hmp = dio->hmp;                         /* extract fields */
        atomic_add_int(&hmp->iofree_count, 1);
        cpu_ccfence();

        iocb.dio = dio;
        iocb.flags = HAMMER2_IOCB_INPROG;
        hammer2_io_complete(&iocb);
        dio = NULL;                             /* dio stale */

        /*
         * We cache free buffers so re-use cases can use a shared lock, but
         * if too many build up we have to clean them out.
         */
        if (hmp->iofree_count > 1000) {
                struct hammer2_cleanupcb_info info;

                RB_INIT(&info.tmptree);
                spin_lock(&hmp->io_spin);
                if (hmp->iofree_count > 1000) {
                        info.count = hmp->iofree_count / 2;
                        RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
                                hammer2_io_cleanup_callback, &info);
                }
                spin_unlock(&hmp->io_spin);
                hammer2_io_cleanup(hmp, &info.tmptree);
        }
}

/*
 * Clean up any dios with (INPROG | refs) == 0.
 *
 * Called from hammer2_io_putblk() when too many free dios accumulate,
 * and on unmount after all activity has been flushed.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
        struct hammer2_cleanupcb_info *info = arg;
        hammer2_io_t *xio;

        if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
                if (dio->act > 0) {
                        --dio->act;
                        return(0);
                }
                KKASSERT(dio->bp == NULL);
                RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
                xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
                KKASSERT(xio == NULL);
                if (--info->count <= 0) /* limit scan */
                        return(-1);
        }
        return(0);
}

void
hammer2_io_cleanup(hammer2_mount_t *hmp, struct hammer2_io_tree *tree)
{
        hammer2_io_t *dio;

        while ((dio = RB_ROOT(tree)) != NULL) {
                RB_REMOVE(hammer2_io_tree, tree, dio);
                KKASSERT(dio->bp == NULL &&
                    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
                kfree(dio, M_HAMMER2);
                atomic_add_int(&hammer2_dio_count, -1);
                atomic_add_int(&hmp->iofree_count, -1);
        }
}
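
/*
 * Design note: when trimming from hammer2_io_putblk(), victims are first
 * moved into a private tmptree under hmp->io_spin and only then freed
 * here outside the lock, keeping the spinlock hold time short while
 * guaranteeing each dio is freed exactly once.
 */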

/*
 * Returns a pointer to the requested data within the dio's underlying
 * buffer.
 */
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
        struct buf *bp;
        int off;

        bp = dio->bp;
        KKASSERT(bp != NULL);
        off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
        KKASSERT(off >= 0 && off < bp->b_bufsize);
        return(bp->b_data + off);
}

/*
 * Helpers for hammer2_io_new*() functions.
 */
static
void
hammer2_iocb_new_callback(hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio = iocb->dio;
        int gbctl = (iocb->flags & HAMMER2_IOCB_QUICK) ? GETBLK_NOWAIT : 0;

        /*
         * If IOCB_INPROG is not set, the dio already has a good buffer and
         * we can't mess with it other than zeroing the requested range.
         *
         * If IOCB_INPROG is set we also own DIO_INPROG at this time and
         * can do what needs to be done with dio->bp.
         */
        if (iocb->flags & HAMMER2_IOCB_INPROG) {
                if ((iocb->flags & HAMMER2_IOCB_READ) == 0) {
                        if (iocb->lsize == dio->psize) {
                                /*
                                 * Fully covered buffer, try to optimize to
                                 * avoid any I/O.  We might already have the
                                 * buffer due to iocb chaining.
                                 */
                                if (dio->bp == NULL) {
                                        dio->bp = getblk(dio->hmp->devvp,
                                                         dio->pbase, dio->psize,
                                                         gbctl, 0);
                                }
                                if (dio->bp) {
                                        vfs_bio_clrbuf(dio->bp);
                                        dio->bp->b_flags |= B_CACHE;
                                }
                        } else if (iocb->flags & HAMMER2_IOCB_QUICK) {
                                /*
                                 * Partial buffer, quick mode.  Do nothing.
                                 * Do not instantiate the buffer or try to
                                 * mark it B_CACHE because other portions of
                                 * the buffer might have to be read by other
                                 * accessors.
                                 */
                        } else if (dio->bp == NULL ||
                                   (dio->bp->b_flags & B_CACHE) == 0) {
                                /*
                                 * Partial buffer, normal mode, requires
                                 * read-before-write.  Chain the read.
                                 *
                                 * We might already have the buffer due to
                                 * iocb chaining.  XXX unclear if we really
                                 * need to write/release it and reacquire
                                 * in that case.
                                 *
                                 * QUEUE ASYNC I/O, IOCB IS NOT YET COMPLETE.
                                 */
                                if (dio->bp) {
                                        if (dio->refs & HAMMER2_DIO_DIRTY)
                                                bdwrite(dio->bp);
                                        else
                                                bqrelse(dio->bp);
                                        dio->bp = NULL;
                                }
                                atomic_set_int(&iocb->flags, HAMMER2_IOCB_READ);
                                breadcb(dio->hmp->devvp,
                                        dio->pbase, dio->psize,
                                        hammer2_io_callback, iocb);
                                return;
                        } /* else buffer is good */
                } /* else callback from breadcb is complete */
        }
        if (dio->bp) {
                if (iocb->flags & HAMMER2_IOCB_ZERO)
                        bzero(hammer2_io_data(dio, iocb->lbase), iocb->lsize);
                atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
        }
        hammer2_io_complete(iocb);
}

static
int
_hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
                hammer2_io_t **diop, int flags)
{
        hammer2_iocb_t iocb;
        hammer2_io_t *dio;

        iocb.callback = hammer2_iocb_new_callback;
        iocb.cluster = NULL;
        iocb.chain = NULL;
        iocb.ptr = NULL;
        iocb.lbase = lbase;
        iocb.lsize = lsize;
        iocb.flags = flags;
        iocb.error = 0;
        hammer2_io_getblk(hmp, lbase, lsize, &iocb);
        if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
                hammer2_iocb_wait(&iocb);
        dio = *diop = iocb.dio;

        return (iocb.error);
}

int
hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
               hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, lbase, lsize, diop, HAMMER2_IOCB_ZERO));
}

int
hammer2_io_newnz(hammer2_mount_t *hmp, off_t lbase, int lsize,
               hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, lbase, lsize, diop, 0));
}

int
hammer2_io_newq(hammer2_mount_t *hmp, off_t lbase, int lsize,
               hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, lbase, lsize, diop, HAMMER2_IOCB_QUICK));
}
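
/*
 * Illustrative sketch (not compiled): allocating and dirtying a new
 * block.  hammer2_io_new() zeroes the requested range via
 * HAMMER2_IOCB_ZERO, hammer2_io_newnz() skips the zeroing, and
 * hammer2_io_newq() only instantiates the buffer when that can be done
 * cheaply.  The function and variable names here are hypothetical.
 */
#if 0
static int
example_write_block(hammer2_mount_t *hmp, off_t data_off, void *src)
{
        hammer2_io_t *dio;
        int lsize;
        int error;

        lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
        error = hammer2_io_newnz(hmp, data_off, lsize, &dio);
        if (error == 0) {
                bcopy(src, hammer2_io_data(dio, data_off), lsize);
                hammer2_io_bdwrite(&dio);       /* marks DIRTY, delayed write */
        } else if (dio) {
                hammer2_io_bqrelse(&dio);
        }
        return (error);
}
#endif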

static
void
hammer2_iocb_bread_callback(hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio = iocb->dio;
        off_t peof;
        int error;

        /*
         * If IOCB_INPROG is not set, the dio already has a good buffer and
         * we can't mess with it other than zeroing the requested range.
         *
         * If IOCB_INPROG is set we also own DIO_INPROG at this time and
         * can do what needs to be done with dio->bp.
         */
        if (iocb->flags & HAMMER2_IOCB_INPROG) {
                if (dio->bp && (dio->bp->b_flags & B_CACHE)) {
                        /*
                         * Already good, likely due to being chained from
                         * another iocb.
                         */
                        error = 0;
                } else if (hammer2_cluster_enable) {
                        /*
                         * Synchronous cluster I/O for now.
                         */
                        if (dio->bp) {
                                bqrelse(dio->bp);
                                dio->bp = NULL;
                        }
                        peof = (dio->pbase + HAMMER2_SEGMASK64) &
                               ~HAMMER2_SEGMASK64;
                        error = cluster_read(dio->hmp->devvp, peof, dio->pbase,
                                             dio->psize,
                                             dio->psize, HAMMER2_PBUFSIZE*4,
                                             &dio->bp);
                } else {
                        /*
                         * Synchronous I/O for now.
                         */
                        if (dio->bp) {
                                bqrelse(dio->bp);
                                dio->bp = NULL;
                        }
                        error = bread(dio->hmp->devvp, dio->pbase,
                                      dio->psize, &dio->bp);
                }
                if (error) {
                        brelse(dio->bp);
                        dio->bp = NULL;
                }
        }
        hammer2_io_complete(iocb);
}

int
hammer2_io_bread(hammer2_mount_t *hmp, off_t lbase, int lsize,
                hammer2_io_t **diop)
{
        hammer2_iocb_t iocb;
        hammer2_io_t *dio;

        iocb.callback = hammer2_iocb_bread_callback;
        iocb.cluster = NULL;
        iocb.chain = NULL;
        iocb.ptr = NULL;
        iocb.lbase = lbase;
        iocb.lsize = lsize;
        iocb.flags = 0;
        iocb.error = 0;
        hammer2_io_getblk(hmp, lbase, lsize, &iocb);
        if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
                hammer2_iocb_wait(&iocb);
        dio = *diop = iocb.dio;

        return (iocb.error);
}

/*
 * System buf/bio async callback extracts the iocb and chains
 * to the iocb callback.
 */
void
hammer2_io_callback(struct bio *bio)
{
        struct buf *dbp = bio->bio_buf;
        hammer2_iocb_t *iocb = bio->bio_caller_info1.ptr;
        hammer2_io_t *dio;

        dio = iocb->dio;
        if ((bio->bio_flags & BIO_DONE) == 0)
                bpdone(dbp, 0);
        bio->bio_flags &= ~(BIO_DONE | BIO_SYNC);
        dio->bp = bio->bio_buf;
        iocb->callback(iocb);
}

void
hammer2_io_bawrite(hammer2_io_t **diop)
{
        atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
}

void
hammer2_io_bdwrite(hammer2_io_t **diop)
{
        atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
}

int
hammer2_io_bwrite(hammer2_io_t **diop)
{
        atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
        return (0);     /* XXX */
}
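
/*
 * Note: at this stage bawrite/bdwrite/bwrite all just mark the dio DIRTY
 * and let hammer2_io_putblk() pick the actual write strategy (clustered
 * or delayed write); the distinct entry points are placeholders (see the
 * XXX above).
 */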

void
hammer2_io_setdirty(hammer2_io_t *dio)
{
        atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
}

void
hammer2_io_setinval(hammer2_io_t *dio, u_int bytes)
{
        if ((u_int)dio->psize == bytes)
                dio->bp->b_flags |= B_INVAL | B_RELBUF;
}

void
hammer2_io_brelse(hammer2_io_t **diop)
{
        hammer2_io_putblk(diop);
}

void
hammer2_io_bqrelse(hammer2_io_t **diop)
{
        hammer2_io_putblk(diop);
}

int
hammer2_io_isdirty(hammer2_io_t *dio)
{
        return((dio->refs & HAMMER2_DIO_DIRTY) != 0);
}