hammer2 - Fix DIO_INPROG race.
[dragonfly.git] / sys / vfs / hammer2 / hammer2_io.c
/*
 * Copyright (c) 2013-2014 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "hammer2.h"

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  Can be used for OS abstraction, but the main
 * purpose is to allow larger buffers to be used against hammer2_chains
 * using smaller allocations, without causing deadlocks.
 */
static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);

static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
        if (io2->pbase < io1->pbase)
                return(-1);
        if (io2->pbase > io1->pbase)
                return(1);
        return(0);
}

RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
                off_t, pbase);

struct hammer2_cleanupcb_info {
        struct hammer2_io_tree tmptree;
        int     count;
};

#define HAMMER2_GETBLK_GOOD     0
#define HAMMER2_GETBLK_QUEUED   1
#define HAMMER2_GETBLK_OWNED    2

/*
 * Allocate/Locate the requested dio, reference it, issue or queue iocb.
 */
void
hammer2_io_getblk(hammer2_mount_t *hmp, off_t lbase, int lsize,
                  hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio;
        hammer2_io_t *xio;
        off_t pbase;
        off_t pmask;
        /*
         * XXX after free, buffer reuse case w/ different size can clash
         * with the dio cache.  Let's avoid it for now.  Ultimately we
         * need to invalidate the dio cache when freeing blocks to allow
         * a mix of 16KB and 64KB block sizes.
         */
        /*int psize = hammer2_devblksize(lsize);*/
        int psize = HAMMER2_PBUFSIZE;
        int refs;

        pmask = ~(hammer2_off_t)(psize - 1);

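        /*
         * The low bits of lbase encode the block's size radix
         * (1 << radix == lsize), as asserted below.  Strip them to
         * recover the device byte offset, then align that offset to
         * the physical buffer size.
         */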
        KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
        lbase &= ~HAMMER2_OFF_MASK_RADIX;
        pbase = lbase & pmask;
        KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);

        /*
         * Access/Allocate the DIO, bump dio->refs to prevent destruction.
         */
        spin_lock_shared(&hmp->io_spin);
        dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
        if (dio) {
                if ((atomic_fetchadd_int(&dio->refs, 1) &
                     HAMMER2_DIO_MASK) == 0) {
                        atomic_add_int(&dio->hmp->iofree_count, -1);
                }
                spin_unlock_shared(&hmp->io_spin);
        } else {
                spin_unlock_shared(&hmp->io_spin);
                dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
                dio->hmp = hmp;
                dio->pbase = pbase;
                dio->psize = psize;
                dio->refs = 1;
                spin_init(&dio->spin, "h2dio");
                TAILQ_INIT(&dio->iocbq);
                spin_lock(&hmp->io_spin);
                xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
                if (xio == NULL) {
                        atomic_add_int(&hammer2_dio_count, 1);
                        spin_unlock(&hmp->io_spin);
                } else {
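                        /*
                         * Lost an insertion race: another thread created
                         * the same dio first.  Reference the winner (xio)
                         * and discard our temporary structure.
                         */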
                        if ((atomic_fetchadd_int(&xio->refs, 1) &
                             HAMMER2_DIO_MASK) == 0) {
                                atomic_add_int(&xio->hmp->iofree_count, -1);
                        }
                        spin_unlock(&hmp->io_spin);
                        kfree(dio, M_HAMMER2);
                        dio = xio;
                }
        }

        /*
         * Obtain/Validate the buffer.
         */
        iocb->dio = dio;

        for (;;) {
                refs = dio->refs;
                cpu_ccfence();

                /*
                 * Issue the iocb immediately if the buffer is already good.
                 * Once set, GOOD cannot be cleared until refs drops to 0.
                 *
                 * There is a race here if a chained DIO_INPROG is present
                 * (typically DIO_INPROG and DIO_WAITING are both set along
                 * with GOOD).  The DIO can become GOOD but not yet have
                 * finished its INPROG processing, causing an assertion in
                 * putblk later on.
                 *
                 * To deal with this we do not take the shortcut if INPROG
                 * is still set.
                 */
                if ((refs & (HAMMER2_DIO_GOOD | HAMMER2_DIO_INPROG)) ==
                    HAMMER2_DIO_GOOD) {
                        iocb->callback(iocb);
                        break;
                }

                /*
                 * Try to own the DIO by setting INPROG so we can issue
                 * I/O on it.
                 */
                if (refs & HAMMER2_DIO_INPROG) {
                        /*
                         * If DIO_INPROG is already set then set WAITING and
                         * queue the iocb.
                         */
                        spin_lock(&dio->spin);
                        if (atomic_cmpset_int(&dio->refs, refs,
                                              refs | HAMMER2_DIO_WAITING)) {
                                iocb->flags |= HAMMER2_IOCB_ONQ |
                                               HAMMER2_IOCB_INPROG;
                                TAILQ_INSERT_TAIL(&dio->iocbq, iocb, entry);
                                spin_unlock(&dio->spin);
                                break;
                        }
                        spin_unlock(&dio->spin);
                        /* retry */
                } else {
                        /*
                         * If DIO_INPROG is not set then set it and issue the
                         * callback immediately to start I/O.
                         */
                        if (atomic_cmpset_int(&dio->refs, refs,
                                              refs | HAMMER2_DIO_INPROG)) {
                                iocb->flags |= HAMMER2_IOCB_INPROG;
                                iocb->callback(iocb);
                                break;
                        }
                        /* retry */
                }
                /* retry */
        }
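        /*
         * Activity heuristic: act is capped at 5 and decays in the
         * cleanup scan, so recently used dios survive cache trimming
         * longer.
         */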
        if (dio->act < 5)
                ++dio->act;
}

/*
 * The originator of the iocb is finished with it.
 */
void
hammer2_io_complete(hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio = iocb->dio;
        uint32_t orefs;
        uint32_t nrefs;
        uint32_t oflags;
        uint32_t nflags;

        /*
         * If IOCB_INPROG was not set, completion is synchronous due to
         * the buffer already being good.  We can simply set IOCB_DONE
         * and return.  In this situation DIO_INPROG is not set and we
         * have no visibility on dio->bp.
         */
        if ((iocb->flags & HAMMER2_IOCB_INPROG) == 0) {
                iocb->flags |= HAMMER2_IOCB_DONE;
                return;
        }

        /*
         * The iocb was queued, obtained DIO_INPROG, and its callback was
         * made.  The callback is now complete.  We still own DIO_INPROG.
         *
         * We can set DIO_GOOD if no error occurred, which gives certain
         * stability guarantees to dio->bp and allows other accessors to
         * short-cut access.  DIO_GOOD cannot be cleared until the last
         * ref is dropped.
         */
        KKASSERT(dio->refs & HAMMER2_DIO_INPROG);
        if (dio->bp) {
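                /*
                 * BUF_KERNPROC() disassociates the buffer lock from the
                 * current thread so the final release can be performed
                 * by a different thread.
                 */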
                BUF_KERNPROC(dio->bp);
                if ((dio->bp->b_flags & B_ERROR) == 0) {
                        KKASSERT(dio->bp->b_flags & B_CACHE);
                        atomic_set_int(&dio->refs, HAMMER2_DIO_GOOD);
                }
        }

        for (;;) {
                oflags = iocb->flags;
                cpu_ccfence();
                nflags = oflags;
                nflags &= ~(HAMMER2_IOCB_WAKEUP | HAMMER2_IOCB_INPROG);
                nflags |= HAMMER2_IOCB_DONE;

                if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
                        if (oflags & HAMMER2_IOCB_WAKEUP)
                                wakeup(iocb);
                        /* SMP: iocb is now stale */
                        break;
                }
                /* retry */
        }
        iocb = NULL;

        /*
         * Now finish up the dio.  If another iocb is pending, chain to
         * it, leaving DIO_INPROG set.  Otherwise clear DIO_INPROG
         * (and DIO_WAITING).
         *
         * NOTE: The TAILQ is not stable until the spin-lock is held.
         */
        for (;;) {
                orefs = dio->refs;
                nrefs = orefs & ~(HAMMER2_DIO_WAITING | HAMMER2_DIO_INPROG);

                if (orefs & HAMMER2_DIO_WAITING) {
                        spin_lock(&dio->spin);
                        iocb = TAILQ_FIRST(&dio->iocbq);
                        if (iocb) {
                                TAILQ_REMOVE(&dio->iocbq, iocb, entry);
                                spin_unlock(&dio->spin);
                                iocb->callback(iocb);   /* chained */
                                break;
                        } else if (atomic_cmpset_int(&dio->refs,
                                                     orefs, nrefs)) {
                                spin_unlock(&dio->spin);
                                break;
                        }
                        spin_unlock(&dio->spin);
                        /* retry */
                } else if (atomic_cmpset_int(&dio->refs, orefs, nrefs)) {
                        break;
                } /* else retry */
                /* retry */
        }
        /* SMP: dio is stale now */
}

/*
 * Wait for an iocb's I/O to finish.
 */
void
hammer2_iocb_wait(hammer2_iocb_t *iocb)
{
        uint32_t oflags;
        uint32_t nflags;

        for (;;) {
                oflags = iocb->flags;
                cpu_ccfence();
                nflags = oflags | HAMMER2_IOCB_WAKEUP;
                if (oflags & HAMMER2_IOCB_DONE)
                        break;
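                /*
                 * Not done yet.  The tsleep interlock plus the atomic
                 * update of IOCB_WAKEUP closes the race against the
                 * wakeup() issued by hammer2_io_complete(), so a wakeup
                 * between the DONE test above and the tsleep below
                 * cannot be lost.
                 */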
                tsleep_interlock(iocb, 0);
                if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
                        tsleep(iocb, PINTERLOCKED, "h2iocb", hz);
                }
        }
}

/*
 * Release our ref on *diop.
 *
 * On the last ref we must atomically clear DIO_GOOD and set DIO_INPROG,
 * then dispose of the underlying buffer.
 */
void
hammer2_io_putblk(hammer2_io_t **diop)
{
        hammer2_mount_t *hmp;
        hammer2_io_t *dio;
        hammer2_iocb_t iocb;
        struct buf *bp;
        off_t peof;
        off_t pbase;
        int psize;
        int refs;

        dio = *diop;
        *diop = NULL;

        /*
         * Drop refs, on 1->0 transition clear flags, set INPROG.
         */
        for (;;) {
                refs = dio->refs;

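                /*
                 * The 1->0 transition must clear GOOD and DIRTY and set
                 * INPROG in a single atomic op so no other thread can
                 * gain access while we dispose of the buffer.
                 */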
                if ((refs & HAMMER2_DIO_MASK) == 1) {
                        KKASSERT((refs & HAMMER2_DIO_INPROG) == 0);
                        if (atomic_cmpset_int(&dio->refs, refs,
                                              ((refs - 1) &
                                               ~(HAMMER2_DIO_GOOD |
                                                 HAMMER2_DIO_DIRTY)) |
                                              HAMMER2_DIO_INPROG)) {
                                break;
                        }
                        /* retry */
                } else {
                        if (atomic_cmpset_int(&dio->refs, refs, refs - 1))
                                return;
                        /* retry */
                }
                /* retry */
        }

        /*
         * We have set DIO_INPROG to gain control of the buffer and we have
         * cleared DIO_GOOD to prevent other accessors from thinking it is
         * still good.
         *
         * We can now dispose of the buffer, and should do it before calling
         * io_complete() in case there's a race against a new reference
         * which causes io_complete() to chain and instantiate the bp again.
         */
        pbase = dio->pbase;
        psize = dio->psize;
        bp = dio->bp;
        dio->bp = NULL;

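        /*
         * Disposal policy: GOOD+DIRTY buffers are written back, using a
         * clustered write when enabled; GOOD clean buffers are requeued;
         * errored or invalidated buffers are thrown away.
         */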
        if (refs & HAMMER2_DIO_GOOD) {
                KKASSERT(bp != NULL);
                if (refs & HAMMER2_DIO_DIRTY) {
                        if (hammer2_cluster_enable) {
                                peof = (pbase + HAMMER2_SEGMASK64) &
                                       ~HAMMER2_SEGMASK64;
                                cluster_write(bp, peof, psize, 4);
                        } else {
                                bp->b_flags |= B_CLUSTEROK;
                                bdwrite(bp);
                        }
                } else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
                        brelse(bp);
                } else {
                        bqrelse(bp);
                }
        } else if (bp) {
                if (refs & HAMMER2_DIO_DIRTY) {
                        bdwrite(bp);
                } else {
                        brelse(bp);
                }
        }

        /*
         * The instant we call io_complete the dio is a free agent again
         * and can be ripped out from under us.
         *
         * We can clean up our final DIO_INPROG by simulating an iocb
         * completion.
         */
        hmp = dio->hmp;                         /* extract fields */
        atomic_add_int(&hmp->iofree_count, 1);
        cpu_ccfence();

        iocb.dio = dio;
        iocb.flags = HAMMER2_IOCB_INPROG;
        hammer2_io_complete(&iocb);
        dio = NULL;                             /* dio stale */

        /*
         * We cache free buffers so re-use cases can use a shared lock, but
         * if too many build up we have to clean them out.
         */
        if (hmp->iofree_count > 1000) {
                struct hammer2_cleanupcb_info info;

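                /*
                 * Victims are collected into a private tree while the
                 * spinlock is held, then freed outside the lock via
                 * hammer2_io_cleanup().
                 */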
                RB_INIT(&info.tmptree);
                spin_lock(&hmp->io_spin);
                if (hmp->iofree_count > 1000) {
                        info.count = hmp->iofree_count / 2;
                        RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
                                hammer2_io_cleanup_callback, &info);
                }
                spin_unlock(&hmp->io_spin);
                hammer2_io_cleanup(hmp, &info.tmptree);
        }
}

/*
 * Clean up any dios with (INPROG | refs) == 0.
 *
 * Called to clean up cached DIOs on umount after all activity has been
 * flushed.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
        struct hammer2_cleanupcb_info *info = arg;
        hammer2_io_t *xio;

        if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
                if (dio->act > 0) {
                        --dio->act;
                        return 0;
                }
                KKASSERT(dio->bp == NULL);
                RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
                xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
                KKASSERT(xio == NULL);
                if (--info->count <= 0) /* limit scan */
                        return(-1);
        }
        return 0;
}

void
hammer2_io_cleanup(hammer2_mount_t *hmp, struct hammer2_io_tree *tree)
{
        hammer2_io_t *dio;

        while ((dio = RB_ROOT(tree)) != NULL) {
                RB_REMOVE(hammer2_io_tree, tree, dio);
                KKASSERT(dio->bp == NULL &&
                    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
                kfree(dio, M_HAMMER2);
                atomic_add_int(&hammer2_dio_count, -1);
                atomic_add_int(&hmp->iofree_count, -1);
        }
}

/*
 * Returns a pointer to the requested data.
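 *
 * The dio must already have a valid buffer.  lbase carries the same
 * size-radix encoding in its low bits as in hammer2_io_getblk().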
 */
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
        struct buf *bp;
        int off;

        bp = dio->bp;
        KKASSERT(bp != NULL);
        off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
        KKASSERT(off >= 0 && off < bp->b_bufsize);
        return(bp->b_data + off);
}

/*
 * Helpers for hammer2_io_new*() functions.
 */
static
void
hammer2_iocb_new_callback(hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio = iocb->dio;
        int gbctl = (iocb->flags & HAMMER2_IOCB_QUICK) ? GETBLK_NOWAIT : 0;

        /*
         * If IOCB_INPROG is not set, the dio already has a good buffer
         * and we can't mess with it other than zeroing the requested
         * range.
         *
         * If IOCB_INPROG is set we also own DIO_INPROG at this time and
         * can do what needs to be done with dio->bp.
         */
        if (iocb->flags & HAMMER2_IOCB_INPROG) {
                if ((iocb->flags & HAMMER2_IOCB_READ) == 0) {
                        if (iocb->lsize == dio->psize) {
                                /*
                                 * Fully covered buffer, try to optimize to
                                 * avoid any I/O.  We might already have the
                                 * buffer due to iocb chaining.
                                 */
                                if (dio->bp == NULL) {
                                        dio->bp = getblk(dio->hmp->devvp,
                                                         dio->pbase, dio->psize,
                                                         gbctl, 0);
                                }
                                if (dio->bp) {
                                        vfs_bio_clrbuf(dio->bp);
                                        dio->bp->b_flags |= B_CACHE;
                                }
                        } else if (iocb->flags & HAMMER2_IOCB_QUICK) {
                                /*
                                 * Partial buffer, quick mode.  Do nothing.
                                 * Do not instantiate the buffer or try to
                                 * mark it B_CACHE because other portions of
                                 * the buffer might have to be read by other
                                 * accessors.
                                 */
                        } else if (dio->bp == NULL ||
                                   (dio->bp->b_flags & B_CACHE) == 0) {
                                /*
                                 * Partial buffer, normal mode, requires
                                 * read-before-write.  Chain the read.
                                 *
                                 * We might already have the buffer due to
                                 * iocb chaining.  XXX unclear if we really
                                 * need to write/release it and reacquire
                                 * in that case.
                                 *
                                 * QUEUE ASYNC I/O, IOCB IS NOT YET COMPLETE.
                                 */
                                if (dio->bp) {
                                        if (dio->refs & HAMMER2_DIO_DIRTY)
                                                bdwrite(dio->bp);
                                        else
                                                bqrelse(dio->bp);
                                        dio->bp = NULL;
                                }
                                iocb->flags |= HAMMER2_IOCB_READ;
                                breadcb(dio->hmp->devvp,
                                        dio->pbase, dio->psize,
                                        hammer2_io_callback, iocb);
                                return;
                        } /* else buffer is good */
                } /* else callback from breadcb is complete */
        }
        if (dio->bp) {
                if (iocb->flags & HAMMER2_IOCB_ZERO)
                        bzero(hammer2_io_data(dio, iocb->lbase), iocb->lsize);
                atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
        }
        hammer2_io_complete(iocb);
}

static
int
_hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
                hammer2_io_t **diop, int flags)
{
        hammer2_iocb_t iocb;
        hammer2_io_t *dio;

        iocb.callback = hammer2_iocb_new_callback;
        iocb.cluster = NULL;
        iocb.chain = NULL;
        iocb.ptr = NULL;
        iocb.lbase = lbase;
        iocb.lsize = lsize;
        iocb.flags = flags;
        iocb.error = 0;
        hammer2_io_getblk(hmp, lbase, lsize, &iocb);
        if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
                hammer2_iocb_wait(&iocb);
        dio = *diop = iocb.dio;

        return (iocb.error);
}

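/*
 * Acquire a new or existing block.  hammer2_io_new() zeroes the
 * requested range, hammer2_io_newnz() does not, and hammer2_io_newq()
 * acquires in quick mode (GETBLK_NOWAIT, no read-before-write).
 */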
int
hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
               hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, lbase, lsize, diop, HAMMER2_IOCB_ZERO));
}

int
hammer2_io_newnz(hammer2_mount_t *hmp, off_t lbase, int lsize,
               hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, lbase, lsize, diop, 0));
}

int
hammer2_io_newq(hammer2_mount_t *hmp, off_t lbase, int lsize,
               hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, lbase, lsize, diop, HAMMER2_IOCB_QUICK));
}

static
void
hammer2_iocb_bread_callback(hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio = iocb->dio;
        off_t peof;
        int error;

        /*
         * If IOCB_INPROG is not set, the dio already has a good buffer
         * and we can't mess with it.
         *
         * If IOCB_INPROG is set we also own DIO_INPROG at this time and
         * can do what needs to be done with dio->bp.
         */
        if (iocb->flags & HAMMER2_IOCB_INPROG) {
                if (dio->bp && (dio->bp->b_flags & B_CACHE)) {
                        /*
                         * Already good, likely due to being chained from
                         * another iocb.
                         */
                        error = 0;
                } else if (hammer2_cluster_enable) {
                        /*
                         * Synchronous cluster I/O for now.
                         */
                        if (dio->bp) {
                                bqrelse(dio->bp);
                                dio->bp = NULL;
                        }
                        peof = (dio->pbase + HAMMER2_SEGMASK64) &
                               ~HAMMER2_SEGMASK64;
                        error = cluster_read(dio->hmp->devvp, peof, dio->pbase,
                                             dio->psize,
                                             dio->psize, HAMMER2_PBUFSIZE*4,
                                             &dio->bp);
                } else {
                        /*
                         * Synchronous I/O for now.
                         */
                        if (dio->bp) {
                                bqrelse(dio->bp);
                                dio->bp = NULL;
                        }
                        error = bread(dio->hmp->devvp, dio->pbase,
                                      dio->psize, &dio->bp);
                }
                if (error) {
                        brelse(dio->bp);
                        dio->bp = NULL;
                }
        }
        hammer2_io_complete(iocb);
}

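/*
 * Read the requested block synchronously.  Typical caller pattern
 * (illustrative sketch, not taken from a real caller):
 *
 *      hammer2_io_t *dio;
 *      char *data;
 *
 *      if (hammer2_io_bread(hmp, lbase, lsize, &dio) == 0)
 *              data = hammer2_io_data(dio, lbase);
 *      ...
 *      hammer2_io_bqrelse(&dio);
 */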
int
hammer2_io_bread(hammer2_mount_t *hmp, off_t lbase, int lsize,
                hammer2_io_t **diop)
{
        hammer2_iocb_t iocb;
        hammer2_io_t *dio;

        iocb.callback = hammer2_iocb_bread_callback;
        iocb.cluster = NULL;
        iocb.chain = NULL;
        iocb.ptr = NULL;
        iocb.lbase = lbase;
        iocb.lsize = lsize;
        iocb.flags = 0;
        iocb.error = 0;
        hammer2_io_getblk(hmp, lbase, lsize, &iocb);
        if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
                hammer2_iocb_wait(&iocb);
        dio = *diop = iocb.dio;

        return (iocb.error);
}

/*
 * System buf/bio async callback extracts the iocb and chains
 * to the iocb callback.
 */
void
hammer2_io_callback(struct bio *bio)
{
        struct buf *dbp = bio->bio_buf;
        hammer2_iocb_t *iocb = bio->bio_caller_info1.ptr;
        hammer2_io_t *dio;

        dio = iocb->dio;
        if ((bio->bio_flags & BIO_DONE) == 0)
                bpdone(dbp, 0);
        bio->bio_flags &= ~(BIO_DONE | BIO_SYNC);
        dio->bp = bio->bio_buf;
        iocb->callback(iocb);
}

void
hammer2_io_bawrite(hammer2_io_t **diop)
{
        atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
}

void
hammer2_io_bdwrite(hammer2_io_t **diop)
{
        atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
}

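/*
 * NOTE: bawrite/bdwrite/bwrite currently all just mark the dio dirty
 *       and leave the actual write strategy to hammer2_io_putblk().
 */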
int
hammer2_io_bwrite(hammer2_io_t **diop)
{
        atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
        return (0);     /* XXX */
}

void
hammer2_io_setdirty(hammer2_io_t *dio)
{
        atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
}

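/*
 * Invalidate the underlying buffer, but only when the invalidation
 * covers the entire physical buffer.
 */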
void
hammer2_io_setinval(hammer2_io_t *dio, u_int bytes)
{
        if ((u_int)dio->psize == bytes)
                dio->bp->b_flags |= B_INVAL | B_RELBUF;
}

void
hammer2_io_brelse(hammer2_io_t **diop)
{
        hammer2_io_putblk(diop);
}

void
hammer2_io_bqrelse(hammer2_io_t **diop)
{
        hammer2_io_putblk(diop);
}

int
hammer2_io_isdirty(hammer2_io_t *dio)
{
        return((dio->refs & HAMMER2_DIO_DIRTY) != 0);
}