hammer2 - stabilization
[dragonfly.git] / sys / vfs / hammer2 / hammer2_io.c
1 /*
2  * Copyright (c) 2013-2018 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35 #include "hammer2.h"
36
37 #define HAMMER2_DOP_READ        1
38 #define HAMMER2_DOP_NEW         2
39 #define HAMMER2_DOP_NEWNZ       3
40 #define HAMMER2_DOP_READQ       4
41
42 /*
43  * Implements an abstraction layer for synchronous and asynchronous
44  * buffered device I/O.  Can be used as an OS-abstraction but the main
45  * purpose is to allow larger buffers to be used against hammer2_chain's
46  * using smaller allocations, without causing deadlocks.
47  *
48  * The DIOs also record temporary state with limited persistence.  This
49  * feature is used to keep track of dedupable blocks.
50  */
51 static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
52 static void dio_write_stats_update(hammer2_io_t *dio, struct buf *bp);
53
54 static int
55 hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
56 {
57         if (io1->pbase < io2->pbase)
58                 return(-1);
59         if (io1->pbase > io2->pbase)
60                 return(1);
61         return(0);
62 }
63
/*
 * Red-black tree support for the per-device DIO cache, keyed by pbase.
 * The RB_*2 variants additionally generate a direct-keyed RB_LOOKUP()
 * taking an off_t (the pbase field).
 */
RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
		off_t, pbase);
67
/*
 * Scratch state passed to hammer2_io_cleanup_callback().  Up to (count)
 * idle DIOs are moved from the device iotree into (tmptree) while the
 * io_spin lock is held; they are destroyed afterwards by
 * hammer2_io_cleanup() without the lock.
 */
struct hammer2_cleanupcb_info {
	struct hammer2_io_tree tmptree;
	int	count;
};
72
#if 0
/*
 * (Currently unused, compiled out)  Compute the 1KB-granular bitmask
 * covering (off, bytes) relative to the DIO's physical buffer.  Chunks
 * smaller than 1KB are not representable and return 0.
 */
static __inline
uint64_t
hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
	uint64_t mask;
	int i;

	if (bytes < 1024)	/* smaller chunks not supported */
		return 0;

	/*
	 * Calculate crc check mask for larger chunks
	 */
	i = (((off & ~HAMMER2_OFF_MASK_RADIX) - dio->pbase) &
	     HAMMER2_PBUFMASK) >> 10;
	if (i == 0 && bytes == HAMMER2_PBUFSIZE)
		return((uint64_t)-1);
	mask = ((uint64_t)1U << (bytes >> 10)) - 1;
	mask <<= i;

	return mask;
}
#endif
97
/*
 * Returns the DIO corresponding to the data|radix, creating it if necessary.
 *
 * If createit is 0, NULL can be returned indicating that the DIO does not
 * exist.  (btype) is ignored when createit is 0.
 *
 * On success the returned DIO has had one reference added (possibly
 * resurrecting it from the iofree cache), and *isgoodp is set non-zero
 * if the DIO's backing buffer is already instantiated and valid
 * (DIO_GOOD was observed in its refs).
 */
static __inline
hammer2_io_t *
hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_key_t data_off, uint8_t btype,
		 int createit, int *isgoodp)
{
	hammer2_io_t *dio;
	hammer2_io_t *xio;
	hammer2_key_t lbase;
	hammer2_key_t pbase;
	hammer2_key_t pmask;
	uint64_t refs;
	int lsize;
	int psize;

	/*
	 * Decode the logical offset and size from data_off (the low bits
	 * encode the size radix) and round down to the physical buffer
	 * boundary.
	 */
	psize = HAMMER2_PBUFSIZE;
	pmask = ~(hammer2_off_t)(psize - 1);
	lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
	lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;

	/* the logical extent must not straddle a physical buffer */
	if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
		kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
			pbase, lbase, lsize, pmask);
	}
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
	*isgoodp = 0;

	/*
	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
	 */
	hammer2_spin_sh(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
	if (dio) {
		refs = atomic_fetchadd_64(&dio->refs, 1);
		if ((refs & HAMMER2_DIO_MASK) == 0) {
			/* 0->1 transition, DIO leaves the free pool */
			atomic_add_int(&dio->hmp->iofree_count, -1);
		}
		if (refs & HAMMER2_DIO_GOOD)
			*isgoodp = 1;
		hammer2_spin_unsh(&hmp->io_spin);
	} else if (createit) {
		/*
		 * Allocate the new DIO outside any spinlock, then retry
		 * the insertion under the exclusive lock to resolve a
		 * possible race against a concurrent creator.
		 */
		refs = 0;
		hammer2_spin_unsh(&hmp->io_spin);
		dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
		dio->hmp = hmp;
		dio->pbase = pbase;
		dio->psize = psize;
		dio->btype = btype;
		dio->refs = refs + 1;
		dio->act = 5;
		hammer2_spin_ex(&hmp->io_spin);
		xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
		if (xio == NULL) {
			atomic_add_int(&hammer2_dio_count, 1);
			hammer2_spin_unex(&hmp->io_spin);
		} else {
			/* lost the race; ref the winner and discard ours */
			refs = atomic_fetchadd_64(&xio->refs, 1);
			if ((refs & HAMMER2_DIO_MASK) == 0)
				atomic_add_int(&xio->hmp->iofree_count, -1);
			if (refs & HAMMER2_DIO_GOOD)
				*isgoodp = 1;
			hammer2_spin_unex(&hmp->io_spin);
			kfree(dio, M_HAMMER2);
			dio = xio;
		}
	} else {
		hammer2_spin_unsh(&hmp->io_spin);
		return NULL;
	}
	/* refresh the activity heuristic used by cleanup aging */
	dio->ticks = ticks;
	if (dio->act < 10)
		++dio->act;

	return dio;
}
179
180 /*
181  * Acquire the requested dio.  If DIO_GOOD is not set we must instantiate
182  * a buffer.  If set the buffer already exists and is good to go.
183  */
184 hammer2_io_t *
185 hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize, int op)
186 {
187         hammer2_io_t *dio;
188         off_t peof;
189         uint64_t orefs;
190         uint64_t nrefs;
191         int isgood;
192         int error;
193         int hce;
194         int bflags;
195
196         bflags = ((btype == HAMMER2_BREF_TYPE_DATA) ? B_NOTMETA : 0);
197         bflags |= B_KVABIO;
198
199         KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
200
201         if (op == HAMMER2_DOP_READQ) {
202                 dio = hammer2_io_alloc(hmp, lbase, btype, 0, &isgood);
203                 if (dio == NULL)
204                         return NULL;
205                 op = HAMMER2_DOP_READ;
206         } else {
207                 dio = hammer2_io_alloc(hmp, lbase, btype, 1, &isgood);
208         }
209
210         for (;;) {
211                 orefs = dio->refs;
212                 cpu_ccfence();
213
214                 /*
215                  * Buffer is already good, handle the op and return.
216                  */
217                 if (orefs & HAMMER2_DIO_GOOD) {
218                         if (isgood == 0)
219                                 cpu_mfence();
220                         bkvasync(dio->bp);
221
222                         switch(op) {
223                         case HAMMER2_DOP_NEW:
224                                 bzero(hammer2_io_data(dio, lbase), lsize);
225                                 /* fall through */
226                         case HAMMER2_DOP_NEWNZ:
227                                 atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
228                                 break;
229                         case HAMMER2_DOP_READ:
230                         default:
231                                 /* nothing to do */
232                                 break;
233                         }
234                         return (dio);
235                 }
236
237                 /*
238                  * Try to own the DIO
239                  */
240                 if (orefs & HAMMER2_DIO_INPROG) {
241                         nrefs = orefs | HAMMER2_DIO_WAITING;
242                         tsleep_interlock(dio, 0);
243                         if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
244                                 tsleep(dio, PINTERLOCKED, "h2dio", hz);
245                         }
246                         /* retry */
247                 } else {
248                         nrefs = orefs | HAMMER2_DIO_INPROG;
249                         if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
250                                 break;
251                         }
252                 }
253         }
254
255         /*
256          * We break to here if GOOD is not set and we acquired INPROG for
257          * the I/O.
258          */
259         KKASSERT(dio->bp == NULL);
260         if (btype == HAMMER2_BREF_TYPE_DATA)
261                 hce = hammer2_cluster_data_read;
262         else
263                 hce = hammer2_cluster_meta_read;
264
265         error = 0;
266         if (dio->pbase == (lbase & ~HAMMER2_OFF_MASK_RADIX) &&
267             dio->psize == lsize) {
268                 switch(op) {
269                 case HAMMER2_DOP_NEW:
270                 case HAMMER2_DOP_NEWNZ:
271                         dio->bp = getblk(dio->hmp->devvp,
272                                          dio->pbase, dio->psize,
273                                          GETBLK_KVABIO, 0);
274                         if (op == HAMMER2_DOP_NEW) {
275                                 bkvasync(dio->bp);
276                                 bzero(dio->bp->b_data, dio->psize);
277                         }
278                         atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
279                         break;
280                 case HAMMER2_DOP_READ:
281                 default:
282                         if (hce > 0) {
283                                 /*
284                                  * Synchronous cluster I/O for now.
285                                  */
286                                 peof = (dio->pbase + HAMMER2_SEGMASK64) &
287                                        ~HAMMER2_SEGMASK64;
288                                 dio->bp = NULL;
289                                 error = cluster_readx(dio->hmp->devvp,
290                                                      peof, dio->pbase,
291                                                      dio->psize, bflags,
292                                                      dio->psize,
293                                                      HAMMER2_PBUFSIZE*hce,
294                                                      &dio->bp);
295                         } else {
296                                 dio->bp = NULL;
297                                 error = breadnx(dio->hmp->devvp, dio->pbase,
298                                                 dio->psize, bflags,
299                                                 NULL, NULL, 0, &dio->bp);
300                         }
301                 }
302         } else {
303                 if (hce > 0) {
304                         /*
305                          * Synchronous cluster I/O for now.
306                          */
307                         peof = (dio->pbase + HAMMER2_SEGMASK64) &
308                                ~HAMMER2_SEGMASK64;
309                         error = cluster_readx(dio->hmp->devvp,
310                                               peof, dio->pbase, dio->psize,
311                                               bflags,
312                                               dio->psize, HAMMER2_PBUFSIZE*hce,
313                                               &dio->bp);
314                 } else {
315                         error = breadnx(dio->hmp->devvp, dio->pbase,
316                                         dio->psize, bflags,
317                                         NULL, NULL, 0, &dio->bp);
318                 }
319                 if (dio->bp) {
320                         /*
321                          * Handle NEW flags
322                          */
323                         switch(op) {
324                         case HAMMER2_DOP_NEW:
325                                 bkvasync(dio->bp);
326                                 bzero(hammer2_io_data(dio, lbase), lsize);
327                                 /* fall through */
328                         case HAMMER2_DOP_NEWNZ:
329                                 atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
330                                 break;
331                         case HAMMER2_DOP_READ:
332                         default:
333                                 break;
334                         }
335
336                         /*
337                          * Tell the kernel that the buffer cache is not
338                          * meta-data based on the btype.  This allows
339                          * swapcache to distinguish between data and
340                          * meta-data.
341                          */
342                         switch(btype) {
343                         case HAMMER2_BREF_TYPE_DATA:
344                                 dio->bp->b_flags |= B_NOTMETA;
345                                 break;
346                         default:
347                                 break;
348                         }
349                 }
350         }
351
352         if (dio->bp) {
353                 bkvasync(dio->bp);
354                 BUF_KERNPROC(dio->bp);
355                 dio->bp->b_flags &= ~B_AGE;
356         }
357         dio->error = error;
358
359         /*
360          * Clear INPROG and WAITING, set GOOD wake up anyone waiting.
361          */
362         for (;;) {
363                 orefs = dio->refs;
364                 cpu_ccfence();
365                 nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_WAITING);
366                 if (error == 0)
367                         nrefs |= HAMMER2_DIO_GOOD;
368                 if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
369                         if (orefs & HAMMER2_DIO_WAITING)
370                                 wakeup(dio);
371                         break;
372                 }
373                 cpu_pause();
374         }
375
376         /* XXX error handling */
377
378         return dio;
379 }
380
/*
 * Release our ref on *diop.
 *
 * On the 1->0 transition we clear DIO_GOOD, set DIO_INPROG, and dispose
 * of dio->bp.  Then we clean up DIO_INPROG and DIO_WAITING.
 *
 * *diop is always zapped.  The DIO itself is retained in the device
 * iotree as an idle cache entry; if too many accumulate, a fraction of
 * them are pruned via hammer2_io_cleanup_callback().
 */
void
hammer2_io_putblk(hammer2_io_t **diop)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	struct buf *bp;
	off_t pbase;
	int psize;
	int dio_limit;
	uint64_t orefs;
	uint64_t nrefs;

	dio = *diop;
	*diop = NULL;
	hmp = dio->hmp;

	KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0);

	/*
	 * Drop refs.
	 *
	 * On the 1->0 transition clear GOOD and set INPROG, and break.
	 * On any other transition we can return early.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		if ((orefs & HAMMER2_DIO_MASK) == 1 &&
		    (orefs & HAMMER2_DIO_INPROG) == 0) {
			/*
			 * Lastdrop case, INPROG can be set.  GOOD must be
			 * cleared to prevent the getblk shortcut.
			 */
			nrefs = orefs - 1;
			nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
			nrefs |= HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				break;
		} else if ((orefs & HAMMER2_DIO_MASK) == 1) {
			/*
			 * Lastdrop case, INPROG already set.  We must
			 * wait for INPROG to clear.
			 */
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			/*
			 * Normal drop case.
			 */
			nrefs = orefs - 1;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				return;
			/* retry */
		}
		cpu_pause();
		/* retry */
	}

	/*
	 * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
	 * have been cleared.  iofree_count has not yet been incremented,
	 * note that another accessor race will decrement iofree_count so
	 * we have to increment it regardless.
	 *
	 * We can now dispose of the buffer, and should do it before calling
	 * io_complete() in case there's a race against a new reference
	 * which causes io_complete() to chain and instantiate the bp again.
	 */
	pbase = dio->pbase;
	psize = dio->psize;
	bp = dio->bp;
	dio->bp = NULL;

	/* NOTE: orefs snapshot is from before GOOD/DIRTY were cleared */
	if ((orefs & HAMMER2_DIO_GOOD) && bp) {
		/*
		 * Non-errored disposal of bp
		 */
		if (orefs & HAMMER2_DIO_DIRTY) {
			dio_write_stats_update(dio, bp);

			/*
			 * Allows dirty buffers to accumulate and
			 * possibly be canceled (e.g. by a 'rm'),
			 * will burst-write later.
			 *
			 * We normally do not allow the kernel to
			 * cluster dirty buffers because H2 already
			 * uses a large block size.
			 *
			 * NOTE: Do not use cluster_write() here.  The
			 *       problem is that due to the way chains
			 *       are locked, buffers are cycled in and out
			 *       quite often so the disposal here is not
			 *       necessarily the final disposal.  Avoid
			 *       excessive rewriting of the same blocks
			 *       by using bdwrite().
			 */
#if 0
			off_t peof;
			int hce;

			if ((hce = hammer2_cluster_write) > 0) {
				/*
				 * Allows write-behind to keep the buffer
				 * cache sane.
				 */
				peof = (pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				bp->b_flags |= B_CLUSTEROK;
				cluster_write(bp, peof, psize, hce);
			} else
#endif
			if (hammer2_cluster_write)
				bp->b_flags |= B_CLUSTEROK;
			else
				bp->b_flags &= ~B_CLUSTEROK;
			bdwrite(bp);
		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	} else if (bp) {
		/*
		 * Errored disposal of bp
		 */
		brelse(bp);
	}

	/*
	 * Update iofree_count before disposing of the dio
	 */
	hmp = dio->hmp;
	atomic_add_int(&hmp->iofree_count, 1);

	/*
	 * Clear INPROG, GOOD, and WAITING (GOOD should already be clear).
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_GOOD |
				  HAMMER2_DIO_WAITING);
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/*
	 * We cache free buffers so re-use cases can use a shared lock, but
	 * if too many build up we have to clean them out.
	 */
	dio_limit = hammer2_dio_limit;
	if (dio_limit < 256)
		dio_limit = 256;
	if (dio_limit > 1024*1024)
		dio_limit = 1024*1024;
	if (hmp->iofree_count > dio_limit) {
		struct hammer2_cleanupcb_info info;

		RB_INIT(&info.tmptree);
		hammer2_spin_ex(&hmp->io_spin);
		/* re-check under the lock; prune ~20% of the free cache */
		if (hmp->iofree_count > dio_limit) {
			info.count = hmp->iofree_count / 5;
			RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
				hammer2_io_cleanup_callback, &info);
		}
		hammer2_spin_unex(&hmp->io_spin);
		hammer2_io_cleanup(hmp, &info.tmptree);
	}
}
566
567 /*
568  * Cleanup any dio's with (INPROG | refs) == 0.
569  *
570  * Called to clean up cached DIOs on umount after all activity has been
571  * flushed.
572  */
573 static
574 int
575 hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
576 {
577         struct hammer2_cleanupcb_info *info = arg;
578         hammer2_io_t *xio;
579
580         if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
581                 if (dio->act > 0) {
582                         int act;
583
584                         act = dio->act - (ticks - dio->ticks) / hz - 1;
585                         if (act > 0) {
586                                 dio->act = act;
587                                 return 0;
588                         }
589                         dio->act = 0;
590                 }
591                 KKASSERT(dio->bp == NULL);
592                 if (info->count > 0) {
593                         RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
594                         xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
595                         KKASSERT(xio == NULL);
596                         --info->count;
597                 }
598         }
599         return 0;
600 }
601
602 void
603 hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
604 {
605         hammer2_io_t *dio;
606
607         while ((dio = RB_ROOT(tree)) != NULL) {
608                 RB_REMOVE(hammer2_io_tree, tree, dio);
609                 KKASSERT(dio->bp == NULL &&
610                     (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
611                 if (dio->refs & HAMMER2_DIO_DIRTY) {
612                         kprintf("hammer2_io_cleanup: Dirty buffer "
613                                 "%016jx/%d (bp=%p)\n",
614                                 dio->pbase, dio->psize, dio->bp);
615                 }
616                 kfree(dio, M_HAMMER2);
617                 atomic_add_int(&hammer2_dio_count, -1);
618                 atomic_add_int(&hmp->iofree_count, -1);
619         }
620 }
621
622 /*
623  * Returns a pointer to the requested data.
624  */
625 char *
626 hammer2_io_data(hammer2_io_t *dio, off_t lbase)
627 {
628         struct buf *bp;
629         int off;
630
631         bp = dio->bp;
632         KKASSERT(bp != NULL);
633         bkvasync(bp);
634         off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
635         KKASSERT(off >= 0 && off < bp->b_bufsize);
636         return(bp->b_data + off);
637 }
638
639 int
640 hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
641                hammer2_io_t **diop)
642 {
643         *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEW);
644         return ((*diop)->error);
645 }
646
647 int
648 hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
649                  hammer2_io_t **diop)
650 {
651         *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEWNZ);
652         return ((*diop)->error);
653 }
654
655 int
656 hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
657                 hammer2_io_t **diop)
658 {
659         *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_READ);
660         return ((*diop)->error);
661 }
662
663 hammer2_io_t *
664 hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase, int lsize)
665 {
666         hammer2_io_t *dio;
667
668         dio = hammer2_io_getblk(hmp, 0, lbase, lsize, HAMMER2_DOP_READQ);
669         return dio;
670 }
671
672 void
673 hammer2_io_bawrite(hammer2_io_t **diop)
674 {
675         atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
676         hammer2_io_putblk(diop);
677 }
678
679 void
680 hammer2_io_bdwrite(hammer2_io_t **diop)
681 {
682         atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
683         hammer2_io_putblk(diop);
684 }
685
686 int
687 hammer2_io_bwrite(hammer2_io_t **diop)
688 {
689         atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
690         hammer2_io_putblk(diop);
691         return (0);     /* XXX */
692 }
693
694 void
695 hammer2_io_setdirty(hammer2_io_t *dio)
696 {
697         atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
698 }
699
/*
 * This routine is called when a MODIFIED chain is being DESTROYED,
 * in an attempt to allow the related buffer cache buffer to be
 * invalidated and discarded instead of flushing it to disk.
 *
 * At the moment this case is only really useful for file meta-data.
 * File data is already handled via the logical buffer cache associated
 * with the vnode, and will be discarded if it was never flushed to disk.
 * File meta-data may include inodes, directory entries, and indirect blocks.
 *
 * XXX
 * However, our DIO buffers are PBUFSIZE'd (64KB), and the area being
 * invalidated might be smaller.  Most of the meta-data structures above
 * are in the 'smaller' category.  For now, don't try to invalidate the
 * data areas.
 */
void
hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes)
{
	/* NOP - intentionally does nothing, see XXX comment above */
}
721
722 void
723 hammer2_io_brelse(hammer2_io_t **diop)
724 {
725         hammer2_io_putblk(diop);
726 }
727
728 void
729 hammer2_io_bqrelse(hammer2_io_t **diop)
730 {
731         hammer2_io_putblk(diop);
732 }
733
734 /*
735  * Set dedup validation bits in a DIO.  We do not need the buffer cache
736  * buffer for this.  This must be done concurrent with setting bits in
737  * the freemap so as to interlock with bulkfree's clearing of those bits.
738  */
739 void
740 hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
741 {
742         hammer2_io_t *dio;
743         uint64_t mask;
744         int lsize;
745         int isgood;
746
747         dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1, &isgood);
748         lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
749         mask = hammer2_dedup_mask(dio, bref->data_off, lsize);
750         atomic_clear_64(&dio->dedup_valid, mask);
751         atomic_set_64(&dio->dedup_alloc, mask);
752         hammer2_io_putblk(&dio);
753 }
754
755 /*
756  * Clear dedup validation bits in a DIO.  This is typically done when
757  * a modified chain is destroyed or by the bulkfree code.  No buffer
758  * is needed for this operation.  If the DIO no longer exists it is
759  * equivalent to the bits not being set.
760  */
761 void
762 hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype,
763                         hammer2_off_t data_off, u_int bytes)
764 {
765         hammer2_io_t *dio;
766         uint64_t mask;
767         int isgood;
768
769         if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
770                 return;
771         if (btype != HAMMER2_BREF_TYPE_DATA)
772                 return;
773         dio = hammer2_io_alloc(hmp, data_off, btype, 0, &isgood);
774         if (dio) {
775                 if (data_off < dio->pbase ||
776                     (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes >
777                     dio->pbase + dio->psize) {
778                         panic("hammer2_dedup_delete: DATAOFF BAD "
779                               "%016jx/%d %016jx\n",
780                               data_off, bytes, dio->pbase);
781                 }
782                 mask = hammer2_dedup_mask(dio, data_off, bytes);
783                 atomic_clear_64(&dio->dedup_alloc, mask);
784                 atomic_clear_64(&dio->dedup_valid, mask);
785                 hammer2_io_putblk(&dio);
786         }
787 }
788
789 /*
790  * Assert that dedup allocation bits in a DIO are not set.  This operation
791  * does not require a buffer.  The DIO does not need to exist.
792  */
793 void
794 hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off, u_int bytes)
795 {
796         hammer2_io_t *dio;
797         int isgood;
798
799         dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA,
800                                0, &isgood);
801         if (dio) {
802                 KASSERT((dio->dedup_alloc &
803                           hammer2_dedup_mask(dio, data_off, bytes)) == 0,
804                         ("hammer2_dedup_assert: %016jx/%d %016jx/%016jx",
805                         data_off,
806                         bytes,
807                         hammer2_dedup_mask(dio, data_off, bytes),
808                         dio->dedup_alloc));
809                 hammer2_io_putblk(&dio);
810         }
811 }
812
813 static
814 void
815 dio_write_stats_update(hammer2_io_t *dio, struct buf *bp)
816 {
817         long *counterp;
818
819         if (bp->b_flags & B_DELWRI)
820                 return;
821
822         switch(dio->btype) {
823         case 0:
824                 return;
825         case HAMMER2_BREF_TYPE_DATA:
826                 counterp = &hammer2_iod_file_write;
827                 break;
828         case HAMMER2_BREF_TYPE_DIRENT:
829         case HAMMER2_BREF_TYPE_INODE:
830                 counterp = &hammer2_iod_meta_write;
831                 break;
832         case HAMMER2_BREF_TYPE_INDIRECT:
833                 counterp = &hammer2_iod_indr_write;
834                 break;
835         case HAMMER2_BREF_TYPE_FREEMAP_NODE:
836         case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
837                 counterp = &hammer2_iod_fmap_write;
838                 break;
839         default:
840                 counterp = &hammer2_iod_volu_write;
841                 break;
842         }
843         *counterp += dio->psize;
844 }
845
846 void
847 hammer2_io_bkvasync(hammer2_io_t *dio)
848 {
849         KKASSERT(dio->bp != NULL);
850         bkvasync(dio->bp);
851 }
852
853 /*
854  * Ref a dio that is already owned
855  */
856 void
857 hammer2_io_ref(hammer2_io_t *dio)
858 {
859         atomic_add_64(&dio->refs, 1);
860 }