/*
 * Copyright (c) 2013-2018 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "hammer2.h"

#define HAMMER2_DOP_READ	1	/* read the buffer if not cached */
#define HAMMER2_DOP_NEW		2	/* acquire the buffer and zero it */
#define HAMMER2_DOP_NEWNZ	3	/* acquire the buffer, do not zero */
#define HAMMER2_DOP_READQ	4	/* read only if the DIO already exists */

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  Can be used as an OS abstraction, but the main
 * purpose is to allow larger device buffers to back hammer2_chains that
 * use smaller allocations, without causing deadlocks.
 *
 * The DIOs also record temporary state with limited persistence.  This
 * feature is used to keep track of dedupable blocks.
 */
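/*
 * Illustrative sketch only (not compiled): typical read-side use of the
 * DIO layer.  The helper name and the elided error handling are ours;
 * the hammer2_io_* calls are the API defined below.
 */
#if 0
static void
example_dio_read(hammer2_dev_t *hmp, off_t lbase, int lsize)
{
	hammer2_io_t *dio;
	char *data;

	/* lbase encodes the device offset plus the size radix */
	if (hammer2_io_bread(hmp, HAMMER2_BREF_TYPE_DATA,
			     lbase, lsize, &dio) == 0) {
		data = hammer2_io_data(dio, lbase);
		/* ... consume lsize bytes at data ... */
	}
	hammer2_io_bqrelse(&dio);	/* drop ref, passively release bp */
}
#endif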
static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
static void dio_write_stats_update(hammer2_io_t *dio, struct buf *bp);

static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
	if (io1->pbase < io2->pbase)
		return(-1);
	if (io1->pbase > io2->pbase)
		return(1);
	return(0);
}

RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
		off_t, pbase);

struct hammer2_cleanupcb_info {
	struct hammer2_io_tree tmptree;
	int	count;
};

#if 0
static __inline
uint64_t
hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
	uint64_t mask;
	int i;

	if (bytes < 1024)	/* smaller chunks not supported */
		return 0;

	/*
	 * Calculate crc check mask for larger chunks
	 */
	i = (((off & ~HAMMER2_OFF_MASK_RADIX) - dio->pbase) &
	     HAMMER2_PBUFMASK) >> 10;
	if (i == 0 && bytes == HAMMER2_PBUFSIZE)
		return((uint64_t)-1);
	mask = ((uint64_t)1U << (bytes >> 10)) - 1;
	mask <<= i;

	return mask;
}
#endif

/*
 * Returns the DIO corresponding to the data|radix, creating it if necessary.
 *
 * If createit is 0, NULL can be returned indicating that the DIO does not
 * exist.  (btype) is ignored when createit is 0.
 */
static __inline
hammer2_io_t *
hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_key_t data_off, uint8_t btype,
		 int createit, int *isgoodp)
{
	hammer2_io_t *dio;
	hammer2_io_t *xio;
	hammer2_key_t lbase;
	hammer2_key_t pbase;
	hammer2_key_t pmask;
	uint64_t refs;
	int lsize;
	int psize;

	psize = HAMMER2_PBUFSIZE;
	pmask = ~(hammer2_off_t)(psize - 1);
	lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
	lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;
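	/*
	 * Worked example (illustrative offset): the low 6 bits of data_off
	 * encode the size radix.  For data_off 0x000000000041040A the radix
	 * is 0x0A, so lsize = 1024, lbase = 0x0000000000410400, and with a
	 * 64KB physical buffer pbase = 0x0000000000410000.
	 */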

	if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
		kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
			pbase, lbase, lsize, pmask);
	}
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
	*isgoodp = 0;

	/*
	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
	 */
	hammer2_spin_sh(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
	if (dio) {
		refs = atomic_fetchadd_64(&dio->refs, 1);
		if ((refs & HAMMER2_DIO_MASK) == 0) {
			atomic_add_int(&dio->hmp->iofree_count, -1);
		}
		if (refs & HAMMER2_DIO_GOOD)
			*isgoodp = 1;
		hammer2_spin_unsh(&hmp->io_spin);
	} else if (createit) {
		refs = 0;
		hammer2_spin_unsh(&hmp->io_spin);
		dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
		dio->hmp = hmp;
		dio->pbase = pbase;
		dio->psize = psize;
		dio->btype = btype;
		dio->refs = refs + 1;
		dio->act = 5;
		hammer2_spin_ex(&hmp->io_spin);
		xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
		if (xio == NULL) {
			atomic_add_int(&hammer2_dio_count, 1);
			hammer2_spin_unex(&hmp->io_spin);
		} else {
			refs = atomic_fetchadd_64(&xio->refs, 1);
			if ((refs & HAMMER2_DIO_MASK) == 0)
				atomic_add_int(&xio->hmp->iofree_count, -1);
			if (refs & HAMMER2_DIO_GOOD)
				*isgoodp = 1;
			hammer2_spin_unex(&hmp->io_spin);
			kfree(dio, M_HAMMER2);
			dio = xio;
		}
	} else {
		hammer2_spin_unsh(&hmp->io_spin);
		return NULL;
	}
	dio->ticks = ticks;
	if (dio->act < 10)
		++dio->act;

	return dio;
}

/*
 * Acquire the requested dio.  If DIO_GOOD is not set, we must instantiate
 * a buffer.  If it is set, the buffer already exists and is good to go.
 */
hammer2_io_t *
hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize, int op)
{
	hammer2_io_t *dio;
	off_t peof;
	uint64_t orefs;
	uint64_t nrefs;
	int isgood;
	int error;
	int hce;
	int bflags;

	bflags = ((btype == HAMMER2_BREF_TYPE_DATA) ? B_NOTMETA : 0);
	bflags |= B_KVABIO;

	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);

	if (op == HAMMER2_DOP_READQ) {
		dio = hammer2_io_alloc(hmp, lbase, btype, 0, &isgood);
		if (dio == NULL)
			return NULL;
		op = HAMMER2_DOP_READ;
	} else {
		dio = hammer2_io_alloc(hmp, lbase, btype, 1, &isgood);
	}

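	/*
	 * dio->refs packs the reference count (HAMMER2_DIO_MASK) together
	 * with the GOOD, INPROG, WAITING, and DIRTY state flags in a single
	 * 64-bit word, so ref/state transitions can be made atomically
	 * with atomic_cmpset_64() in the loop below.
	 */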
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		/*
		 * Buffer is already good, handle the op and return.
		 */
		if (orefs & HAMMER2_DIO_GOOD) {
			if (isgood == 0)
				cpu_mfence();	/* see bp stores made
						 * before GOOD was set */
			bkvasync(dio->bp);

			switch(op) {
			case HAMMER2_DOP_NEW:
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				/* nothing to do */
				break;
			}
			return (dio);
		}

		/*
		 * Try to own the DIO
		 */
		if (orefs & HAMMER2_DIO_INPROG) {
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			nrefs = orefs | HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				break;
			}
		}
	}

	/*
	 * We break to here if GOOD is not set and we acquired INPROG for
	 * the I/O.
	 */
	KKASSERT(dio->bp == NULL);
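	/*
	 * Select the read-ahead clustering factor; the cluster read below
	 * requests up to hce * HAMMER2_PBUFSIZE of read-ahead when enabled.
	 */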
	if (btype == HAMMER2_BREF_TYPE_DATA)
		hce = hammer2_cluster_data_read;
	else
		hce = hammer2_cluster_meta_read;

	error = 0;
	if (dio->pbase == (lbase & ~HAMMER2_OFF_MASK_RADIX) &&
	    dio->psize == lsize) {
		switch(op) {
		case HAMMER2_DOP_NEW:
		case HAMMER2_DOP_NEWNZ:
			dio->bp = getblk(dio->hmp->devvp,
					 dio->pbase, dio->psize,
					 GETBLK_KVABIO, 0);
			if (op == HAMMER2_DOP_NEW) {
				bkvasync(dio->bp);
				bzero(dio->bp->b_data, dio->psize);
			}
			atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
			break;
		case HAMMER2_DOP_READ:
		default:
			if (hce > 0) {
				/*
				 * Synchronous cluster I/O for now.
				 */
				peof = (dio->pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				dio->bp = NULL;
				error = cluster_readx(dio->hmp->devvp,
						     peof, dio->pbase,
						     dio->psize, bflags,
						     dio->psize,
						     HAMMER2_PBUFSIZE*hce,
						     &dio->bp);
			} else {
				dio->bp = NULL;
				error = breadnx(dio->hmp->devvp, dio->pbase,
						dio->psize, bflags,
						NULL, NULL, 0, &dio->bp);
			}
		}
	} else {
		if (hce > 0) {
			/*
			 * Synchronous cluster I/O for now.
			 */
			peof = (dio->pbase + HAMMER2_SEGMASK64) &
			       ~HAMMER2_SEGMASK64;
			error = cluster_readx(dio->hmp->devvp,
					      peof, dio->pbase, dio->psize,
					      bflags,
					      dio->psize, HAMMER2_PBUFSIZE*hce,
					      &dio->bp);
		} else {
			error = breadnx(dio->hmp->devvp, dio->pbase,
					dio->psize, bflags,
					NULL, NULL, 0, &dio->bp);
		}
		if (dio->bp) {
			/*
			 * Handle NEW flags
			 */
			switch(op) {
			case HAMMER2_DOP_NEW:
				bkvasync(dio->bp);
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				break;
			}

			/*
			 * Tell the kernel that the buffer cache is not
			 * meta-data based on the btype.  This allows
			 * swapcache to distinguish between data and
			 * meta-data.
			 */
			switch(btype) {
			case HAMMER2_BREF_TYPE_DATA:
				dio->bp->b_flags |= B_NOTMETA;
				break;
			default:
				break;
			}
		}
	}

	if (dio->bp) {
		bkvasync(dio->bp);
		BUF_KERNPROC(dio->bp);
		dio->bp->b_flags &= ~B_AGE;
	}
	dio->error = error;

	/*
	 * Clear INPROG and WAITING, set GOOD on success, and wake up
	 * anyone waiting.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_WAITING);
		if (error == 0)
			nrefs |= HAMMER2_DIO_GOOD;
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/* XXX error handling */

	return dio;
}

/*
 * Release our ref on *diop.
 *
 * On the 1->0 transition we clear DIO_GOOD, set DIO_INPROG, and dispose
 * of dio->bp.  Then we clean up DIO_INPROG and DIO_WAITING.
 */
void
hammer2_io_putblk(hammer2_io_t **diop)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	struct buf *bp;
	off_t pbase;
	int psize;
	int dio_limit;
	uint64_t orefs;
	uint64_t nrefs;

	dio = *diop;
	*diop = NULL;
	hmp = dio->hmp;

	KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0);

	/*
	 * Drop refs.
	 *
	 * On the 1->0 transition clear GOOD and set INPROG, and break.
	 * On any other transition we can return early.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		if ((orefs & HAMMER2_DIO_MASK) == 1 &&
		    (orefs & HAMMER2_DIO_INPROG) == 0) {
			/*
			 * Lastdrop case, INPROG can be set.
			 */
			nrefs = orefs - 1;
			nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
			nrefs |= HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				break;
		} else if ((orefs & HAMMER2_DIO_MASK) == 1) {
			/*
			 * Lastdrop case, INPROG already set.  We must
			 * wait for INPROG to clear.
			 */
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			/*
			 * Normal drop case.
			 */
			nrefs = orefs - 1;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				return;
			/* retry */
		}
		cpu_pause();
		/* retry */
	}

	/*
	 * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
	 * have been cleared.  iofree_count has not yet been incremented;
	 * note that a racing accessor may decrement iofree_count, so we
	 * have to increment it regardless.
	 *
	 * We can now dispose of the buffer, and should do it before calling
	 * io_complete() in case there's a race against a new reference
	 * which causes io_complete() to chain and instantiate the bp again.
	 */
	pbase = dio->pbase;
	psize = dio->psize;
	bp = dio->bp;
	dio->bp = NULL;

	if ((orefs & HAMMER2_DIO_GOOD) && bp) {
		/*
		 * Non-errored disposal of bp
		 */
		if (orefs & HAMMER2_DIO_DIRTY) {
			dio_write_stats_update(dio, bp);

			/*
			 * Allows dirty buffers to accumulate and
			 * possibly be canceled (e.g. by a 'rm'),
			 * will burst-write later.  Allow the kernel
			 * to cluster the dirty buffers.
			 *
			 * NOTE: Do not use cluster_write() here.  The
			 *	 problem is that due to the way chains
			 *	 are locked, buffers are cycled in and out
			 *	 quite often so the disposal here is not
			 *	 necessarily the final disposal.  Avoid
			 *	 excessive rewriting of the same blocks
			 *	 by using bdwrite().
			 */
#if 0
			off_t peof;
			int hce;

			if ((hce = hammer2_cluster_write) > 0) {
				/*
				 * Allows write-behind to keep the buffer
				 * cache sane.
				 */
				peof = (pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				bp->b_flags |= B_CLUSTEROK;
				cluster_write(bp, peof, psize, hce);
			} else
#endif
			{
				bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	} else if (bp) {
		/*
		 * Errored disposal of bp
		 */
		brelse(bp);
	}

	/*
	 * Update iofree_count before disposing of the dio
	 */
	hmp = dio->hmp;
	atomic_add_int(&hmp->iofree_count, 1);

	/*
	 * Clear INPROG, GOOD, and WAITING
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_GOOD |
				  HAMMER2_DIO_WAITING);
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/*
	 * We cache free buffers so re-use cases can use a shared lock, but
	 * if too many build up we have to clean them out.
	 */
	dio_limit = hammer2_dio_limit;
	if (dio_limit < 256)
		dio_limit = 256;
	if (dio_limit > 1024*1024)
		dio_limit = 1024*1024;
	if (hmp->iofree_count > dio_limit) {
		struct hammer2_cleanupcb_info info;

		RB_INIT(&info.tmptree);
		hammer2_spin_ex(&hmp->io_spin);
		if (hmp->iofree_count > dio_limit) {
			info.count = hmp->iofree_count / 5;
			RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
				hammer2_io_cleanup_callback, &info);
		}
		hammer2_spin_unex(&hmp->io_spin);
		hammer2_io_cleanup(hmp, &info.tmptree);
	}
}

/*
 * Clean up any DIOs with (INPROG | refs) == 0.
 *
 * Invoked from hammer2_io_putblk() when too many free DIOs accumulate,
 * and on umount (after all activity has been flushed) to clean out the
 * remaining cached DIOs.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
	struct hammer2_cleanupcb_info *info = arg;
	hammer2_io_t *xio;

	if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
		if (dio->act > 0) {
			int act;

			act = dio->act - (ticks - dio->ticks) / hz - 1;
			if (act > 0) {
				dio->act = act;
				return 0;
			}
			dio->act = 0;
		}
		KKASSERT(dio->bp == NULL);
		if (info->count > 0) {
			RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
			xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
			KKASSERT(xio == NULL);
			--info->count;
		}
	}
	return 0;
}

void
hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
{
	hammer2_io_t *dio;

	while ((dio = RB_ROOT(tree)) != NULL) {
		RB_REMOVE(hammer2_io_tree, tree, dio);
		KKASSERT(dio->bp == NULL &&
		    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
		if (dio->refs & HAMMER2_DIO_DIRTY) {
			kprintf("hammer2_io_cleanup: Dirty buffer "
				"%016jx/%d (bp=%p)\n",
				dio->pbase, dio->psize, dio->bp);
		}
		kfree(dio, M_HAMMER2);
		atomic_add_int(&hammer2_dio_count, -1);
		atomic_add_int(&hmp->iofree_count, -1);
	}
}

/*
 * Returns a pointer to the requested data.
 */
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
	struct buf *bp;
	int off;

	bp = dio->bp;
	KKASSERT(bp != NULL);
	bkvasync(bp);
	off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
	KKASSERT(off >= 0 && off < bp->b_bufsize);
	return(bp->b_data + off);
}

int
hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
	       hammer2_io_t **diop)
{
	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEW);
	return ((*diop)->error);
}

int
hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		 hammer2_io_t **diop)
{
	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEWNZ);
	return ((*diop)->error);
}

int
hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		hammer2_io_t **diop)
{
	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_READ);
	return ((*diop)->error);
}

hammer2_io_t *
hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase, int lsize)
{
	hammer2_io_t *dio;

	dio = hammer2_io_getblk(hmp, 0, lbase, lsize, HAMMER2_DOP_READQ);
	return dio;
}

void
hammer2_io_bawrite(hammer2_io_t **diop)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
}

void
hammer2_io_bdwrite(hammer2_io_t **diop)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
}

int
hammer2_io_bwrite(hammer2_io_t **diop)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
	return (0);	/* XXX */
}
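
/*
 * Illustrative sketch only (not compiled): typical write-side use.  The
 * helper name and src argument are ours; a real caller would hold the
 * appropriate chain locks around this sequence.
 */
#if 0
static void
example_dio_write(hammer2_dev_t *hmp, off_t lbase, int lsize,
		  const void *src)
{
	hammer2_io_t *dio;

	/* DOP_NEWNZ acquires the buffer without pre-zeroing it */
	if (hammer2_io_newnz(hmp, HAMMER2_BREF_TYPE_DATA,
			     lbase, lsize, &dio) == 0)
		bcopy(src, hammer2_io_data(dio, lbase), lsize);
	hammer2_io_bdwrite(&dio);	/* marks DIRTY, delayed write */
}
#endif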

void
hammer2_io_setdirty(hammer2_io_t *dio)
{
	atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
}

/*
 * This routine is called when a MODIFIED chain is being DESTROYED,
 * in an attempt to allow the related buffer cache buffer to be
 * invalidated and discarded instead of flushing it to disk.
 *
 * At the moment this case is only really useful for file meta-data.
 * File data is already handled via the logical buffer cache associated
 * with the vnode, and will be discarded if it was never flushed to disk.
 * File meta-data may include inodes, directory entries, and indirect blocks.
 *
 * XXX
 * However, our DIO buffers are PBUFSIZE'd (64KB), and the area being
 * invalidated might be smaller.  Most of the meta-data structures above
 * are in the 'smaller' category.  For now, don't try to invalidate the
 * data areas.
 */
void
hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes)
{
	/* NOP */
}

void
hammer2_io_brelse(hammer2_io_t **diop)
{
	hammer2_io_putblk(diop);
}

void
hammer2_io_bqrelse(hammer2_io_t **diop)
{
	hammer2_io_putblk(diop);
}

/*
 * Set the dedup allocation bits in a DIO (clearing any stale validation
 * bits).  We do not need the buffer cache buffer for this.  This must be
 * done concurrent with setting bits in the freemap so as to interlock
 * with bulkfree's clearing of those bits.
 */
void
hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
{
	hammer2_io_t *dio;
	uint64_t mask;
	int lsize;
	int isgood;

	dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1, &isgood);
	lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	mask = hammer2_dedup_mask(dio, bref->data_off, lsize);
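	/* one mask bit per 1KB sub-block spanned within the 64KB DIO */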
	atomic_clear_64(&dio->dedup_valid, mask);
	atomic_set_64(&dio->dedup_alloc, mask);
	hammer2_io_putblk(&dio);
}

/*
 * Clear dedup validation bits in a DIO.  This is typically done when
 * a modified chain is destroyed or by the bulkfree code.  No buffer
 * is needed for this operation.  If the DIO no longer exists it is
 * equivalent to the bits not being set.
 */
void
hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype,
			hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	uint64_t mask;
	int isgood;

	if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
		return;
	if (btype != HAMMER2_BREF_TYPE_DATA)
		return;
	dio = hammer2_io_alloc(hmp, data_off, btype, 0, &isgood);
	if (dio) {
		if (data_off < dio->pbase ||
		    (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes >
		    dio->pbase + dio->psize) {
			panic("hammer2_dedup_delete: DATAOFF BAD "
			      "%016jx/%d %016jx\n",
			      data_off, bytes, dio->pbase);
		}
		mask = hammer2_dedup_mask(dio, data_off, bytes);
		atomic_clear_64(&dio->dedup_alloc, mask);
		atomic_clear_64(&dio->dedup_valid, mask);
		hammer2_io_putblk(&dio);
	}
}

/*
 * Assert that dedup allocation bits in a DIO are not set.  This operation
 * does not require a buffer.  The DIO does not need to exist.
 */
void
hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	int isgood;

	dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA,
			       0, &isgood);
	if (dio) {
		KASSERT((dio->dedup_alloc &
			  hammer2_dedup_mask(dio, data_off, bytes)) == 0,
			("hammer2_dedup_assert: %016jx/%d %016jx/%016jx",
			data_off,
			bytes,
			hammer2_dedup_mask(dio, data_off, bytes),
			dio->dedup_alloc));
		hammer2_io_putblk(&dio);
	}
}

static
void
dio_write_stats_update(hammer2_io_t *dio, struct buf *bp)
{
	long *counterp;

	if (bp->b_flags & B_DELWRI)	/* already counted on prior bdwrite */
		return;

	switch(dio->btype) {
	case 0:
		return;
	case HAMMER2_BREF_TYPE_DATA:
		counterp = &hammer2_iod_file_write;
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
	case HAMMER2_BREF_TYPE_INODE:
		counterp = &hammer2_iod_meta_write;
		break;
	case HAMMER2_BREF_TYPE_INDIRECT:
		counterp = &hammer2_iod_indr_write;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
		counterp = &hammer2_iod_fmap_write;
		break;
	default:
		counterp = &hammer2_iod_volu_write;
		break;
	}
	*counterp += dio->psize;
}

void
hammer2_io_bkvasync(hammer2_io_t *dio)
{
	KKASSERT(dio->bp != NULL);
	bkvasync(dio->bp);
}