/* sys/vfs/hammer2/hammer2_io.c — hammer2 buffered device I/O abstraction */
1 /*
2  * Copyright (c) 2013-2018 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
#include "hammer2.h"

/*
 * DIO operations:
 *  READ  - populate the buffer from media.
 *  NEW   - instantiate the buffer and zero-fill it, mark DIRTY.
 *  NEWNZ - instantiate the buffer without zeroing, mark DIRTY.
 *  READQ - lookup-only read; does not create a missing DIO and may
 *          yield NULL.
 */
#define HAMMER2_DOP_READ        1
#define HAMMER2_DOP_NEW         2
#define HAMMER2_DOP_NEWNZ       3
#define HAMMER2_DOP_READQ       4
41
42 /*
43  * Implements an abstraction layer for synchronous and asynchronous
44  * buffered device I/O.  Can be used as an OS-abstraction but the main
45  * purpose is to allow larger buffers to be used against hammer2_chain's
46  * using smaller allocations, without causing deadlocks.
47  *
48  * The DIOs also record temporary state with limited persistence.  This
49  * feature is used to keep track of dedupable blocks.
50  */
51 static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
52 static void dio_write_stats_update(hammer2_io_t *dio, struct buf *bp);
53
54 static int
55 hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
56 {
57         if (io1->pbase < io2->pbase)
58                 return(-1);
59         if (io1->pbase > io2->pbase)
60                 return(1);
61         return(0);
62 }
63
/* Generate the red-black tree functions, keyed/looked-up by pbase */
RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
		off_t, pbase);

/*
 * Scratch state for the cleanup scan: DIOs selected for destruction are
 * moved to tmptree, and count limits how many are taken per pass.
 */
struct hammer2_cleanupcb_info {
	struct hammer2_io_tree tmptree;
	int	count;
};
72
#if 0
/*
 * (dead code, retained for reference) Compute a 64-bit CRC-check mask
 * covering (off, bytes) within the DIO, at 1KB-per-bit granularity.
 */
static __inline
uint64_t
hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
	uint64_t mask;
	int i;

	if (bytes < 1024)	/* smaller chunks not supported */
		return 0;

	/*
	 * Calculate crc check mask for larger chunks
	 */
	i = (((off & ~HAMMER2_OFF_MASK_RADIX) - dio->pbase) &
	     HAMMER2_PBUFMASK) >> 10;
	if (i == 0 && bytes == HAMMER2_PBUFSIZE)
		return((uint64_t)-1);
	mask = ((uint64_t)1U << (bytes >> 10)) - 1;
	mask <<= i;

	return mask;
}
#endif
97
#ifdef HAMMER2_IO_DEBUG

/*
 * Record the caller's file/line, the current ref state, and the current
 * thread into the DIO's circular debug history.  Compiles to a no-op
 * macro when HAMMER2_IO_DEBUG is not defined.
 */
static __inline void
DIO_RECORD(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
	int i;

	/* advance the circular index atomically; wrap via the mask */
	i = atomic_fetchadd_int(&dio->debug_index, 1) & HAMMER2_IO_DEBUG_MASK;

	dio->debug_file[i] = file;
	dio->debug_line[i] = line;
	dio->debug_refs[i] = dio->refs;
	dio->debug_td[i] = curthread;
}

#else

#define DIO_RECORD(dio)

#endif
118
/*
 * Returns the DIO corresponding to the data|radix, creating it if necessary.
 *
 * If createit is 0, NULL can be returned indicating that the DIO does not
 * exist.  (btype) is ignored when createit is 0.
 *
 * The low radix bits of data_off encode the log2 size of the logical
 * buffer; the remaining bits are the device offset.  On success the
 * returned DIO has had its ref count bumped, and *isgoodp is set to 1
 * if the underlying buffer is already instantiated (DIO_GOOD).
 */
static __inline
hammer2_io_t *
hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_key_t data_off, uint8_t btype,
                 int createit, int *isgoodp)
{
        hammer2_io_t *dio;
        hammer2_io_t *xio;
        hammer2_key_t lbase;
        hammer2_key_t pbase;
        hammer2_key_t pmask;
        uint64_t refs;
        int lsize;
        int psize;

        /*
         * Decode the logical base/size from data_off and round down to
         * the physical buffer (HAMMER2_PBUFSIZE) boundary covered by
         * the DIO.
         */
        psize = HAMMER2_PBUFSIZE;
        pmask = ~(hammer2_off_t)(psize - 1);
        lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
        lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
        pbase = lbase & pmask;

        /* The logical extent must lie entirely within one physical buffer */
        if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
                kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
                        pbase, lbase, lsize, pmask);
        }
        KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
        *isgoodp = 0;

        /*
         * Access/Allocate the DIO, bump dio->refs to prevent destruction.
         *
         * The common lookup runs under a shared spinlock; only insertion
         * of a newly allocated DIO takes the exclusive lock.
         */
        hammer2_spin_sh(&hmp->io_spin);
        dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
        if (dio) {
                refs = atomic_fetchadd_64(&dio->refs, 1);
                if ((refs & HAMMER2_DIO_MASK) == 0) {
                        /* 0->1 transition, DIO leaves the free cache */
                        atomic_add_int(&dio->hmp->iofree_count, -1);
                }
                if (refs & HAMMER2_DIO_GOOD)
                        *isgoodp = 1;
                hammer2_spin_unsh(&hmp->io_spin);
        } else if (createit) {
                /*
                 * Allocate outside any spinlock, then insert under the
                 * exclusive lock.  If we lose the insertion race
                 * (xio != NULL), ref the winner and discard our copy.
                 */
                refs = 0;
                hammer2_spin_unsh(&hmp->io_spin);
                dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
                dio->hmp = hmp;
                dio->pbase = pbase;
                dio->psize = psize;
                dio->btype = btype;
                dio->refs = refs + 1;
                dio->act = 5;
                hammer2_spin_ex(&hmp->io_spin);
                xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
                if (xio == NULL) {
                        atomic_add_int(&hammer2_dio_count, 1);
                        hammer2_spin_unex(&hmp->io_spin);
                } else {
                        refs = atomic_fetchadd_64(&xio->refs, 1);
                        if ((refs & HAMMER2_DIO_MASK) == 0)
                                atomic_add_int(&xio->hmp->iofree_count, -1);
                        if (refs & HAMMER2_DIO_GOOD)
                                *isgoodp = 1;
                        hammer2_spin_unex(&hmp->io_spin);
                        kfree(dio, M_HAMMER2);
                        dio = xio;
                }
        } else {
                /* lookup-only mode and the DIO does not exist */
                hammer2_spin_unsh(&hmp->io_spin);
                return NULL;
        }

        /* Refresh the activity heuristic consumed by the cleanup scan */
        dio->ticks = ticks;
        if (dio->act < 10)
                ++dio->act;

        return dio;
}
200
/*
 * Acquire the requested dio.  If DIO_GOOD is not set we must instantiate
 * a buffer.  If set the buffer already exists and is good to go.
 *
 * (op) is one of the HAMMER2_DOP_* codes.  DOP_READQ is lookup-only and
 * may return NULL; the other ops create the DIO if necessary.  Exactly
 * one thread wins the DIO_INPROG flag and performs the buffer
 * instantiation; other threads interlock-sleep until it completes.
 */
hammer2_io_t *
_hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase,
                   int lsize, int op HAMMER2_IO_DEBUG_ARGS)
{
        hammer2_io_t *dio;
        off_t peof;
        uint64_t orefs;
        uint64_t nrefs;
        int isgood;
        int error;
        int hce;
        int bflags;

        /* data buffers are flagged non-meta for swapcache; all use KVABIO */
        bflags = ((btype == HAMMER2_BREF_TYPE_DATA) ? B_NOTMETA : 0);
        bflags |= B_KVABIO;

        /* lsize must match the radix encoded in lbase */
        KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);

        if (op == HAMMER2_DOP_READQ) {
                /* lookup-only; degrade to a normal READ if the DIO exists */
                dio = hammer2_io_alloc(hmp, lbase, btype, 0, &isgood);
                if (dio == NULL)
                        return NULL;
                op = HAMMER2_DOP_READ;
        } else {
                dio = hammer2_io_alloc(hmp, lbase, btype, 1, &isgood);
        }

        for (;;) {
                orefs = dio->refs;
                cpu_ccfence();

                /*
                 * Buffer is already good, handle the op and return.
                 */
                if (orefs & HAMMER2_DIO_GOOD) {
                        if (isgood == 0)
                                cpu_mfence();
                        bkvasync(dio->bp);

                        switch(op) {
                        case HAMMER2_DOP_NEW:
                                bzero(hammer2_io_data(dio, lbase), lsize);
                                /* fall through */
                        case HAMMER2_DOP_NEWNZ:
                                atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
                                break;
                        case HAMMER2_DOP_READ:
                        default:
                                /* nothing to do */
                                break;
                        }
                        DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
                        return (dio);
                }

                /*
                 * Try to own the DIO.  If another thread holds INPROG,
                 * flag WAITING and interlock-sleep until it finishes.
                 */
                if (orefs & HAMMER2_DIO_INPROG) {
                        nrefs = orefs | HAMMER2_DIO_WAITING;
                        tsleep_interlock(dio, 0);
                        if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
                                tsleep(dio, PINTERLOCKED, "h2dio", hz);
                        }
                        /* retry */
                } else {
                        nrefs = orefs | HAMMER2_DIO_INPROG;
                        if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
                                break;
                        }
                }
        }

        /*
         * We break to here if GOOD is not set and we acquired INPROG for
         * the I/O.
         */
        KKASSERT(dio->bp == NULL);
        /* cluster read-ahead tunable depends on data vs meta-data */
        if (btype == HAMMER2_BREF_TYPE_DATA)
                hce = hammer2_cluster_data_read;
        else
                hce = hammer2_cluster_meta_read;

        error = 0;
        if (dio->pbase == (lbase & ~HAMMER2_OFF_MASK_RADIX) &&
            dio->psize == lsize) {
                /*
                 * Logical request exactly covers the physical buffer.
                 */
                switch(op) {
                case HAMMER2_DOP_NEW:
                case HAMMER2_DOP_NEWNZ:
                        /* fresh buffer, no media read needed */
                        dio->bp = getblk(dio->hmp->devvp,
                                         dio->pbase, dio->psize,
                                         GETBLK_KVABIO, 0);
                        if (op == HAMMER2_DOP_NEW) {
                                bkvasync(dio->bp);
                                bzero(dio->bp->b_data, dio->psize);
                        }
                        atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
                        break;
                case HAMMER2_DOP_READ:
                default:
                        if (hce > 0) {
                                /*
                                 * Synchronous cluster I/O for now.
                                 */
                                peof = (dio->pbase + HAMMER2_SEGMASK64) &
                                       ~HAMMER2_SEGMASK64;
                                dio->bp = NULL;
                                error = cluster_readx(dio->hmp->devvp,
                                                     peof, dio->pbase,
                                                     dio->psize, bflags,
                                                     dio->psize,
                                                     HAMMER2_PBUFSIZE*hce,
                                                     &dio->bp);
                        } else {
                                dio->bp = NULL;
                                error = breadnx(dio->hmp->devvp, dio->pbase,
                                                dio->psize, bflags,
                                                NULL, NULL, 0, &dio->bp);
                        }
                }
        } else {
                /*
                 * Logical request is smaller than the physical buffer;
                 * we must read the whole physical buffer even for NEW ops.
                 */
                if (hce > 0) {
                        /*
                         * Synchronous cluster I/O for now.
                         */
                        peof = (dio->pbase + HAMMER2_SEGMASK64) &
                               ~HAMMER2_SEGMASK64;
                        error = cluster_readx(dio->hmp->devvp,
                                              peof, dio->pbase, dio->psize,
                                              bflags,
                                              dio->psize, HAMMER2_PBUFSIZE*hce,
                                              &dio->bp);
                } else {
                        error = breadnx(dio->hmp->devvp, dio->pbase,
                                        dio->psize, bflags,
                                        NULL, NULL, 0, &dio->bp);
                }
                if (dio->bp) {
                        /*
                         * Handle NEW flags
                         */
                        switch(op) {
                        case HAMMER2_DOP_NEW:
                                bkvasync(dio->bp);
                                bzero(hammer2_io_data(dio, lbase), lsize);
                                /* fall through */
                        case HAMMER2_DOP_NEWNZ:
                                atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
                                break;
                        case HAMMER2_DOP_READ:
                        default:
                                break;
                        }

                        /*
                         * Tell the kernel that the buffer cache is not
                         * meta-data based on the btype.  This allows
                         * swapcache to distinguish between data and
                         * meta-data.
                         */
                        switch(btype) {
                        case HAMMER2_BREF_TYPE_DATA:
                                dio->bp->b_flags |= B_NOTMETA;
                                break;
                        default:
                                break;
                        }
                }
        }

        if (dio->bp) {
                /* disassociate bp from the current thread's lock tracking */
                bkvasync(dio->bp);
                BUF_KERNPROC(dio->bp);
                dio->bp->b_flags &= ~B_AGE;
                /* dio->bp->b_debug_info2 = dio; */
        }
        dio->error = error;

        /*
         * Clear INPROG and WAITING, set GOOD wake up anyone waiting.
         */
        for (;;) {
                orefs = dio->refs;
                cpu_ccfence();
                nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_WAITING);
                if (error == 0)
                        nrefs |= HAMMER2_DIO_GOOD;
                if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
                        if (orefs & HAMMER2_DIO_WAITING)
                                wakeup(dio);
                        break;
                }
                cpu_pause();
        }

        /* XXX error handling */
        DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

        return dio;
}
405
/*
 * Release our ref on *diop.
 *
 * On the 1->0 transition we clear DIO_GOOD, set DIO_INPROG, and dispose
 * of dio->bp.  Then we clean up DIO_INPROG and DIO_WAITING.
 *
 * Dirty buffers are normally disposed via bdwrite() to allow write
 * accumulation/cancelation; DIO_FLUSH forces an actual (possibly
 * clustered) write.  Excess cached-free DIOs are reaped at the end.
 */
void
_hammer2_io_putblk(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
        hammer2_dev_t *hmp;
        hammer2_io_t *dio;
        struct buf *bp;
        off_t pbase;
        int psize;
        int dio_limit;
        uint64_t orefs;
        uint64_t nrefs;

        dio = *diop;
        *diop = NULL;
        hmp = dio->hmp;
        DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

        KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0);

        /*
         * Drop refs.
         *
         * On the 1->0 transition clear GOOD and set INPROG, and break.
         * On any other transition we can return early.
         */
        for (;;) {
                orefs = dio->refs;
                cpu_ccfence();

                if ((orefs & HAMMER2_DIO_MASK) == 1 &&
                    (orefs & HAMMER2_DIO_INPROG) == 0) {
                        /*
                         * Lastdrop case, INPROG can be set.  GOOD must be
                         * cleared to prevent the getblk shortcut.
                         */
                        nrefs = orefs - 1;
                        nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
                        nrefs |= HAMMER2_DIO_INPROG;
                        if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
                                break;
                } else if ((orefs & HAMMER2_DIO_MASK) == 1) {
                        /*
                         * Lastdrop case, INPROG already set.  We must
                         * wait for INPROG to clear.
                         */
                        nrefs = orefs | HAMMER2_DIO_WAITING;
                        tsleep_interlock(dio, 0);
                        if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
                                tsleep(dio, PINTERLOCKED, "h2dio", hz);
                        }
                        /* retry */
                } else {
                        /*
                         * Normal drop case.
                         */
                        nrefs = orefs - 1;
                        if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
                                return;
                        /* retry */
                }
                cpu_pause();
                /* retry */
        }

        /*
         * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
         * have been cleared.  iofree_count has not yet been incremented,
         * note that another accessor race will decrement iofree_count so
         * we have to increment it regardless.
         *
         * We can now dispose of the buffer, and should do it before calling
         * io_complete() in case there's a race against a new reference
         * which causes io_complete() to chain and instantiate the bp again.
         */
        pbase = dio->pbase;
        psize = dio->psize;
        bp = dio->bp;
        dio->bp = NULL;

        /* orefs is the pre-transition value, so GOOD/DIRTY are still visible */
        if ((orefs & HAMMER2_DIO_GOOD) && bp) {
                /*
                 * Non-errored disposal of bp
                 */
                if (orefs & HAMMER2_DIO_DIRTY) {
                        dio_write_stats_update(dio, bp);

                        /*
                         * Allows dirty buffers to accumulate and
                         * possibly be canceled (e.g. by a 'rm'),
                         * by default we will burst-write later.
                         *
                         * We generally do NOT want to issue an actual
                         * b[a]write() or cluster_write() here.  Due to
                         * the way chains are locked, buffers may be cycled
                         * in and out quite often and disposal here can cause
                         * multiple writes or write-read stalls.
                         *
                         * If FLUSH is set we do want to issue the actual
                         * write.  This typically occurs in the write-behind
                         * case when writing to large files.
                         */
                        off_t peof;
                        int hce;
                        if (dio->refs & HAMMER2_DIO_FLUSH) {
                                if ((hce = hammer2_cluster_write) != 0) {
                                        peof = (pbase + HAMMER2_SEGMASK64) &
                                               ~HAMMER2_SEGMASK64;
                                        bp->b_flags |= B_CLUSTEROK;
                                        cluster_write(bp, peof, psize, hce);
                                } else {
                                        bp->b_flags &= ~B_CLUSTEROK;
                                        bawrite(bp);
                                }
                        } else {
                                bp->b_flags &= ~B_CLUSTEROK;
                                bdwrite(bp);
                        }
                } else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
                        brelse(bp);
                } else {
                        bqrelse(bp);
                }
        } else if (bp) {
                /*
                 * Errored disposal of bp
                 */
                brelse(bp);
        }

        /*
         * Update iofree_count before disposing of the dio
         */
        hmp = dio->hmp;
        atomic_add_int(&hmp->iofree_count, 1);

        /*
         * Clear INPROG, GOOD, and WAITING (GOOD should already be clear).
         *
         * Also clear FLUSH as it was handled above.
         */
        for (;;) {
                orefs = dio->refs;
                cpu_ccfence();
                nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_GOOD |
                                  HAMMER2_DIO_WAITING | HAMMER2_DIO_FLUSH);
                if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
                        if (orefs & HAMMER2_DIO_WAITING)
                                wakeup(dio);
                        break;
                }
                cpu_pause();
        }

        /*
         * We cache free buffers so re-use cases can use a shared lock, but
         * if too many build up we have to clean them out.
         */
        dio_limit = hammer2_dio_limit;
        if (dio_limit < 256)
                dio_limit = 256;
        if (dio_limit > 1024*1024)
                dio_limit = 1024*1024;
        if (hmp->iofree_count > dio_limit) {
                struct hammer2_cleanupcb_info info;

                RB_INIT(&info.tmptree);
                hammer2_spin_ex(&hmp->io_spin);
                if (hmp->iofree_count > dio_limit) {
                        /* reap roughly 20% of the cached-free DIOs */
                        info.count = hmp->iofree_count / 5;
                        RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
                                hammer2_io_cleanup_callback, &info);
                }
                hammer2_spin_unex(&hmp->io_spin);
                hammer2_io_cleanup(hmp, &info.tmptree);
        }
}
588
589 /*
590  * Cleanup any dio's with (INPROG | refs) == 0.
591  *
592  * Called to clean up cached DIOs on umount after all activity has been
593  * flushed.
594  */
595 static
596 int
597 hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
598 {
599         struct hammer2_cleanupcb_info *info = arg;
600         hammer2_io_t *xio;
601
602         if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
603                 if (dio->act > 0) {
604                         int act;
605
606                         act = dio->act - (ticks - dio->ticks) / hz - 1;
607                         if (act > 0) {
608                                 dio->act = act;
609                                 return 0;
610                         }
611                         dio->act = 0;
612                 }
613                 KKASSERT(dio->bp == NULL);
614                 if (info->count > 0) {
615                         RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
616                         xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
617                         KKASSERT(xio == NULL);
618                         --info->count;
619                 }
620         }
621         return 0;
622 }
623
624 void
625 hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
626 {
627         hammer2_io_t *dio;
628
629         while ((dio = RB_ROOT(tree)) != NULL) {
630                 RB_REMOVE(hammer2_io_tree, tree, dio);
631                 KKASSERT(dio->bp == NULL &&
632                     (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
633                 if (dio->refs & HAMMER2_DIO_DIRTY) {
634                         kprintf("hammer2_io_cleanup: Dirty buffer "
635                                 "%016jx/%d (bp=%p)\n",
636                                 dio->pbase, dio->psize, dio->bp);
637                 }
638                 kfree(dio, M_HAMMER2);
639                 atomic_add_int(&hammer2_dio_count, -1);
640                 atomic_add_int(&hmp->iofree_count, -1);
641         }
642 }
643
644 /*
645  * Returns a pointer to the requested data.
646  */
647 char *
648 hammer2_io_data(hammer2_io_t *dio, off_t lbase)
649 {
650         struct buf *bp;
651         int off;
652
653         bp = dio->bp;
654         KKASSERT(bp != NULL);
655         bkvasync(bp);
656         off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
657         KKASSERT(off >= 0 && off < bp->b_bufsize);
658         return(bp->b_data + off);
659 }
660
661 int
662 hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
663                hammer2_io_t **diop)
664 {
665         *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEW);
666         return ((*diop)->error);
667 }
668
669 int
670 hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
671                  hammer2_io_t **diop)
672 {
673         *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEWNZ);
674         return ((*diop)->error);
675 }
676
/*
 * Read the buffer for (lbase, lsize) into *diop and return its error.
 * When HAMMER2_IO_DEBUG is enabled, also stash the caller-supplied
 * debug_data into the most recent debug history slot.
 */
int
_hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
#ifdef HAMMER2_IO_DEBUG
	hammer2_io_t *dio;
#endif

	*diop = _hammer2_io_getblk(hmp, btype, lbase, lsize,
				   HAMMER2_DOP_READ HAMMER2_IO_DEBUG_CALL);
#ifdef HAMMER2_IO_DEBUG
	if ((dio = *diop) != NULL) {
		/* getblk already advanced debug_index; back up one slot */
		int i = (dio->debug_index - 1) & HAMMER2_IO_DEBUG_MASK;
		dio->debug_data[i] = debug_data;
	}
#endif
	return ((*diop)->error);
}
695
696 hammer2_io_t *
697 _hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase,
698                      int lsize HAMMER2_IO_DEBUG_ARGS)
699 {
700         hammer2_io_t *dio;
701
702         dio = _hammer2_io_getblk(hmp, 0, lbase, lsize,
703                                  HAMMER2_DOP_READQ HAMMER2_IO_DEBUG_CALL);
704         return dio;
705 }
706
707 void
708 _hammer2_io_bawrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
709 {
710         atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
711                                       HAMMER2_DIO_FLUSH);
712         _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
713 }
714
715 void
716 _hammer2_io_bdwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
717 {
718         atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
719         _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
720 }
721
722 int
723 _hammer2_io_bwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
724 {
725         atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
726                                       HAMMER2_DIO_FLUSH);
727         _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
728         return (0);     /* XXX */
729 }
730
/*
 * Mark the DIO's buffer dirty; the write is issued on the last ref drop.
 */
void
hammer2_io_setdirty(hammer2_io_t *dio)
{
	atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
}
736
/*
 * This routine is called when a MODIFIED chain is being DESTROYED,
 * in an attempt to allow the related buffer cache buffer to be
 * invalidated and discarded instead of flushing it to disk.
 *
 * At the moment this case is only really useful for file meta-data.
 * File data is already handled via the logical buffer cache associated
 * with the vnode, and will be discarded if it was never flushed to disk.
 * File meta-data may include inodes, directory entries, and indirect blocks.
 *
 * XXX
 * However, our DIO buffers are PBUFSIZE'd (64KB), and the area being
 * invalidated might be smaller.  Most of the meta-data structures above
 * are in the 'smaller' category.  For now, don't try to invalidate the
 * data areas.
 */
void
hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes)
{
	/* NOP - intentionally does nothing, see XXX above */
}
758
/*
 * Release the DIO without dirtying it (brelse-style disposal).
 */
void
_hammer2_io_brelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}
764
/*
 * Release the DIO without dirtying it (bqrelse-style disposal).
 */
void
_hammer2_io_bqrelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}
770
771 /*
772  * Set dedup validation bits in a DIO.  We do not need the buffer cache
773  * buffer for this.  This must be done concurrent with setting bits in
774  * the freemap so as to interlock with bulkfree's clearing of those bits.
775  */
776 void
777 hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
778 {
779         hammer2_io_t *dio;
780         uint64_t mask;
781         int lsize;
782         int isgood;
783
784         dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1, &isgood);
785         lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
786         mask = hammer2_dedup_mask(dio, bref->data_off, lsize);
787         atomic_clear_64(&dio->dedup_valid, mask);
788         atomic_set_64(&dio->dedup_alloc, mask);
789         hammer2_io_putblk(&dio);
790 }
791
792 /*
793  * Clear dedup validation bits in a DIO.  This is typically done when
794  * a modified chain is destroyed or by the bulkfree code.  No buffer
795  * is needed for this operation.  If the DIO no longer exists it is
796  * equivalent to the bits not being set.
797  */
798 void
799 hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype,
800                         hammer2_off_t data_off, u_int bytes)
801 {
802         hammer2_io_t *dio;
803         uint64_t mask;
804         int isgood;
805
806         if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
807                 return;
808         if (btype != HAMMER2_BREF_TYPE_DATA)
809                 return;
810         dio = hammer2_io_alloc(hmp, data_off, btype, 0, &isgood);
811         if (dio) {
812                 if (data_off < dio->pbase ||
813                     (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes >
814                     dio->pbase + dio->psize) {
815                         panic("hammer2_dedup_delete: DATAOFF BAD "
816                               "%016jx/%d %016jx\n",
817                               data_off, bytes, dio->pbase);
818                 }
819                 mask = hammer2_dedup_mask(dio, data_off, bytes);
820                 atomic_clear_64(&dio->dedup_alloc, mask);
821                 atomic_clear_64(&dio->dedup_valid, mask);
822                 hammer2_io_putblk(&dio);
823         }
824 }
825
826 /*
827  * Assert that dedup allocation bits in a DIO are not set.  This operation
828  * does not require a buffer.  The DIO does not need to exist.
829  */
830 void
831 hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off, u_int bytes)
832 {
833         hammer2_io_t *dio;
834         int isgood;
835
836         dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA,
837                                0, &isgood);
838         if (dio) {
839                 KASSERT((dio->dedup_alloc &
840                           hammer2_dedup_mask(dio, data_off, bytes)) == 0,
841                         ("hammer2_dedup_assert: %016jx/%d %016jx/%016jx",
842                         data_off,
843                         bytes,
844                         hammer2_dedup_mask(dio, data_off, bytes),
845                         dio->dedup_alloc));
846                 hammer2_io_putblk(&dio);
847         }
848 }
849
850 static
851 void
852 dio_write_stats_update(hammer2_io_t *dio, struct buf *bp)
853 {
854         long *counterp;
855
856         if (bp->b_flags & B_DELWRI)
857                 return;
858
859         switch(dio->btype) {
860         case 0:
861                 return;
862         case HAMMER2_BREF_TYPE_DATA:
863                 counterp = &hammer2_iod_file_write;
864                 break;
865         case HAMMER2_BREF_TYPE_DIRENT:
866         case HAMMER2_BREF_TYPE_INODE:
867                 counterp = &hammer2_iod_meta_write;
868                 break;
869         case HAMMER2_BREF_TYPE_INDIRECT:
870                 counterp = &hammer2_iod_indr_write;
871                 break;
872         case HAMMER2_BREF_TYPE_FREEMAP_NODE:
873         case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
874                 counterp = &hammer2_iod_fmap_write;
875                 break;
876         default:
877                 counterp = &hammer2_iod_volu_write;
878                 break;
879         }
880         *counterp += dio->psize;
881 }
882
/*
 * Synchronize the buffer's KVA mapping for the current cpu (KVABIO).
 * The DIO must have an instantiated buffer.
 */
void
hammer2_io_bkvasync(hammer2_io_t *dio)
{
	KKASSERT(dio->bp != NULL);
	bkvasync(dio->bp);
}
889
/*
 * Ref a dio that is already owned
 */
void
_hammer2_io_ref(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
	/* caller already holds a ref, so no 0->1 bookkeeping is needed */
	atomic_add_64(&dio->refs, 1);
}