usr.sbin/makefs: Sync with sys/vfs/hammer2
[dragonfly.git] / usr.sbin / makefs / hammer2 / hammer2_io.c
1 /*
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
5  * Copyright (c) 2011-2022 The DragonFly Project.  All rights reserved.
6  *
7  * This code is derived from software contributed to The DragonFly Project
8  * by Matthew Dillon <dillon@dragonflybsd.org>
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  *
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in
18  *    the documentation and/or other materials provided with the
19  *    distribution.
20  * 3. Neither the name of The DragonFly Project nor the names of its
21  *    contributors may be used to endorse or promote products derived
22  *    from this software without specific, prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
28  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
29  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
30  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
32  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
33  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
34  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  */
37
38 #include "hammer2.h"
39
40 #define HAMMER2_DOP_READ        1
41 #define HAMMER2_DOP_NEW         2
42 #define HAMMER2_DOP_NEWNZ       3
43 #define HAMMER2_DOP_READQ       4
44
45 /*
46  * Implements an abstraction layer for synchronous and asynchronous
47  * buffered device I/O.  Can be used as an OS-abstraction but the main
48  * purpose is to allow larger buffers to be used against hammer2_chain's
49  * using smaller allocations, without causing deadlocks.
50  *
51  * The DIOs also record temporary state with limited persistence.  This
52  * feature is used to keep track of dedupable blocks.
53  */
54 static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
55 static void dio_write_stats_update(hammer2_io_t *dio, struct buf *bp);
56
57 static int
58 hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
59 {
60         if (io1->pbase < io2->pbase)
61                 return(-1);
62         if (io1->pbase > io2->pbase)
63                 return(1);
64         return(0);
65 }
66
/* Generate the red-black tree support functions, keyed on dio->pbase. */
RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
		off_t, pbase);

/* Context handed to hammer2_io_cleanup_callback() via RB_SCAN. */
struct hammer2_cleanupcb_info {
	struct hammer2_io_tree tmptree;	/* collects DIOs to be destroyed */
	int	count;			/* maximum number of DIOs to reap */
};
75
#if 0
/*
 * (Currently compiled out.)  Compute a 64-bit mask with one bit per 1KB
 * sub-chunk covering (off, bytes) within the DIO's physical buffer.
 */
static __inline
uint64_t
hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
	uint64_t mask;
	int i;

	if (bytes < 1024)	/* smaller chunks not supported */
		return 0;

	/*
	 * Calculate crc check mask for larger chunks
	 */
	i = (((off & ~HAMMER2_OFF_MASK_RADIX) - dio->pbase) &
	     HAMMER2_PBUFMASK) >> 10;
	if (i == 0 && bytes == HAMMER2_PBUFSIZE)
		return((uint64_t)-1);
	mask = ((uint64_t)1U << (bytes >> 10)) - 1;
	mask <<= i;

	return mask;
}
#endif
100
#ifdef HAMMER2_IO_DEBUG

/*
 * Record (file, line, refs, thread) into the DIO's circular debug log.
 * Only compiled in when HAMMER2_IO_DEBUG is defined; otherwise
 * DIO_RECORD() expands to nothing.
 */
static __inline void
DIO_RECORD(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
	int i;

	/* atomically advance the circular index, mask wraps it */
	i = atomic_fetchadd_int(&dio->debug_index, 1) & HAMMER2_IO_DEBUG_MASK;

	dio->debug_file[i] = file;
	dio->debug_line[i] = line;
	dio->debug_refs[i] = dio->refs;
	dio->debug_td[i] = curthread;
}

#else

#define DIO_RECORD(dio)

#endif
121
/*
 * Returns the DIO corresponding to the data|radix, creating it if necessary.
 *
 * If createit is 0, NULL can be returned indicating that the DIO does not
 * exist.  (btype) is ignored when createit is 0.
 *
 * On non-NULL return a ref has been added to the DIO (preventing its
 * destruction), and *isgoodp is set to 1 if the DIO's buffer is already
 * instantiated (DIO_GOOD), else 0.
 */
static
hammer2_io_t *
hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_key_t data_off, uint8_t btype,
		 int createit, int *isgoodp)
{
	hammer2_io_t *dio;
	hammer2_io_t *xio;
	hammer2_key_t lbase;
	hammer2_key_t pbase;
	hammer2_key_t pmask;
	hammer2_vfsvolume_t *vol;
	uint64_t refs;
	int lsize;
	int psize;

	/*
	 * Decode data_off: the low bits encode the size radix, the rest
	 * is the logical offset.  The physical buffer base (pbase) is the
	 * logical base rounded down to a PBUFSIZE boundary.
	 */
	psize = HAMMER2_PBUFSIZE;
	pmask = ~(hammer2_off_t)(psize - 1);
	if ((int)(data_off & HAMMER2_OFF_MASK_RADIX))
		lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
	else
		lsize = 0;
	lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;

	/* the logical range must not straddle a physical buffer boundary */
	if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
		kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
			pbase, lbase, lsize, pmask);
	}
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
	*isgoodp = 0;

	/*
	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
	 *
	 * If DIO_GOOD is set the ref should prevent it from being cleared
	 * out from under us, we can set *isgoodp, and the caller can operate
	 * on the buffer without any further interaction.
	 */
	hammer2_spin_sh(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
	if (dio) {
		refs = atomic_fetchadd_64(&dio->refs, 1);
		if ((refs & HAMMER2_DIO_MASK) == 0) {
			/* 0->1 transition: DIO no longer counts as free */
			atomic_add_int(&dio->hmp->iofree_count, -1);
		}
		if (refs & HAMMER2_DIO_GOOD)
			*isgoodp = 1;
		hammer2_spin_unsh(&hmp->io_spin);
	} else if (createit) {
		/*
		 * Allocate a new DIO outside the spinlock, then re-acquire
		 * it exclusively to insert.  Another thread may race the
		 * insertion; if so, discard ours and use the winner (xio).
		 */
		refs = 0;
		hammer2_spin_unsh(&hmp->io_spin);
		vol = hammer2_get_volume(hmp, pbase);
		dio = kmalloc_obj(sizeof(*dio), hmp->mio, M_INTWAIT | M_ZERO);
		dio->hmp = hmp;
		dio->devvp = vol->dev->devvp;
		dio->dbase = vol->offset;
		KKASSERT((dio->dbase & HAMMER2_FREEMAP_LEVEL1_MASK) == 0);
		dio->pbase = pbase;
		dio->psize = psize;
		dio->btype = btype;
		dio->refs = refs + 1;
		dio->act = 5;
		hammer2_spin_ex(&hmp->io_spin);
		xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
		if (xio == NULL) {
			atomic_add_int(&hammer2_dio_count, 1);
			hammer2_spin_unex(&hmp->io_spin);
		} else {
			/* lost the race, ref the existing DIO instead */
			refs = atomic_fetchadd_64(&xio->refs, 1);
			if ((refs & HAMMER2_DIO_MASK) == 0)
				atomic_add_int(&xio->hmp->iofree_count, -1);
			if (refs & HAMMER2_DIO_GOOD)
				*isgoodp = 1;
			hammer2_spin_unex(&hmp->io_spin);
			kfree_obj(dio, hmp->mio);
			dio = xio;
		}
	} else {
		hammer2_spin_unsh(&hmp->io_spin);
		return NULL;
	}
	/* refresh the activity heuristic used by the cleanup scan */
	dio->ticks = ticks;
	if (dio->act < 10)
		++dio->act;

	return dio;
}
215
/*
 * Acquire the requested dio.  If DIO_GOOD is not set we must instantiate
 * a buffer.  If set the buffer already exists and is good to go.
 *
 * (op) selects the buffer disposition: DOP_READ reads it from the device,
 * DOP_NEW instantiates and zeros it, DOP_NEWNZ instantiates without
 * zeroing, and DOP_READQ only succeeds if the DIO already exists
 * (returns NULL otherwise).
 */
hammer2_io_t *
_hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase,
		   int lsize, int op HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_io_t *dio;
	hammer2_off_t dev_pbase;
	//off_t peof;
	uint64_t orefs;
	uint64_t nrefs;
	int isgood;
	int error;
	int hce;
	//int bflags;

	//bflags = ((btype == HAMMER2_BREF_TYPE_DATA) ? B_NOTMETA : 0);
	//bflags |= B_KVABIO;

	/* lsize must match the radix encoded in the low bits of lbase */
	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);

	if (op == HAMMER2_DOP_READQ) {
		/* "quick" op: do not create the DIO if it doesn't exist */
		dio = hammer2_io_alloc(hmp, lbase, btype, 0, &isgood);
		if (dio == NULL)
			return NULL;
		op = HAMMER2_DOP_READ;
	} else {
		dio = hammer2_io_alloc(hmp, lbase, btype, 1, &isgood);
	}

	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		/*
		 * Buffer is already good, handle the op and return.
		 */
		if (orefs & HAMMER2_DIO_GOOD) {
			if (isgood == 0)
				cpu_mfence();
			bkvasync(dio->bp);

			switch(op) {
			case HAMMER2_DOP_NEW:
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				/* nothing to do */
				break;
			}
			DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
			return (dio);
		}

		/*
		 * Try to own the DIO
		 */
		if (orefs & HAMMER2_DIO_INPROG) {
			/*
			 * Another thread is instantiating the buffer; set
			 * WAITING and sleep until it finishes, then retry.
			 */
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			nrefs = orefs | HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				break;
			}
		}
	}

	/*
	 * We break to here if GOOD is not set and we acquired INPROG for
	 * the I/O.
	 */
	KKASSERT(dio->bp == NULL);
	if (btype == HAMMER2_BREF_TYPE_DATA)
		hce = hammer2_cluster_data_read;
	else
		hce = hammer2_cluster_meta_read;

	error = 0;
	/* translate the hammer2 physical offset to a device offset */
	dev_pbase = dio->pbase - dio->dbase;
	if (dio->pbase == (lbase & ~HAMMER2_OFF_MASK_RADIX) &&
	    dio->psize == lsize) {
		/*
		 * The request covers the entire physical buffer.  NEW ops
		 * can instantiate the buffer without touching the media.
		 */
		switch(op) {
		case HAMMER2_DOP_NEW:
		case HAMMER2_DOP_NEWNZ:
			dio->bp = getblkx(dio->devvp,
					 dev_pbase, dio->psize,
					 GETBLK_KVABIO, 0);
			if (op == HAMMER2_DOP_NEW) {
				bkvasync(dio->bp);
				bzero(dio->bp->b_data, dio->psize);
			}
			atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
			break;
		case HAMMER2_DOP_READ:
		default:
			KKASSERT(dio->bp == NULL);
#if 0
			if (hce > 0) {
				/*
				 * Synchronous cluster I/O for now.
				 */
				peof = (dio->pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				peof -= dio->dbase;
				error = cluster_readx(dio->devvp,
						     peof, dev_pbase,
						     dio->psize, bflags,
						     dio->psize,
						     HAMMER2_PBUFSIZE*hce,
						     &dio->bp);
			} else {
				error = breadnx(dio->devvp, dev_pbase,
						dio->psize, bflags,
						NULL, NULL, 0, &dio->bp);
			}
#else
			error = breadx(dio->devvp, dev_pbase, dio->psize, &dio->bp);
#endif
			break;
		}
	} else {
		/*
		 * Partial coverage: the underlying physical buffer must
		 * always be read, then NEW semantics are applied to the
		 * requested sub-range only.
		 */
#if 0
		if (hce > 0) {
			/*
			 * Synchronous cluster I/O for now.
			 */
			peof = (dio->pbase + HAMMER2_SEGMASK64) &
			       ~HAMMER2_SEGMASK64;
			peof -= dio->dbase;
			error = cluster_readx(dio->devvp,
					      peof, dev_pbase, dio->psize,
					      bflags,
					      dio->psize, HAMMER2_PBUFSIZE*hce,
					      &dio->bp);
		} else {
			error = breadnx(dio->devvp, dev_pbase,
					dio->psize, bflags,
					NULL, NULL, 0, &dio->bp);
		}
#else
		error = breadx(dio->devvp, dev_pbase, dio->psize, &dio->bp);
#endif
		if (dio->bp) {
			/*
			 * Handle NEW flags
			 */
			switch(op) {
			case HAMMER2_DOP_NEW:
				bkvasync(dio->bp);
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				break;
			}

			/*
			 * Tell the kernel that the buffer cache is not
			 * meta-data based on the btype.  This allows
			 * swapcache to distinguish between data and
			 * meta-data.
			 */
			switch(btype) {
			case HAMMER2_BREF_TYPE_DATA:
				//dio->bp->b_flags |= B_NOTMETA;
				break;
			default:
				break;
			}
		}
	}

	if (dio->bp) {
		bkvasync(dio->bp);
		BUF_KERNPROC(dio->bp);
		//dio->bp->b_flags &= ~B_AGE;
		/* dio->bp->b_debug_info2 = dio; */
	}
	dio->error = error;

	/*
	 * Clear INPROG and WAITING, set GOOD wake up anyone waiting.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_WAITING);
		if (error == 0)
			nrefs |= HAMMER2_DIO_GOOD;
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/* XXX error handling */
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

	return dio;
}
432
/*
 * Release our ref on *diop.
 *
 * On the 1->0 transition we clear DIO_GOOD, set DIO_INPROG, and dispose
 * of dio->bp.  Then we clean up DIO_INPROG and DIO_WAITING.
 *
 * The DIO itself is cached for re-use; if too many idle DIOs accumulate
 * a portion of them is reaped at the end.
 */
void
_hammer2_io_putblk(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	struct buf *bp;
	off_t pbase;
	int psize;
	int dio_limit;
	uint64_t orefs;
	uint64_t nrefs;

	dio = *diop;
	*diop = NULL;
	hmp = dio->hmp;
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

	KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0);

	/*
	 * Drop refs.
	 *
	 * On the 1->0 transition clear GOOD and set INPROG, and break.
	 * On any other transition we can return early.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		if ((orefs & HAMMER2_DIO_MASK) == 1 &&
		    (orefs & HAMMER2_DIO_INPROG) == 0) {
			/*
			 * Lastdrop case, INPROG can be set.  GOOD must be
			 * cleared to prevent the getblk shortcut.
			 */
			nrefs = orefs - 1;
			nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
			nrefs |= HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				break;
		} else if ((orefs & HAMMER2_DIO_MASK) == 1) {
			/*
			 * Lastdrop case, INPROG already set.  We must
			 * wait for INPROG to clear.
			 */
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			/*
			 * Normal drop case.
			 */
			nrefs = orefs - 1;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				return;
			/* retry */
		}
		cpu_pause();
		/* retry */
	}

	/*
	 * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
	 * have been cleared.  iofree_count has not yet been incremented,
	 * note that another accessor race will decrement iofree_count so
	 * we have to increment it regardless.
	 * We can now dispose of the buffer.
	 */
	pbase = dio->pbase;
	psize = dio->psize;
	bp = dio->bp;
	dio->bp = NULL;

	if ((orefs & HAMMER2_DIO_GOOD) && bp) {
		/*
		 * Non-errored disposal of bp
		 */
		if (orefs & HAMMER2_DIO_DIRTY) {
			dio_write_stats_update(dio, bp);

			/*
			 * Allows dirty buffers to accumulate and
			 * possibly be canceled (e.g. by a 'rm'),
			 * by default we will burst-write later.
			 *
			 * We generally do NOT want to issue an actual
			 * b[a]write() or cluster_write() here.  Due to
			 * the way chains are locked, buffers may be cycled
			 * in and out quite often and disposal here can cause
			 * multiple writes or write-read stalls.
			 *
			 * If FLUSH is set we do want to issue the actual
			 * write.  This typically occurs in the write-behind
			 * case when writing to large files.
			 */
			//off_t peof;
			//int hce;
			if (dio->refs & HAMMER2_DIO_FLUSH) {
#if 0
				if ((hce = hammer2_cluster_write) != 0) {
					peof = (pbase + HAMMER2_SEGMASK64) &
					       ~HAMMER2_SEGMASK64;
					peof -= dio->dbase;
					bp->b_flags |= B_CLUSTEROK;
					cluster_write(bp, peof, psize, hce);
				} else {
					bp->b_flags &= ~B_CLUSTEROK;
					bawrite(bp);
				}
#else
				bawrite(bp);
#endif
			} else {
				//bp->b_flags &= ~B_CLUSTEROK;
				bdwrite(bp);
			}
#if 0
		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
			brelse(bp);
#endif
		} else {
			/* clean buffer, release without writing */
			bqrelse(bp);
		}
	} else if (bp) {
		/*
		 * Errored disposal of bp
		 */
		brelse(bp);
	}

	/*
	 * Update iofree_count before disposing of the dio
	 */
	hmp = dio->hmp;
	atomic_add_int(&hmp->iofree_count, 1);

	/*
	 * Clear INPROG, GOOD, and WAITING (GOOD should already be clear).
	 *
	 * Also clear FLUSH as it was handled above.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_GOOD |
				  HAMMER2_DIO_WAITING | HAMMER2_DIO_FLUSH);
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/*
	 * We cache free buffers so re-use cases can use a shared lock, but
	 * if too many build up we have to clean them out.
	 */
	dio_limit = hammer2_dio_limit;
	if (dio_limit < 256)
		dio_limit = 256;
	if (dio_limit > 1024*1024)
		dio_limit = 1024*1024;
	if (hmp->iofree_count > dio_limit) {
		struct hammer2_cleanupcb_info info;

		RB_INIT(&info.tmptree);
		hammer2_spin_ex(&hmp->io_spin);
		if (hmp->iofree_count > dio_limit) {
			/* reap roughly a fifth of the cached DIOs */
			info.count = hmp->iofree_count / 5;
			RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
				hammer2_io_cleanup_callback, &info);
		}
		hammer2_spin_unex(&hmp->io_spin);
		hammer2_io_cleanup(hmp, &info.tmptree);
	}
}
619
620 /*
621  * Cleanup any dio's with (INPROG | refs) == 0.
622  */
623 static
624 int
625 hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
626 {
627         struct hammer2_cleanupcb_info *info = arg;
628         hammer2_io_t *xio __debugvar;
629
630         if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
631                 /*
632                 if (dio->act > 0) {
633                         int act;
634
635                         act = dio->act - (ticks - dio->ticks) / hz - 1;
636                         if (act > 0) {
637                                 dio->act = act;
638                                 return 0;
639                         }
640                         dio->act = 0;
641                 }
642                 */
643                 KKASSERT(dio->bp == NULL);
644                 if (info->count > 0) {
645                         RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
646                         xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
647                         KKASSERT(xio == NULL);
648                         --info->count;
649                 }
650         }
651         return 0;
652 }
653
654 void
655 hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
656 {
657         hammer2_io_t *dio;
658
659         while ((dio = RB_ROOT(tree)) != NULL) {
660                 RB_REMOVE(hammer2_io_tree, tree, dio);
661                 KKASSERT(dio->bp == NULL &&
662                     (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
663                 if (dio->refs & HAMMER2_DIO_DIRTY) {
664                         kprintf("hammer2_io_cleanup: Dirty buffer "
665                                 "%016jx/%d (bp=%p)\n",
666                                 dio->pbase, dio->psize, dio->bp);
667                 }
668                 kfree_obj(dio, hmp->mio);
669                 atomic_add_int(&hammer2_dio_count, -1);
670                 atomic_add_int(&hmp->iofree_count, -1);
671         }
672 }
673
674 /*
675  * Returns a pointer to the requested data.
676  */
677 char *
678 hammer2_io_data(hammer2_io_t *dio, off_t lbase)
679 {
680         struct buf *bp;
681         int off;
682
683         bp = dio->bp;
684         KKASSERT(bp != NULL);
685         bkvasync(bp);
686         lbase -= dio->dbase;
687         off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
688         KKASSERT(off >= 0 && off < bp->b_bufsize);
689         return((char *)bp->b_data + off);
690 }
691
692 int
693 hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
694                hammer2_io_t **diop)
695 {
696         *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEW);
697         return ((*diop)->error);
698 }
699
700 int
701 hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
702                  hammer2_io_t **diop)
703 {
704         *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEWNZ);
705         return ((*diop)->error);
706 }
707
/*
 * Acquire a DIO and read its buffer from the media (DOP_READ).
 * Returns the dio error code; *diop receives the referenced DIO.
 */
int
_hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
#ifdef HAMMER2_IO_DEBUG
	hammer2_io_t *dio;
#endif

	*diop = _hammer2_io_getblk(hmp, btype, lbase, lsize,
				   HAMMER2_DOP_READ HAMMER2_IO_DEBUG_CALL);
#ifdef HAMMER2_IO_DEBUG
	if ((dio = *diop) != NULL) {
#if 0
		int i = (dio->debug_index - 1) & HAMMER2_IO_DEBUG_MASK;
		dio->debug_data[i] = debug_data;
#endif
	}
#endif
	return ((*diop)->error);
}
728
729 hammer2_io_t *
730 _hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase,
731                      int lsize HAMMER2_IO_DEBUG_ARGS)
732 {
733         hammer2_io_t *dio;
734
735         dio = _hammer2_io_getblk(hmp, 0, lbase, lsize,
736                                  HAMMER2_DOP_READQ HAMMER2_IO_DEBUG_CALL);
737         return dio;
738 }
739
740 void
741 _hammer2_io_bawrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
742 {
743         atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
744                                       HAMMER2_DIO_FLUSH);
745         _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
746 }
747
748 void
749 _hammer2_io_bdwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
750 {
751         atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
752         _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
753 }
754
755 int
756 _hammer2_io_bwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
757 {
758         atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
759                                       HAMMER2_DIO_FLUSH);
760         _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
761         return (0);     /* XXX */
762 }
763
764 void
765 hammer2_io_setdirty(hammer2_io_t *dio)
766 {
767         atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
768 }
769
/*
 * This routine is called when a MODIFIED chain is being DESTROYED,
 * in an attempt to allow the related buffer cache buffer to be
 * invalidated and discarded instead of flushing it to disk.
 *
 * At the moment this case is only really useful for file meta-data.
 * File data is already handled via the logical buffer cache associated
 * with the vnode, and will be discarded if it was never flushed to disk.
 * File meta-data may include inodes, directory entries, and indirect blocks.
 *
 * XXX
 * However, our DIO buffers are PBUFSIZE'd (64KB), and the area being
 * invalidated might be smaller.  Most of the meta-data structures above
 * are in the 'smaller' category.  For now, don't try to invalidate the
 * data areas.
 */
void
hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes)
{
	/* NOP - intentionally does nothing, see the XXX note above */
}
791
792 void
793 _hammer2_io_brelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
794 {
795         _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
796 }
797
798 void
799 _hammer2_io_bqrelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
800 {
801         _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
802 }
803
804 /*
805  * Set dedup validation bits in a DIO.  We do not need the buffer cache
806  * buffer for this.  This must be done concurrent with setting bits in
807  * the freemap so as to interlock with bulkfree's clearing of those bits.
808  */
809 void
810 hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
811 {
812         hammer2_io_t *dio;
813         uint64_t mask;
814         int lsize;
815         int isgood;
816
817         dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1, &isgood);
818         if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))
819                 lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
820         else
821                 lsize = 0;
822         mask = hammer2_dedup_mask(dio, bref->data_off, lsize);
823         atomic_clear_64(&dio->dedup_valid, mask);
824         atomic_set_64(&dio->dedup_alloc, mask);
825         hammer2_io_putblk(&dio);
826 }
827
828 /*
829  * Clear dedup validation bits in a DIO.  This is typically done when
830  * a modified chain is destroyed or by the bulkfree code.  No buffer
831  * is needed for this operation.  If the DIO no longer exists it is
832  * equivalent to the bits not being set.
833  */
834 void
835 hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype,
836                         hammer2_off_t data_off, u_int bytes)
837 {
838         hammer2_io_t *dio;
839         uint64_t mask;
840         int isgood;
841
842         if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
843                 return;
844         if (btype != HAMMER2_BREF_TYPE_DATA)
845                 return;
846         dio = hammer2_io_alloc(hmp, data_off, btype, 0, &isgood);
847         if (dio) {
848                 if (data_off < dio->pbase ||
849                     (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes >
850                     dio->pbase + dio->psize) {
851                         panic("hammer2_io_dedup_delete: DATAOFF BAD "
852                               "%016jx/%d %016jx\n",
853                               data_off, bytes, dio->pbase);
854                 }
855                 mask = hammer2_dedup_mask(dio, data_off, bytes);
856                 atomic_clear_64(&dio->dedup_alloc, mask);
857                 atomic_clear_64(&dio->dedup_valid, mask);
858                 hammer2_io_putblk(&dio);
859         }
860 }
861
862 /*
863  * Assert that dedup allocation bits in a DIO are not set.  This operation
864  * does not require a buffer.  The DIO does not need to exist.
865  */
866 void
867 hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off, u_int bytes)
868 {
869         hammer2_io_t *dio;
870         int isgood;
871
872         dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA,
873                                0, &isgood);
874         if (dio) {
875                 KASSERT((dio->dedup_alloc &
876                           hammer2_dedup_mask(dio, data_off, bytes)) == 0,
877                         ("hammer2_dedup_assert: %016jx/%d %016jx/%016jx",
878                         data_off,
879                         bytes,
880                         hammer2_dedup_mask(dio, data_off, bytes),
881                         dio->dedup_alloc));
882                 hammer2_io_putblk(&dio);
883         }
884 }
885
886 static
887 void
888 dio_write_stats_update(hammer2_io_t *dio, struct buf *bp)
889 {
890         /*
891         if (bp->b_flags & B_DELWRI)
892                 return;
893         */
894         hammer2_adjwritecounter(dio->btype, dio->psize);
895 }
896
897 void
898 hammer2_io_bkvasync(hammer2_io_t *dio)
899 {
900         KKASSERT(dio->bp != NULL);
901         bkvasync(dio->bp);
902 }
903
904 /*
905  * Ref a dio that is already owned
906  */
907 void
908 _hammer2_io_ref(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
909 {
910         DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
911         atomic_add_64(&dio->refs, 1);
912 }