sys/vfs/hammer2: Adjust some kprintfs in vfsops
[dragonfly.git] / sys / vfs / hammer2 / hammer2_io.c
1 /*
2  * Copyright (c) 2013-2018 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35 #include "hammer2.h"
36
37 #define HAMMER2_DOP_READ        1
38 #define HAMMER2_DOP_NEW         2
39 #define HAMMER2_DOP_NEWNZ       3
40 #define HAMMER2_DOP_READQ       4
41
42 /*
43  * Implements an abstraction layer for synchronous and asynchronous
44  * buffered device I/O.  Can be used as an OS-abstraction but the main
45  * purpose is to allow larger buffers to be used against hammer2_chain's
46  * using smaller allocations, without causing deadlocks.
47  *
48  * The DIOs also record temporary state with limited persistence.  This
49  * feature is used to keep track of dedupable blocks.
50  */
51 static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
52 static void dio_write_stats_update(hammer2_io_t *dio, struct buf *bp);
53
54 static int
55 hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
56 {
57         if (io1->pbase < io2->pbase)
58                 return(-1);
59         if (io1->pbase > io2->pbase)
60                 return(1);
61         return(0);
62 }
63
64 RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
65 RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
66                 off_t, pbase);
67
/*
 * Per-scan context for hammer2_io_cleanup_callback(): idle DIOs are
 * moved into (tmptree) for later disposal, up to (count) of them.
 */
struct hammer2_cleanupcb_info {
	struct hammer2_io_tree tmptree;
	int	count;		/* max number of DIOs to collect */
};
72
#if 0
/*
 * (currently compiled out)
 *
 * Compute a 64-bit coverage mask, one bit per 1KB sub-chunk of the DIO's
 * physical buffer, for (bytes) at device offset (off).  Chunks smaller
 * than 1KB are not supported and yield a zero mask; a request covering
 * the full PBUFSIZE buffer yields an all-ones mask.
 */
static __inline
uint64_t
hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
	uint64_t mask;
	int i;

	if (bytes < 1024)	/* smaller chunks not supported */
		return 0;

	/*
	 * Calculate crc check mask for larger chunks
	 */
	i = (((off & ~HAMMER2_OFF_MASK_RADIX) - dio->pbase) &
	     HAMMER2_PBUFMASK) >> 10;
	if (i == 0 && bytes == HAMMER2_PBUFSIZE)
		return((uint64_t)-1);
	mask = ((uint64_t)1U << (bytes >> 10)) - 1;
	mask <<= i;

	return mask;
}
#endif
97
#ifdef HAMMER2_IO_DEBUG

/*
 * Debug builds only: record the caller's (file, line), the current
 * refs snapshot, and the current thread into a small ring buffer in
 * the dio, indexed by debug_index masked with HAMMER2_IO_DEBUG_MASK.
 */
static __inline void
DIO_RECORD(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
	int i;

	i = atomic_fetchadd_int(&dio->debug_index, 1) & HAMMER2_IO_DEBUG_MASK;

	dio->debug_file[i] = file;
	dio->debug_line[i] = line;
	dio->debug_refs[i] = dio->refs;
	dio->debug_td[i] = curthread;
}

#else

/* Non-debug builds: DIO_RECORD compiles away entirely */
#define DIO_RECORD(dio)

#endif
118
/*
 * Returns the DIO corresponding to the data|radix, creating it if necessary.
 *
 * If createit is 0, NULL can be returned indicating that the DIO does not
 * exist.  (btype) is ignored when createit is 0.
 *
 * The low bits of data_off encode the size radix of the logical element;
 * the remaining bits are its device offset.  On success one ref has been
 * added to the returned DIO on behalf of the caller, and *isgoodp is set
 * to 1 if the DIO's buffer is already instantiated and valid
 * (HAMMER2_DIO_GOOD), else 0.
 */
static __inline
hammer2_io_t *
hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_key_t data_off, uint8_t btype,
		 int createit, int *isgoodp)
{
	hammer2_io_t *dio;
	hammer2_io_t *xio;
	hammer2_key_t lbase;
	hammer2_key_t pbase;
	hammer2_key_t pmask;
	uint64_t refs;
	int lsize;
	int psize;

	/*
	 * Decode the logical base/size from data_off and round down to
	 * the physical (PBUFSIZE) buffer boundary.  The logical element
	 * must not straddle a physical buffer.
	 */
	psize = HAMMER2_PBUFSIZE;
	pmask = ~(hammer2_off_t)(psize - 1);
	lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
	lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;

	if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
		kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
			pbase, lbase, lsize, pmask);
	}
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
	*isgoodp = 0;

	/*
	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
	 *
	 * If DIO_GOOD is set the ref should prevent it from being cleared
	 * out from under us, we can set *isgoodp, and the caller can operate
	 * on the buffer without any further interaction.
	 */
	hammer2_spin_sh(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
	if (dio) {
		refs = atomic_fetchadd_64(&dio->refs, 1);
		if ((refs & HAMMER2_DIO_MASK) == 0) {
			/* 0->1 ref transition, dio no longer 'free' */
			atomic_add_int(&dio->hmp->iofree_count, -1);
		}
		if (refs & HAMMER2_DIO_GOOD)
			*isgoodp = 1;
		hammer2_spin_unsh(&hmp->io_spin);
	} else if (createit) {
		/*
		 * Not found: allocate a new DIO outside any spinlock,
		 * then re-check for a racing insert under the exclusive
		 * lock.
		 */
		refs = 0;
		hammer2_spin_unsh(&hmp->io_spin);
		dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
		dio->hmp = hmp;
		dio->pbase = pbase;
		dio->psize = psize;
		dio->btype = btype;
		dio->refs = refs + 1;
		dio->act = 5;
		hammer2_spin_ex(&hmp->io_spin);
		xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
		if (xio == NULL) {
			atomic_add_int(&hammer2_dio_count, 1);
			hammer2_spin_unex(&hmp->io_spin);
		} else {
			/* lost the race; ref the existing DIO instead */
			refs = atomic_fetchadd_64(&xio->refs, 1);
			if ((refs & HAMMER2_DIO_MASK) == 0)
				atomic_add_int(&xio->hmp->iofree_count, -1);
			if (refs & HAMMER2_DIO_GOOD)
				*isgoodp = 1;
			hammer2_spin_unex(&hmp->io_spin);
			kfree(dio, M_HAMMER2);
			dio = xio;
		}
	} else {
		hammer2_spin_unsh(&hmp->io_spin);
		return NULL;
	}
	/* refresh the activity heuristic used by the cleanup scan */
	dio->ticks = ticks;
	if (dio->act < 10)
		++dio->act;

	return dio;
}
204
/*
 * Acquire the requested dio.  If DIO_GOOD is not set we must instantiate
 * a buffer.  If set the buffer already exists and is good to go.
 *
 * (op) is one of the HAMMER2_DOP_* codes:
 *	DOP_READ  - read the backing device buffer.
 *	DOP_NEW   - instantiate and zero the logical range, mark dirty.
 *	DOP_NEWNZ - instantiate without zeroing, mark dirty.
 *	DOP_READQ - as DOP_READ, but do not create the DIO if it does
 *		    not already exist (returns NULL in that case).
 */
hammer2_io_t *
_hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase,
		   int lsize, int op HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_io_t *dio;
	off_t peof;
	uint64_t orefs;
	uint64_t nrefs;
	int isgood;
	int error;
	int hce;
	int bflags;

	/* data buffers are flagged B_NOTMETA so swapcache can tell them apart */
	bflags = ((btype == HAMMER2_BREF_TYPE_DATA) ? B_NOTMETA : 0);
	bflags |= B_KVABIO;

	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);

	if (op == HAMMER2_DOP_READQ) {
		/* quick-read: lookup only, do not create the DIO */
		dio = hammer2_io_alloc(hmp, lbase, btype, 0, &isgood);
		if (dio == NULL)
			return NULL;
		op = HAMMER2_DOP_READ;
	} else {
		dio = hammer2_io_alloc(hmp, lbase, btype, 1, &isgood);
	}

	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		/*
		 * Buffer is already good, handle the op and return.
		 */
		if (orefs & HAMMER2_DIO_GOOD) {
			if (isgood == 0)
				cpu_mfence();
			bkvasync(dio->bp);

			switch(op) {
			case HAMMER2_DOP_NEW:
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				/* nothing to do */
				break;
			}
			DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
			return (dio);
		}

		/*
		 * Try to own the DIO
		 */
		if (orefs & HAMMER2_DIO_INPROG) {
			/*
			 * Another thread is instantiating the buffer;
			 * interlock and sleep until it finishes, then
			 * re-evaluate from the top.
			 */
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			nrefs = orefs | HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				break;
			}
		}
	}

	/*
	 * We break to here if GOOD is not set and we acquired INPROG for
	 * the I/O.
	 */
	KKASSERT(dio->bp == NULL);
	if (btype == HAMMER2_BREF_TYPE_DATA)
		hce = hammer2_cluster_data_read;
	else
		hce = hammer2_cluster_meta_read;

	error = 0;
	if (dio->pbase == (lbase & ~HAMMER2_OFF_MASK_RADIX) &&
	    dio->psize == lsize) {
		/*
		 * The request exactly covers the physical buffer.  NEW
		 * ops can use getblk() and avoid touching the media.
		 */
		switch(op) {
		case HAMMER2_DOP_NEW:
		case HAMMER2_DOP_NEWNZ:
			dio->bp = getblk(dio->hmp->devvp,
					 dio->pbase, dio->psize,
					 GETBLK_KVABIO, 0);
			if (op == HAMMER2_DOP_NEW) {
				bkvasync(dio->bp);
				bzero(dio->bp->b_data, dio->psize);
			}
			atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
			break;
		case HAMMER2_DOP_READ:
		default:
			KKASSERT(dio->bp == NULL);
			if (hce > 0) {
				/*
				 * Synchronous cluster I/O for now.
				 */
				peof = (dio->pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				error = cluster_readx(dio->hmp->devvp,
						     peof, dio->pbase,
						     dio->psize, bflags,
						     dio->psize,
						     HAMMER2_PBUFSIZE*hce,
						     &dio->bp);
			} else {
				error = breadnx(dio->hmp->devvp, dio->pbase,
						dio->psize, bflags,
						NULL, NULL, 0, &dio->bp);
			}
		}
	} else {
		/*
		 * Partial coverage: the physical buffer must be read from
		 * the media even for NEW ops; only the requested logical
		 * range is then zeroed/dirtied.
		 */
		if (hce > 0) {
			/*
			 * Synchronous cluster I/O for now.
			 */
			peof = (dio->pbase + HAMMER2_SEGMASK64) &
			       ~HAMMER2_SEGMASK64;
			error = cluster_readx(dio->hmp->devvp,
					      peof, dio->pbase, dio->psize,
					      bflags,
					      dio->psize, HAMMER2_PBUFSIZE*hce,
					      &dio->bp);
		} else {
			error = breadnx(dio->hmp->devvp, dio->pbase,
					dio->psize, bflags,
					NULL, NULL, 0, &dio->bp);
		}
		if (dio->bp) {
			/*
			 * Handle NEW flags
			 */
			switch(op) {
			case HAMMER2_DOP_NEW:
				bkvasync(dio->bp);
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				break;
			}

			/*
			 * Tell the kernel that the buffer cache is not
			 * meta-data based on the btype.  This allows
			 * swapcache to distinguish between data and
			 * meta-data.
			 */
			switch(btype) {
			case HAMMER2_BREF_TYPE_DATA:
				dio->bp->b_flags |= B_NOTMETA;
				break;
			default:
				break;
			}
		}
	}

	if (dio->bp) {
		bkvasync(dio->bp);
		BUF_KERNPROC(dio->bp);
		dio->bp->b_flags &= ~B_AGE;
		/* dio->bp->b_debug_info2 = dio; */
	}
	dio->error = error;

	/*
	 * Clear INPROG and WAITING, set GOOD wake up anyone waiting.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_WAITING);
		if (error == 0)
			nrefs |= HAMMER2_DIO_GOOD;
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/* XXX error handling */
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

	return dio;
}
408
/*
 * Release our ref on *diop, zeroing the caller's pointer.
 *
 * On the 1->0 transition we clear DIO_GOOD, set DIO_INPROG, and dispose
 * of dio->bp.  Then we clean up DIO_INPROG and DIO_WAITING.  Dirty
 * buffers are written (deferred by default, synchronously if DIO_FLUSH
 * is set), and if too many free DIOs have accumulated a cleanup scan
 * is triggered.
 */
void
_hammer2_io_putblk(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	struct buf *bp;
	off_t pbase;
	int psize;
	int dio_limit;
	uint64_t orefs;
	uint64_t nrefs;

	dio = *diop;
	*diop = NULL;
	hmp = dio->hmp;
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

	KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0);

	/*
	 * Drop refs.
	 *
	 * On the 1->0 transition clear GOOD and set INPROG, and break.
	 * On any other transition we can return early.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		if ((orefs & HAMMER2_DIO_MASK) == 1 &&
		    (orefs & HAMMER2_DIO_INPROG) == 0) {
			/*
			 * Lastdrop case, INPROG can be set.  GOOD must be
			 * cleared to prevent the getblk shortcut.
			 */
			nrefs = orefs - 1;
			nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
			nrefs |= HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				break;
		} else if ((orefs & HAMMER2_DIO_MASK) == 1) {
			/*
			 * Lastdrop case, INPROG already set.  We must
			 * wait for INPROG to clear.
			 */
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			/*
			 * Normal drop case.
			 */
			nrefs = orefs - 1;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				return;
			/* retry */
		}
		cpu_pause();
		/* retry */
	}

	/*
	 * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
	 * have been cleared.  iofree_count has not yet been incremented,
	 * note that another accessor race will decrement iofree_count so
	 * we have to increment it regardless.
	 *
	 * We can now dispose of the buffer, and should do it before calling
	 * io_complete() in case there's a race against a new reference
	 * which causes io_complete() to chain and instantiate the bp again.
	 */
	pbase = dio->pbase;
	psize = dio->psize;
	bp = dio->bp;
	dio->bp = NULL;

	if ((orefs & HAMMER2_DIO_GOOD) && bp) {
		/*
		 * Non-errored disposal of bp
		 */
		if (orefs & HAMMER2_DIO_DIRTY) {
			dio_write_stats_update(dio, bp);

			/*
			 * Allows dirty buffers to accumulate and
			 * possibly be canceled (e.g. by a 'rm'),
			 * by default we will burst-write later.
			 *
			 * We generally do NOT want to issue an actual
			 * b[a]write() or cluster_write() here.  Due to
			 * the way chains are locked, buffers may be cycled
			 * in and out quite often and disposal here can cause
			 * multiple writes or write-read stalls.
			 *
			 * If FLUSH is set we do want to issue the actual
			 * write.  This typically occurs in the write-behind
			 * case when writing to large files.
			 */
			off_t peof;
			int hce;
			if (dio->refs & HAMMER2_DIO_FLUSH) {
				if ((hce = hammer2_cluster_write) != 0) {
					peof = (pbase + HAMMER2_SEGMASK64) &
					       ~HAMMER2_SEGMASK64;
					bp->b_flags |= B_CLUSTEROK;
					cluster_write(bp, peof, psize, hce);
				} else {
					bp->b_flags &= ~B_CLUSTEROK;
					bawrite(bp);
				}
			} else {
				bp->b_flags &= ~B_CLUSTEROK;
				bdwrite(bp);
			}
		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	} else if (bp) {
		/*
		 * Errored disposal of bp
		 */
		brelse(bp);
	}

	/*
	 * Update iofree_count before disposing of the dio
	 */
	hmp = dio->hmp;
	atomic_add_int(&hmp->iofree_count, 1);

	/*
	 * Clear INPROG, GOOD, and WAITING (GOOD should already be clear).
	 *
	 * Also clear FLUSH as it was handled above.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_GOOD |
				  HAMMER2_DIO_WAITING | HAMMER2_DIO_FLUSH);
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/*
	 * We cache free buffers so re-use cases can use a shared lock, but
	 * if too many build up we have to clean them out.
	 */
	dio_limit = hammer2_dio_limit;
	if (dio_limit < 256)
		dio_limit = 256;
	if (dio_limit > 1024*1024)
		dio_limit = 1024*1024;
	if (hmp->iofree_count > dio_limit) {
		struct hammer2_cleanupcb_info info;

		RB_INIT(&info.tmptree);
		hammer2_spin_ex(&hmp->io_spin);
		if (hmp->iofree_count > dio_limit) {
			/* re-check under the lock; target ~20% of the DIOs */
			info.count = hmp->iofree_count / 5;
			RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
				hammer2_io_cleanup_callback, &info);
		}
		hammer2_spin_unex(&hmp->io_spin);
		hammer2_io_cleanup(hmp, &info.tmptree);
	}
}
591
592 /*
593  * Cleanup any dio's with (INPROG | refs) == 0.
594  *
595  * Called to clean up cached DIOs on umount after all activity has been
596  * flushed.
597  */
598 static
599 int
600 hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
601 {
602         struct hammer2_cleanupcb_info *info = arg;
603         hammer2_io_t *xio;
604
605         if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
606                 if (dio->act > 0) {
607                         int act;
608
609                         act = dio->act - (ticks - dio->ticks) / hz - 1;
610                         if (act > 0) {
611                                 dio->act = act;
612                                 return 0;
613                         }
614                         dio->act = 0;
615                 }
616                 KKASSERT(dio->bp == NULL);
617                 if (info->count > 0) {
618                         RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
619                         xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
620                         KKASSERT(xio == NULL);
621                         --info->count;
622                 }
623         }
624         return 0;
625 }
626
627 void
628 hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
629 {
630         hammer2_io_t *dio;
631
632         while ((dio = RB_ROOT(tree)) != NULL) {
633                 RB_REMOVE(hammer2_io_tree, tree, dio);
634                 KKASSERT(dio->bp == NULL &&
635                     (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
636                 if (dio->refs & HAMMER2_DIO_DIRTY) {
637                         kprintf("hammer2_io_cleanup: Dirty buffer "
638                                 "%016jx/%d (bp=%p)\n",
639                                 dio->pbase, dio->psize, dio->bp);
640                 }
641                 kfree(dio, M_HAMMER2);
642                 atomic_add_int(&hammer2_dio_count, -1);
643                 atomic_add_int(&hmp->iofree_count, -1);
644         }
645 }
646
647 /*
648  * Returns a pointer to the requested data.
649  */
650 char *
651 hammer2_io_data(hammer2_io_t *dio, off_t lbase)
652 {
653         struct buf *bp;
654         int off;
655
656         bp = dio->bp;
657         KKASSERT(bp != NULL);
658         bkvasync(bp);
659         off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
660         KKASSERT(off >= 0 && off < bp->b_bufsize);
661         return(bp->b_data + off);
662 }
663
664 int
665 hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
666                hammer2_io_t **diop)
667 {
668         *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEW);
669         return ((*diop)->error);
670 }
671
672 int
673 hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
674                  hammer2_io_t **diop)
675 {
676         *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEWNZ);
677         return ((*diop)->error);
678 }
679
/*
 * Read the buffer backing (lbase, lsize) via HAMMER2_DOP_READ and return
 * its error status.  *diop receives the referenced DIO.
 */
int
_hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
#ifdef HAMMER2_IO_DEBUG
	hammer2_io_t *dio;
#endif

	*diop = _hammer2_io_getblk(hmp, btype, lbase, lsize,
				   HAMMER2_DOP_READ HAMMER2_IO_DEBUG_CALL);
#ifdef HAMMER2_IO_DEBUG
	/* stash caller-supplied debug data in the most recent record slot */
	if ((dio = *diop) != NULL) {
		int i = (dio->debug_index - 1) & HAMMER2_IO_DEBUG_MASK;
		dio->debug_data[i] = debug_data;
	}
#endif
	return ((*diop)->error);
}
698
699 hammer2_io_t *
700 _hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase,
701                      int lsize HAMMER2_IO_DEBUG_ARGS)
702 {
703         hammer2_io_t *dio;
704
705         dio = _hammer2_io_getblk(hmp, 0, lbase, lsize,
706                                  HAMMER2_DOP_READQ HAMMER2_IO_DEBUG_CALL);
707         return dio;
708 }
709
710 void
711 _hammer2_io_bawrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
712 {
713         atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
714                                       HAMMER2_DIO_FLUSH);
715         _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
716 }
717
718 void
719 _hammer2_io_bdwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
720 {
721         atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
722         _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
723 }
724
725 int
726 _hammer2_io_bwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
727 {
728         atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
729                                       HAMMER2_DIO_FLUSH);
730         _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
731         return (0);     /* XXX */
732 }
733
/*
 * Mark the DIO dirty.  The actual write is deferred until the last ref
 * is dropped in hammer2_io_putblk().
 */
void
hammer2_io_setdirty(hammer2_io_t *dio)
{
	atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
}
739
/*
 * This routine is called when a MODIFIED chain is being DESTROYED,
 * in an attempt to allow the related buffer cache buffer to be
 * invalidated and discarded instead of flushing it to disk.
 *
 * At the moment this case is only really useful for file meta-data.
 * File data is already handled via the logical buffer cache associated
 * with the vnode, and will be discarded if it was never flushed to disk.
 * File meta-data may include inodes, directory entries, and indirect blocks.
 *
 * XXX
 * However, our DIO buffers are PBUFSIZE'd (64KB), and the area being
 * invalidated might be smaller.  Most of the meta-data structures above
 * are in the 'smaller' category.  For now, don't try to invalidate the
 * data areas.
 */
void
hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes)
{
	/* NOP - intentionally empty, see the XXX above */
}
761
/*
 * Release the caller's ref without marking the buffer dirty
 * (brelse-style name; delegates entirely to hammer2_io_putblk()).
 */
void
_hammer2_io_brelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}
767
/*
 * Release the caller's ref without marking the buffer dirty
 * (bqrelse-style name; delegates entirely to hammer2_io_putblk()).
 */
void
_hammer2_io_bqrelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}
773
774 /*
775  * Set dedup validation bits in a DIO.  We do not need the buffer cache
776  * buffer for this.  This must be done concurrent with setting bits in
777  * the freemap so as to interlock with bulkfree's clearing of those bits.
778  */
779 void
780 hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
781 {
782         hammer2_io_t *dio;
783         uint64_t mask;
784         int lsize;
785         int isgood;
786
787         dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1, &isgood);
788         lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
789         mask = hammer2_dedup_mask(dio, bref->data_off, lsize);
790         atomic_clear_64(&dio->dedup_valid, mask);
791         atomic_set_64(&dio->dedup_alloc, mask);
792         hammer2_io_putblk(&dio);
793 }
794
795 /*
796  * Clear dedup validation bits in a DIO.  This is typically done when
797  * a modified chain is destroyed or by the bulkfree code.  No buffer
798  * is needed for this operation.  If the DIO no longer exists it is
799  * equivalent to the bits not being set.
800  */
801 void
802 hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype,
803                         hammer2_off_t data_off, u_int bytes)
804 {
805         hammer2_io_t *dio;
806         uint64_t mask;
807         int isgood;
808
809         if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
810                 return;
811         if (btype != HAMMER2_BREF_TYPE_DATA)
812                 return;
813         dio = hammer2_io_alloc(hmp, data_off, btype, 0, &isgood);
814         if (dio) {
815                 if (data_off < dio->pbase ||
816                     (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes >
817                     dio->pbase + dio->psize) {
818                         panic("hammer2_io_dedup_delete: DATAOFF BAD "
819                               "%016jx/%d %016jx\n",
820                               data_off, bytes, dio->pbase);
821                 }
822                 mask = hammer2_dedup_mask(dio, data_off, bytes);
823                 atomic_clear_64(&dio->dedup_alloc, mask);
824                 atomic_clear_64(&dio->dedup_valid, mask);
825                 hammer2_io_putblk(&dio);
826         }
827 }
828
/*
 * Assert that dedup allocation bits in a DIO are not set.  This operation
 * does not require a buffer.  The DIO does not need to exist.
 */
void
hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	int isgood;

	/* lookup only (createit == 0); a missing DIO trivially passes */
	dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA,
			       0, &isgood);
	if (dio) {
		KASSERT((dio->dedup_alloc &
			  hammer2_dedup_mask(dio, data_off, bytes)) == 0,
			("hammer2_dedup_assert: %016jx/%d %016jx/%016jx",
			data_off,
			bytes,
			hammer2_dedup_mask(dio, data_off, bytes),
			dio->dedup_alloc));
		hammer2_io_putblk(&dio);
	}
}
852
853 static
854 void
855 dio_write_stats_update(hammer2_io_t *dio, struct buf *bp)
856 {
857         if (bp->b_flags & B_DELWRI)
858                 return;
859         hammer2_adjwritecounter(dio->btype, dio->psize);
860 }
861
/*
 * Synchronize the DIO's buffer kernel virtual mapping via bkvasync()
 * (the buffers are created with B_KVABIO).  The buffer must be
 * instantiated.
 */
void
hammer2_io_bkvasync(hammer2_io_t *dio)
{
	KKASSERT(dio->bp != NULL);
	bkvasync(dio->bp);
}
868
/*
 * Ref a dio that is already owned.  Adds one ref to the count portion
 * of dio->refs without touching the state flag bits.
 */
void
_hammer2_io_ref(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
	atomic_add_64(&dio->refs, 1);
}