hammer2 - Implement variable-sized indirect blocks, clustered reads
[dragonfly.git] / sys / vfs / hammer2 / hammer2_chain.c
1 /*
2  * Copyright (c) 2011-2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 /*
36  * This subsystem handles direct and indirect block searches, recursions,
37  * creation, and deletion.  Chains of blockrefs are tracked and modifications
38  * are flag for propagation... eventually all the way back to the volume
39  * header.
40  */
41
42 #include <sys/cdefs.h>
43 #include <sys/cdefs.h>
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/types.h>
47 #include <sys/lock.h>
48 #include <sys/uuid.h>
49
50 #include "hammer2.h"
51
52 SPLAY_GENERATE(hammer2_chain_splay, hammer2_chain, snode, hammer2_chain_cmp);
53
54 static int hammer2_indirect_optimize;   /* XXX SYSCTL */
55
56 static hammer2_chain_t *hammer2_chain_create_indirect(
57                         hammer2_mount_t *hmp, hammer2_chain_t *parent,
58                         hammer2_key_t key, int keybits);
59
60 /*
61  * Compare function for chain splay tree
62  */
63 int
64 hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2)
65 {
66         return(chain2->index - chain1->index);
67 }
68
69 /*
70  * Allocate a new disconnected chain element representing the specified
71  * bref.  The chain element is locked exclusively and refs is set to 1.
72  *
73  * This essentially allocates a system memory structure representing one
74  * of the media structure types, including inodes.
75  */
76 hammer2_chain_t *
77 hammer2_chain_alloc(hammer2_mount_t *hmp, hammer2_blockref_t *bref)
78 {
79         hammer2_chain_t *chain;
80         hammer2_inode_t *ip;
81         hammer2_indblock_t *np;
82         hammer2_data_t *dp;
83         u_int bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
84
85         /*
86          * Construct the appropriate system structure.
87          */
88         switch(bref->type) {
89         case HAMMER2_BREF_TYPE_INODE:
90                 ip = kmalloc(sizeof(*ip), hmp->minode, M_WAITOK | M_ZERO);
91                 chain = &ip->chain;
92                 chain->u.ip = ip;
93                 lockinit(&chain->lk, "inode", 0, LK_CANRECURSE);
94                 ip->hmp = hmp;
95                 break;
96         case HAMMER2_BREF_TYPE_INDIRECT:
97                 if (bytes == HAMMER2_PBUFSIZE) {
98                         np = kmalloc(sizeof(*np),
99                                      hmp->mchain,
100                                      M_WAITOK | M_ZERO);
101                         np->is_embedded = 0;
102                 } else {
103                         np = kmalloc(offsetof(struct hammer2_indblock,
104                                               buf[bytes]),
105                                      hmp->mchain,
106                                      M_WAITOK | M_ZERO);
107                         np->is_embedded = 1;
108                 }
109                 chain = &np->chain;
110                 chain->u.np = np;
111                 lockinit(&chain->lk, "iblk", 0, LK_CANRECURSE);
112                 break;
113         case HAMMER2_BREF_TYPE_DATA:
114                 dp = kmalloc(sizeof(*dp), hmp->mchain, M_WAITOK | M_ZERO);
115                 chain = &dp->chain;
116                 chain->u.dp = dp;
117                 lockinit(&chain->lk, "dblk", 0, LK_CANRECURSE);
118                 break;
119         case HAMMER2_BREF_TYPE_VOLUME:
120                 chain = NULL;
121                 panic("hammer2_chain_alloc volume type illegal for op");
122         default:
123                 chain = NULL;
124                 panic("hammer2_chain_alloc: unrecognized blockref type: %d",
125                       bref->type);
126         }
127         chain->bref = *bref;
128         chain->index = -1;      /* not yet assigned */
129         chain->refs = 1;
130         chain->bytes = bytes;
131         lockmgr(&chain->lk, LK_EXCLUSIVE);
132
133         return (chain);
134 }
135
136 /*
137  * Free a disconnected chain element
138  */
139 void
140 hammer2_chain_free(hammer2_mount_t *hmp, hammer2_chain_t *chain)
141 {
142         void *mem;
143
144         KKASSERT(chain->bp == NULL);
145         KKASSERT(chain->data == NULL);
146         KKASSERT(chain->bref.type != HAMMER2_BREF_TYPE_INODE ||
147                  chain->u.ip->vp == NULL);
148
149         if ((mem = chain->u.mem) != NULL) {
150                 chain->u.mem = NULL;
151                 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
152                         kfree(mem, hmp->minode);
153                 else
154                         kfree(mem, hmp->mchain);
155         }
156 }
157
158 /*
159  * Add a reference to a chain element (for shared access).  The chain
160  * element must already have at least 1 ref controlled by the caller.
161  */
162 void
163 hammer2_chain_ref(hammer2_mount_t *hmp, hammer2_chain_t *chain)
164 {
165         KKASSERT(chain->refs > 0);
166         atomic_add_int(&chain->refs, 1);
167 }
168
169 /*
170  * Drop the callers reference to the chain element.  If the ref count
171  * reaches zero the chain element and its related structure (typically an
172  * inode or indirect block) will be freed and the parent will be
173  * recursively dropped.
174  *
175  * Modified elements hold an additional reference so it should not be
176  * possible for the count on a modified element to drop to 0.
177  *
178  * The chain element must NOT be locked by the caller.
179  *
180  * The parent might or might not be locked by the caller but if so it
181  * will also be referenced so we shouldn't recurse upward.
182  */
183 void
184 hammer2_chain_drop(hammer2_mount_t *hmp, hammer2_chain_t *chain)
185 {
186         hammer2_chain_t *parent;
187         u_int refs;
188
189         while (chain) {
190                 refs = chain->refs;
191                 cpu_ccfence();
192                 KKASSERT(refs > 0);
193                 if (refs == 1) {
194                         KKASSERT(chain != &hmp->vchain);
195                         parent = chain->parent;
196                         if (parent)
197                                 lockmgr(&parent->lk, LK_EXCLUSIVE);
198                         if (atomic_cmpset_int(&chain->refs, 1, 0)) {
199                                 /*
200                                  * Succeeded, recurse and drop parent
201                                  */
202                                 if (!(chain->flags & HAMMER2_CHAIN_DELETED)) {
203                                         SPLAY_REMOVE(hammer2_chain_splay,
204                                                      &parent->shead, chain);
205                                         atomic_set_int(&chain->flags,
206                                                        HAMMER2_CHAIN_DELETED);
207                                         /* parent refs dropped via recursion */
208                                 }
209                                 chain->parent = NULL;
210                                 if (parent)
211                                         lockmgr(&parent->lk, LK_RELEASE);
212                                 hammer2_chain_free(hmp, chain);
213                                 chain = parent;
214                                 /* recurse on parent */
215                         } else {
216                                 if (parent)
217                                         lockmgr(&parent->lk, LK_RELEASE);
218                                 /* retry the same chain */
219                         }
220                 } else {
221                         if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) {
222                                 /*
223                                  * Succeeded, count did not reach zero so
224                                  * cut out of the loop.
225                                  */
226                                 break;
227                         }
228                         /* retry the same chain */
229                 }
230         }
231 }
232
233 /*
234  * Lock a chain element, acquiring its data with I/O if necessary.
235  *
236  * Returns 0 on success or an error code if the data could not be acquired.
237  * The chain element is locked either way.
238  *
239  * chain->data will be pointed either at the embedded data (e.g. for
240  * inodes), in which case the buffer cache buffer is released, or will
241  * point into the bp->b_data buffer with the bp left intact while locked.
242  */
243 int
244 hammer2_chain_lock(hammer2_mount_t *hmp, hammer2_chain_t *chain)
245 {
246         hammer2_blockref_t *bref;
247         hammer2_off_t off_hi;
248         size_t off_lo;
249         int error;
250         void *data;
251
252         /*
253          * Lock the element.  Under certain conditions this might end up
254          * being a recursive lock.
255          */
256         KKASSERT(chain->refs > 0);
257         lockmgr(&chain->lk, LK_EXCLUSIVE);
258
259         /*
260          * The volume header is a special case
261          */
262         if (chain->bref.type == HAMMER2_BREF_TYPE_VOLUME)
263                 return(0);
264
265         /*
266          * bp must be NULL, so if the data pointer is valid here it points
267          * to embedded data and no I/O is necessary (whether modified or not).
268          */
269         KKASSERT(chain->bp == NULL);
270         if (chain->data)
271                 return (0);
272
273         /*
274          * If data is NULL we must issue I/O.  Any error returns the error
275          * code but leaves the chain locked.
276          *
277          * If the chain was modified a new bref will have already been
278          * allocated and its related bp is probably still sitting in the
279          * buffer cache.
280          */
281         bref = &chain->bref;
282
283         off_hi = bref->data_off & HAMMER2_OFF_MASK_HI;
284         off_lo = (size_t)bref->data_off & HAMMER2_OFF_MASK_LO;
285         KKASSERT(off_hi != 0);
286         error = cluster_read(hmp->devvp,
287                              hmp->voldata.volu_size, off_hi,
288                              HAMMER2_PBUFSIZE,
289                              HAMMER2_PBUFSIZE, HAMMER2_PBUFSIZE*8,
290                              &chain->bp);
291
292         /*
293          * Even though this can be synthesized from bref->data_off we
294          * store it in the in-memory chain structure for convenience.
295          */
296         if (chain->bytes !=
297             (1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))) {
298                 panic("hammer2_chain_lock: chain->bytes mismatch");
299         }
300
301         if (error) {
302                 kprintf("hammer2_chain_get: I/O error %016jx: %d\n",
303                         (intmax_t)off_hi, error);
304                 bqrelse(chain->bp);
305                 chain->bp = NULL;
306                 return (error);
307         }
308
309         /*
310          * Setup the data pointer, either pointing it to an embedded data
311          * structure and copying the data from the buffer, or pointint it
312          * into the buffer.
313          *
314          * The buffer is not retained when copying to an embedded data
315          * structure in order to avoid potential deadlocks or recursions
316          * on the same physical buffer.
317          */
318         switch (bref->type) {
319         case HAMMER2_BREF_TYPE_VOLUME:
320                 /*
321                  * Copy data from bp to embedded buffer
322                  */
323                 KKASSERT(0);    /* not yet - have mount use this soon */
324                 KKASSERT(off_hi == 0);
325                 bcopy((char *)chain->bp->b_data + off_lo,
326                       &hmp->voldata, HAMMER2_PBUFSIZE);
327                 chain->data = (void *)&hmp->voldata;
328                 bqrelse(chain->bp);
329                 chain->bp = NULL;
330                 break;
331         case HAMMER2_BREF_TYPE_INODE:
332                 /*
333                  * Copy data from bp to embedded buffer.
334                  */
335                 bcopy((char *)chain->bp->b_data + off_lo,
336                       &chain->u.ip->ip_data,
337                       HAMMER2_INODE_BYTES);
338                 chain->data = (void *)&chain->u.ip->ip_data;
339                 bqrelse(chain->bp);
340                 chain->bp = NULL;
341                 break;
342         case HAMMER2_BREF_TYPE_INDIRECT:
343                 /*
344                  * Indirect node data may or may not be embedded depending
345                  * on how much there is.
346                  */
347                 if (chain->u.np->is_embedded) {
348                         bcopy((char *)chain->bp->b_data + off_lo,
349                               chain->u.np->buf,
350                               chain->bytes);
351                         chain->data = (void *)&chain->u.np->buf[0];
352                         bqrelse(chain->bp);
353                         chain->bp = NULL;
354                         break;
355                 }
356                 /* fall through */
357         default:
358                 /*
359                  * Leave bp intact
360                  */
361                 data = (char *)chain->bp->b_data + off_lo;
362                 chain->data = data;
363                 break;
364         }
365         return (0);
366 }
367
368 /*
369  * Resize the chain's physical storage allocation.  Chains can be resized
370  * smaller without reallocating the storage.  Resizing larger will reallocate
371  * the storage.
372  */
373 void
374 hammer2_chain_resize(hammer2_mount_t *hmp, hammer2_chain_t *chain, int nradix)
375 {
376         hammer2_chain_t *parent;
377         struct buf *nbp;
378         size_t obytes;
379         size_t nbytes;
380         void *ndata;
381         int error;
382
383         /*
384          * Only data blocks can be resized for now
385          */
386         KKASSERT(chain != &hmp->vchain);
387         KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
388                  chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT);
389
390         /*
391          * Nothing to do if the element is already the proper size
392          */
393         obytes = chain->bytes;
394         nbytes = 1 << nradix;
395         if (obytes == nbytes)
396                 return;
397
398         /*
399          * A deleted inode may still be active but unreachable via sync
400          * because it has been disconnected from the tree.  Do not allow
401          * deleted inodes to be marked as being modified because this will
402          * bump the refs and never get resolved by the sync, leaving the
403          * inode structure allocated after umount.
404          */
405         if ((chain->flags & HAMMER2_CHAIN_DELETED) &&
406             chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
407                 KKASSERT(chain->data != NULL);
408                 return;
409         }
410
411         /*
412          * Set MODIFIED1 and add a chain ref to prevent destruction.  Both
413          * modified flags share the same ref.
414          */
415         atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED1);
416         if ((chain->flags & HAMMER2_CHAIN_MODIFIED2) == 0)
417                 hammer2_chain_ref(hmp, chain);
418
419         if (nbytes < obytes) {
420                 /*
421                  * If we are making it smaller we don't have to reallocate
422                  * the block.
423                  */
424                 chain->bref.data_off &= ~ HAMMER2_OFF_MASK_RADIX;
425                 chain->bref.data_off |= (nradix & HAMMER2_OFF_MASK_RADIX);
426                 chain->bytes = nbytes;
427         } else {
428                 /*
429                  * Otherwise we do
430                  */
431                 if (chain != &hmp->vchain) {
432                         chain->bref.data_off =
433                           hammer2_freemap_alloc(hmp, chain->bref.type, nbytes);
434                         chain->bytes = nbytes;
435                 }
436                 /* XXX failed allocation */
437
438                 switch(chain->bref.type) {
439                 case HAMMER2_BREF_TYPE_VOLUME:          /* embedded */
440                 case HAMMER2_BREF_TYPE_INODE:           /* embedded */
441                         /*
442                          * data points to embedded structure, no copy needed
443                          */
444                         error = 0;
445                         break;
446                 case HAMMER2_BREF_TYPE_INDIRECT:
447                         panic("hammer2_chain_resize: "
448                               "cannot resize indirect block");
449                         /* NOT REACHED */
450                         break;
451                 case HAMMER2_BREF_TYPE_DATA:
452                         /*
453                          * data (if not NULL) points into original bp or
454                          * to embedded data.  Copy-on-write to new block.
455                          */
456                         KKASSERT(chain != &hmp->vchain);        /* safety */
457                         if (nbytes == HAMMER2_PBUFSIZE) {
458                                 nbp = getblk(hmp->devvp,
459                                              chain->bref.data_off &
460                                               HAMMER2_OFF_MASK_HI,
461                                              HAMMER2_PBUFSIZE, 0, 0);
462                                 vfs_bio_clrbuf(nbp);
463                                 error = 0;
464                         } else {
465                                 error = bread(hmp->devvp,
466                                              chain->bref.data_off &
467                                               HAMMER2_OFF_MASK_HI,
468                                              HAMMER2_PBUFSIZE, &nbp);
469                                 KKASSERT(error == 0);/* XXX handle error */
470                         }
471
472                         /*
473                          * The new block may be smaller or larger than the
474                          * old block, only copy what fits.
475                          */
476                         ndata = nbp->b_data + (chain->bref.data_off &
477                                                HAMMER2_OFF_MASK_LO);
478                         if (chain->data) {
479                                 if (nbytes < obytes)
480                                         bcopy(chain->data, ndata, nbytes);
481                                 else
482                                         bcopy(chain->data, ndata, obytes);
483                                 KKASSERT(chain->bp != NULL);
484                                 bqrelse(chain->bp);
485                         }
486                         chain->bp = nbp;
487                         chain->data = ndata;
488                         break;
489                 default:
490                         panic("hammer2_chain_modify: unknown bref type");
491                         break;
492
493                 }
494         }
495
496         /*
497          * Recursively mark the parent chain elements so flushes can find
498          * modified elements.
499          *
500          * NOTE: The flush code will modify a SUBMODIFIED-flagged chain
501          *       during the flush recursion after clearing the parent's
502          *       SUBMODIFIED bit.  We don't want to re-set the parent's
503          *       SUBMODIFIED bit in this case!
504          */
505         if ((chain->flags & HAMMER2_CHAIN_SUBMODIFIED) == 0) {
506                 parent = chain->parent;
507                 while (parent &&
508                        (parent->flags & HAMMER2_CHAIN_SUBMODIFIED) == 0) {
509                         atomic_set_int(&parent->flags,
510                                        HAMMER2_CHAIN_SUBMODIFIED);
511                         parent = parent->parent;
512                 }
513         }
514 }
515
516 /*
517  * Convert a locked chain that was retrieved read-only to read-write.
518  *
519  * If not already marked modified a new physical block will be allocated
520  * and assigned to the bref.
521  *
522  * allocated->modified (without calling hammer2_chain_lock()) results
523  * in chain->data typically being NULL.  In this situation chain->data
524  * is assigned and the target area is zero'd out.
525  *
526  * If the data is pointing into a bp it will be relocated to a new bp.
527  * If the data is embedded we leave it alone for now.
528  */
529 void
530 hammer2_chain_modify(hammer2_mount_t *hmp, hammer2_chain_t *chain)
531 {
532         hammer2_chain_t *parent;
533         struct buf *nbp;
534         void *ndata;
535         int error;
536
537         /*
538          * If the chain is already marked MODIFIED1 we can just return.
539          */
540         if (chain->flags & HAMMER2_CHAIN_MODIFIED1) {
541                 KKASSERT(chain->data != NULL);
542                 return;
543         }
544
545         /*
546          * A deleted inode may still be active but unreachable via sync
547          * because it has been disconnected from the tree.  Do not allow
548          * deleted inodes to be marked as being modified because this will
549          * bump the refs and never get resolved by the sync, leaving the
550          * inode structure allocated after umount.
551          */
552         if ((chain->flags & HAMMER2_CHAIN_DELETED) &&
553             chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
554                 KKASSERT(chain->data != NULL);
555                 return;
556         }
557
558         /*
559          * Set MODIFIED1 and add a chain ref to prevent destruction.  Both
560          * modified flags share the same ref.
561          */
562         atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED1);
563         if ((chain->flags & HAMMER2_CHAIN_MODIFIED2) == 0)
564                 hammer2_chain_ref(hmp, chain);
565
566         /*
567          * We must allocate the copy-on-write block.
568          *
569          * If the data is embedded no other action is required.
570          *
571          * If the data is not embedded we acquire and clear the
572          * new block.  If chain->data is not NULL we then do the
573          * copy-on-write.  chain->data will then be repointed to the new
574          * buffer and the old buffer will be released.
575          *
576          * For newly created elements with no prior allocation we go
577          * through the copy-on-write steps except without the copying part.
578          */
579         if (chain != &hmp->vchain) {
580                 if (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)
581                         kprintf("Replace %d\n", chain->bytes);
582                 chain->bref.data_off =
583                     hammer2_freemap_alloc(hmp, chain->bref.type, chain->bytes);
584                 /* XXX failed allocation */
585         }
586
587         switch(chain->bref.type) {
588         case HAMMER2_BREF_TYPE_VOLUME:          /* embedded */
589         case HAMMER2_BREF_TYPE_INODE:           /* embedded */
590                 /*
591                  * Inode and Volume data already points to the embedded
592                  * structure, no copy is needed
593                  */
594                 error = 0;
595                 break;
596         case HAMMER2_BREF_TYPE_INDIRECT:
597                 /*
598                  * If the indirect data is embedded we just leave it
599                  * in its embedded space, otherwise fall-through to
600                  * the bp-handling code.
601                  *
602                  * If this is a newly allocated block chain->data will
603                  * be NULL, so make sure it is properly assigned.  In
604                  * this case the embedded space has already been zero'd
605                  * by the kmalloc().
606                  */
607                 if (chain->u.np->is_embedded) {
608                         chain->data = (void *)&chain->u.np->buf[0];
609                         error = 0;
610                         break;
611                 }
612                 /* fallthrough */
613         case HAMMER2_BREF_TYPE_DATA:
614                 /*
615                  * data (if not NULL) points into original bp or to embedded
616                  * data, copy-on-write to the new block.
617                  *
618                  * data (if NULL) indicates that no prior copy exists, the
619                  * storage must be zero'd.
620                  */
621                 KKASSERT(chain != &hmp->vchain);        /* safety */
622                 if (chain->bytes == HAMMER2_PBUFSIZE) {
623                         nbp = getblk(hmp->devvp,
624                                      chain->bref.data_off & HAMMER2_OFF_MASK_HI,
625                                      HAMMER2_PBUFSIZE, 0, 0);
626                         /*
627                          * XXX want to set B_CACHE but not bother to
628                          * zero because it will be zero'd below?
629                          */
630                         vfs_bio_clrbuf(nbp);
631                         error = 0;
632                 } else {
633                         error = bread(hmp->devvp,
634                                      chain->bref.data_off & HAMMER2_OFF_MASK_HI,
635                                      HAMMER2_PBUFSIZE, &nbp);
636                         KKASSERT(error == 0);/* XXX handle error */
637                 }
638
639                 /*
640                  * Copy or zero-fill on write depending on whether
641                  * chain->data exists or not.
642                  */
643                 ndata = nbp->b_data + (chain->bref.data_off &
644                                        HAMMER2_OFF_MASK_LO);
645                 if (chain->data) {
646                         bcopy(chain->data, ndata, chain->bytes);
647                         KKASSERT(chain->bp != NULL);
648                         bqrelse(chain->bp);
649                 } else {
650                         bzero(ndata, chain->bytes);
651                 }
652                 chain->bp = nbp;
653                 chain->data = ndata;
654                 break;
655         default:
656                 panic("hammer2_chain_modify: unknown bref type");
657                 break;
658
659         }
660
661         /*
662          * Recursively mark the parent chain elements so flushes can find
663          * modified elements.
664          *
665          * NOTE: The flush code will modify a SUBMODIFIED-flagged chain
666          *       during the flush recursion after clearing the parent's
667          *       SUBMODIFIED bit.  We don't want to re-set the parent's
668          *       SUBMODIFIED bit in this case!
669          */
670         if ((chain->flags & HAMMER2_CHAIN_SUBMODIFIED) == 0) {
671                 parent = chain->parent;
672                 while (parent &&
673                        (parent->flags & HAMMER2_CHAIN_SUBMODIFIED) == 0) {
674                         atomic_set_int(&parent->flags,
675                                        HAMMER2_CHAIN_SUBMODIFIED);
676                         parent = parent->parent;
677                 }
678         }
679 }
680
681 /*
682  * Unlock a chain element without dropping its reference count.
683  * (see hammer2_chain_put() to do both).
684  *
685  * Non-embedded data references (chain->bp != NULL) are returned to the
686  * system and the data field is cleared in that case.  If modified the
687  * dirty buffer is still returned to the system, can be flushed to disk by
688  * the system at any time, and will be reconstituted/re-read as needed.
689  */
690 void
691 hammer2_chain_unlock(hammer2_mount_t *hmp, hammer2_chain_t *chain)
692 {
693         if (chain->bp) {
694                 chain->data = NULL;
695                 if (chain->flags & HAMMER2_CHAIN_MODIFIED1) {
696                         if (chain->flags & HAMMER2_CHAIN_IOFLUSH) {
697                                 atomic_clear_int(&chain->flags,
698                                                  HAMMER2_CHAIN_IOFLUSH);
699                                 bdwrite(chain->bp);
700                         } else {
701                                 bdwrite(chain->bp);
702                         }
703                 } else {
704                         /* bp might still be dirty */
705                         bqrelse(chain->bp);
706                 }
707                 chain->bp = NULL;
708         }
709         lockmgr(&chain->lk, LK_RELEASE);
710 }
711
712 /*
713  * Locate an in-memory chain.  The parent must be locked.  The in-memory
714  * chain is returned or NULL if no in-memory chain is present.
715  *
716  * NOTE: A chain on-media might exist for this index when NULL is returned.
717  */
718 hammer2_chain_t *
719 hammer2_chain_find(hammer2_mount_t *hmp, hammer2_chain_t *parent, int index)
720 {
721         hammer2_chain_t dummy;
722         hammer2_chain_t *chain;
723
724         dummy.index = index;
725         chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead, &dummy);
726         return (chain);
727 }
728
729 /*
730  * Return a locked chain structure with all associated data acquired.
731  *
732  * Caller must lock the parent on call, the returned child will be locked.
733  */
734 hammer2_chain_t *
735 hammer2_chain_get(hammer2_mount_t *hmp, hammer2_chain_t *parent,
736                   int index, int flags)
737 {
738         hammer2_blockref_t *bref;
739         hammer2_chain_t *chain;
740         hammer2_chain_t dummy;
741
742         /*
743          * First see if we have a (possibly modified) chain element cached
744          * for this (parent, index).  Acquire the data if necessary.
745          *
746          * If chain->data is non-NULL the chain should already be marked
747          * modified.
748          */
749         dummy.index = index;
750         chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead, &dummy);
751         if (chain) {
752                 hammer2_chain_ref(hmp, chain);
753                 if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0)
754                         hammer2_chain_lock(hmp, chain);
755                 return(chain);
756         }
757
758         /*
759          * Otherwise lookup the bref and issue I/O (switch on the parent)
760          */
761         switch(parent->bref.type) {
762         case HAMMER2_BREF_TYPE_INODE:
763                 KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
764                 bref = &parent->data->ipdata.u.blockset.blockref[index];
765                 break;
766         case HAMMER2_BREF_TYPE_INDIRECT:
767                 KKASSERT(index >= 0 &&
768                          index < parent->bytes / sizeof(hammer2_blockref_t));
769                 bref = &parent->data->npdata.blockref[index];
770                 break;
771         case HAMMER2_BREF_TYPE_VOLUME:
772                 KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
773                 bref = &hmp->voldata.sroot_blockset.blockref[index];
774                 break;
775         default:
776                 bref = NULL;
777                 panic("hammer2_chain_get: unrecognized blockref type: %d",
778                       parent->bref.type);
779         }
780
781         /*
782          * Allocate a chain structure representing the existing media
783          * entry.  Thus the chain is *not* INITIAL and certainly not
784          * MODIFIED (yet).
785          */
786         chain = hammer2_chain_alloc(hmp, bref);
787
788         /*
789          * Link the chain into its parent.  Caller is expected to hold an
790          * exclusive lock on the parent.
791          */
792         chain->parent = parent;
793         chain->index = index;
794         if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, chain))
795                 panic("hammer2_chain_link: collision");
796         KKASSERT(parent->refs > 1);
797         atomic_add_int(&parent->refs, 1);       /* for splay entry */
798
799         /*
800          * Additional linkage for inodes.  Reuse the parent pointer to
801          * find the parent directory.
802          */
803         if (bref->type == HAMMER2_BREF_TYPE_INODE) {
804                 while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT)
805                         parent = parent->parent;
806                 if (parent->bref.type == HAMMER2_BREF_TYPE_INODE)
807                         chain->u.ip->pip = parent->u.ip;
808         }
809
810         /*
811          * Our new chain structure has already been referenced and locked
812          * but the lock code handles the I/O so call it to resolve the data.
813          * Then release one of our two exclusive locks.
814          *
815          * If NOLOCK is set the release will release the one-and-only lock.
816          */
817         if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0)
818                 hammer2_chain_lock(hmp, chain);
819         lockmgr(&chain->lk, LK_RELEASE);
820
821         return (chain);
822 }
823
824 /*
825  * Unlock and dereference a chain after use.  It is possible for this to
826  * recurse up the chain.
827  */
828 void
829 hammer2_chain_put(hammer2_mount_t *hmp, hammer2_chain_t *chain)
830 {
831         hammer2_chain_unlock(hmp, chain);
832         hammer2_chain_drop(hmp, chain);
833 }
834
835 /*
836  * Locate any key between key_beg and key_end inclusive.  (*parentp)
837  * typically points to an inode but can also point to a related indirect
838  * block and this function will recurse upwards and find the inode again.
839  *
840  * WARNING!  THIS DOES NOT RETURN KEYS IN LOGICAL KEY ORDER!  ANY KEY
841  *           WITHIN THE RANGE CAN BE RETURNED.  HOWEVER, AN ITERATION
842  *           WHICH PICKS UP WHERE WE LEFT OFF WILL CONTINUE THE SCAN.
843  *
844  * (*parentp) must be exclusively locked and referenced and can be an inode
845  * or an existing indirect block within the inode.
846  *
847  * On return (*parentp) will be modified to point at the deepest parent chain
848  * element encountered during the search, as a helper for an insertion or
849  * deletion.   The new (*parentp) will be locked and referenced and the old
850  * will be unlocked and dereferenced (no change if they are both the same).
851  *
852  * The matching chain will be returned exclusively locked and referenced.
853  *
854  * NULL is returned if no match was found, but (*parentp) will still
855  * potentially be adjusted.
856  *
857  * This function will also recurse up the chain if the key is not within the
858  * current parent's range.  (*parentp) can never be set to NULL.  An iteration
859  * can simply allow (*parentp) to float inside the loop.
860  */
861 hammer2_chain_t *
862 hammer2_chain_lookup(hammer2_mount_t *hmp, hammer2_chain_t **parentp,
863                      hammer2_key_t key_beg, hammer2_key_t key_end,
864                      int flags)
865 {
866         hammer2_chain_t *parent;
867         hammer2_chain_t *chain;
868         hammer2_chain_t *tmp;
869         hammer2_blockref_t *base;
870         hammer2_blockref_t *bref;
871         hammer2_key_t scan_beg;
872         hammer2_key_t scan_end;
873         int count = 0;
874         int i;
875
876         /*
877          * Recurse (*parentp) upward if necessary until the parent completely
878          * encloses the key range or we hit the inode.
879          */
880         parent = *parentp;
881         while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
882                 scan_beg = parent->bref.key;
883                 scan_end = scan_beg +
884                            ((hammer2_key_t)1 << parent->bref.keybits) - 1;
885                 if (key_beg >= scan_beg && key_end <= scan_end)
886                         break;
887                 hammer2_chain_unlock(hmp, parent);
888                 parent = parent->parent;
889                 hammer2_chain_ref(hmp, parent);         /* ref new parent */
890                 hammer2_chain_lock(hmp, parent);        /* lock new parent */
891                 hammer2_chain_drop(hmp, *parentp);      /* drop old parent */
892                 *parentp = parent;                      /* new parent */
893         }
894
895 again:
896         /*
897          * Locate the blockref array.  Currently we do a fully associative
898          * search through the array.
899          */
900         switch(parent->bref.type) {
901         case HAMMER2_BREF_TYPE_INODE:
902                 /*
903                  * Special shortcut for embedded data returns the inode
904                  * itself.  Callers must detect this condition and access
905                  * the embedded data (the strategy code does this for us).
906                  *
907                  * This is only applicable to regular files and softlinks.
908                  */
909                 if (parent->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA) {
910                         hammer2_chain_ref(hmp, parent);
911                         if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0)
912                                 hammer2_chain_lock(hmp, parent);
913                         return (parent);
914                 }
915                 base = &parent->data->ipdata.u.blockset.blockref[0];
916                 count = HAMMER2_SET_COUNT;
917                 break;
918         case HAMMER2_BREF_TYPE_INDIRECT:
919                 if (parent->data == NULL)
920                         panic("parent->data is NULL");
921                 base = &parent->data->npdata.blockref[0];
922                 count = parent->bytes / sizeof(hammer2_blockref_t);
923                 break;
924         case HAMMER2_BREF_TYPE_VOLUME:
925                 base = &hmp->voldata.sroot_blockset.blockref[0];
926                 count = HAMMER2_SET_COUNT;
927                 break;
928         default:
929                 panic("hammer2_chain_lookup: unrecognized blockref type: %d",
930                       parent->bref.type);
931                 base = NULL;    /* safety */
932                 count = 0;      /* safety */
933         }
934
935         /*
936          * If the element and key overlap we use the element.
937          */
938         bref = NULL;
939         for (i = 0; i < count; ++i) {
940                 tmp = hammer2_chain_find(hmp, parent, i);
941                 bref = (tmp) ? &tmp->bref : &base[i];
942                 if (bref->type == 0)
943                         continue;
944                 scan_beg = bref->key;
945                 scan_end = scan_beg + ((hammer2_key_t)1 << bref->keybits) - 1;
946                 if (key_beg <= scan_end && key_end >= scan_beg)
947                         break;
948         }
949         if (i == count) {
950                 if (key_beg == key_end)
951                         return (NULL);
952                 return (hammer2_chain_next(hmp, parentp, NULL,
953                                            key_beg, key_end, flags));
954         }
955
956         /*
957          * Acquire the new chain element.  If the chain element is an
958          * indirect block we must search recursively.
959          */
960         chain = hammer2_chain_get(hmp, parent, i, flags);
961         if (chain == NULL)
962                 return (NULL);
963
964         /*
965          * If the chain element is an indirect block it becomes the new
966          * parent and we loop on it.  We must fixup the chain we loop on
967          * if the caller passed flags to us that aren't sufficient for our
968          * needs.
969          */
970         if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
971                 hammer2_chain_put(hmp, parent);
972                 *parentp = parent = chain;
973                 if (flags & HAMMER2_LOOKUP_NOLOCK)
974                         hammer2_chain_lock(hmp, chain);
975                 goto again;
976         }
977
978         /*
979          * All done, return chain
980          */
981         return (chain);
982 }
983
984 /*
985  * After having issued a lookup we can iterate all matching keys.
986  *
987  * If chain is non-NULL we continue the iteration from just after it's index.
988  *
989  * If chain is NULL we assume the parent was exhausted and continue the
990  * iteration at the next parent.
991  */
992 hammer2_chain_t *
993 hammer2_chain_next(hammer2_mount_t *hmp, hammer2_chain_t **parentp,
994                    hammer2_chain_t *chain,
995                    hammer2_key_t key_beg, hammer2_key_t key_end,
996                    int flags)
997 {
998         hammer2_chain_t *parent;
999         hammer2_chain_t *tmp;
1000         hammer2_blockref_t *base;
1001         hammer2_blockref_t *bref;
1002         hammer2_key_t scan_beg;
1003         hammer2_key_t scan_end;
1004         int i;
1005         int count;
1006
1007         parent = *parentp;
1008
1009 again:
1010         /*
1011          * Calculate the next index and recalculate the parent if necessary.
1012          */
1013         if (chain) {
1014                 /*
1015                  * Continue iteration within current parent.  If not NULL
1016                  * the passed-in chain may or may not be locked, based on
1017                  * the LOOKUP_NOLOCK flag (passed in as returned from lookup
1018                  * or a prior next).
1019                  */
1020                 i = chain->index + 1;
1021                 if (flags & HAMMER2_LOOKUP_NOLOCK)
1022                         hammer2_chain_drop(hmp, chain);
1023                 else
1024                         hammer2_chain_put(hmp, chain);
1025
1026                 /*
1027                  * Any scan where the lookup returned degenerate data embedded
1028                  * in the inode has an invalid index and must terminate.
1029                  */
1030                 if (chain == parent)
1031                         return(NULL);
1032                 chain = NULL;
1033         } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT) {
1034                 /*
1035                  * We reached the end of the iteration.
1036                  */
1037                 return (NULL);
1038         } else {
1039                 /*
1040                  * Continue iteration with next parent unless the current
1041                  * parent covers the range.
1042                  */
1043                 hammer2_chain_t *nparent;
1044
1045                 if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT)
1046                         return (NULL);
1047
1048                 scan_beg = parent->bref.key;
1049                 scan_end = scan_beg +
1050                             ((hammer2_key_t)1 << parent->bref.keybits) - 1;
1051                 if (key_beg >= scan_beg && key_end <= scan_end)
1052                         return (NULL);
1053
1054                 i = parent->index + 1;
1055                 nparent = parent->parent;
1056                 hammer2_chain_ref(hmp, nparent);        /* ref new parent */
1057                 hammer2_chain_unlock(hmp, parent);
1058                 hammer2_chain_lock(hmp, nparent);       /* lock new parent */
1059                 hammer2_chain_drop(hmp, parent);        /* drop old parent */
1060                 *parentp = parent = nparent;
1061         }
1062
1063 again2:
1064         /*
1065          * Locate the blockref array.  Currently we do a fully associative
1066          * search through the array.
1067          */
1068         switch(parent->bref.type) {
1069         case HAMMER2_BREF_TYPE_INODE:
1070                 base = &parent->data->ipdata.u.blockset.blockref[0];
1071                 count = HAMMER2_SET_COUNT;
1072                 break;
1073         case HAMMER2_BREF_TYPE_INDIRECT:
1074                 base = &parent->data->npdata.blockref[0];
1075                 count = parent->bytes / sizeof(hammer2_blockref_t);
1076                 break;
1077         case HAMMER2_BREF_TYPE_VOLUME:
1078                 base = &hmp->voldata.sroot_blockset.blockref[0];
1079                 count = HAMMER2_SET_COUNT;
1080                 break;
1081         default:
1082                 panic("hammer2_chain_next: unrecognized blockref type: %d",
1083                       parent->bref.type);
1084                 base = NULL;    /* safety */
1085                 count = 0;      /* safety */
1086                 break;
1087         }
1088         KKASSERT(i <= count);
1089
1090         /*
1091          * Look for the key.  If we are unable to find a match and an exact
1092          * match was requested we return NULL.  If a range was requested we
1093          * run hammer2_chain_next() to iterate.
1094          */
1095         bref = NULL;
1096         while (i < count) {
1097                 tmp = hammer2_chain_find(hmp, parent, i);
1098                 bref = (tmp) ? &tmp->bref : &base[i];
1099                 if (bref->type == 0) {
1100                         ++i;
1101                         continue;
1102                 }
1103                 scan_beg = bref->key;
1104                 scan_end = scan_beg + ((hammer2_key_t)1 << bref->keybits) - 1;
1105                 if (key_beg <= scan_end && key_end >= scan_beg)
1106                         break;
1107                 ++i;
1108         }
1109
1110         /*
1111          * If we couldn't find a match recurse up a parent to continue the
1112          * search.
1113          */
1114         if (i == count)
1115                 goto again;
1116
1117         /*
1118          * Acquire the new chain element.  If the chain element is an
1119          * indirect block we must search recursively.
1120          */
1121         chain = hammer2_chain_get(hmp, parent, i, flags);
1122         if (chain == NULL)
1123                 return (NULL);
1124
1125         /*
1126          * If the chain element is an indirect block it becomes the new
1127          * parent and we loop on it.  We may have to lock the chain when
1128          * cycling it in as the new parent as it will not be locked if the
1129          * caller passed NOLOCK.
1130          */
1131         if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
1132                 hammer2_chain_put(hmp, parent);
1133                 *parentp = parent = chain;
1134                 if (flags & HAMMER2_LOOKUP_NOLOCK)
1135                         hammer2_chain_lock(hmp, chain);
1136                 i = 0;
1137                 goto again2;
1138         }
1139
1140         /*
1141          * All done, return chain
1142          */
1143         return (chain);
1144 }
1145
1146 /*
1147  * Create and return a new hammer2 system memory structure of the specified
1148  * key, type and size and insert it RELATIVE TO (PARENT).
1149  *
1150  * (parent) is typically either an inode or an indirect  block, acquired
1151  * acquired as a side effect of issuing a prior failed lookup.  parent
1152  * must be locked and held.  Do not pass the inode chain to this function
1153  * unless that is the chain returned by the failed lookup.
1154  *
1155  * Non-indirect types will automatically allocate indirect blocks as required
1156  * if the new item does not fit in the current (parent).
1157  *
1158  * Indirect types will move a portion of the existing blockref array in
1159  * (parent) into the new indirect type and then use one of the free slots
1160  * to emplace the new indirect type.
1161  *
1162  * A new locked, referenced chain element is returned of the specified type.
1163  * This element will also be marked as modified and contain a data area
1164  * ready for initialization.
1165  */
1166 hammer2_chain_t *
1167 hammer2_chain_create(hammer2_mount_t *hmp, hammer2_chain_t *parent,
1168                      hammer2_chain_t *chain,
1169                      hammer2_key_t key, int keybits, int type, size_t bytes)
1170 {
1171         hammer2_blockref_t dummy;
1172         hammer2_blockref_t *base;
1173         hammer2_blockref_t *bref;
1174         hammer2_chain_t dummy_chain;
1175         int unlock_parent = 0;
1176         int allocated = 0;
1177         int count;
1178         int i;
1179
1180         if (chain == NULL) {
1181                 /*
1182                  * First allocate media space and construct the dummy bref,
1183                  * then allocate the in-memory chain structure.
1184                  */
1185                 bzero(&dummy, sizeof(dummy));
1186                 dummy.type = type;
1187                 dummy.key = key;
1188                 dummy.keybits = keybits;
1189                 dummy.data_off = hammer2_bytes_to_radix(bytes);
1190                 chain = hammer2_chain_alloc(hmp, &dummy);
1191                 allocated = 1;
1192
1193                 /*
1194                  * We set the WAS_MODIFIED flag here so the chain gets
1195                  * marked as modified below.
1196                  */
1197                 chain->flags |= HAMMER2_CHAIN_INITIAL |
1198                                 HAMMER2_CHAIN_WAS_MODIFIED;
1199
1200                 /*
1201                  * Recalculate bytes to reflect the actual media block
1202                  * allocation.
1203                  */
1204                 bytes = (hammer2_off_t)1 <<
1205                         (int)(chain->bref.data_off & HAMMER2_OFF_MASK_RADIX);
1206                 chain->bytes = bytes;
1207
1208                 switch(type) {
1209                 case HAMMER2_BREF_TYPE_VOLUME:
1210                         panic("hammer2_chain_create: called with volume type");
1211                         break;
1212                 case HAMMER2_BREF_TYPE_INODE:
1213                         KKASSERT(bytes == HAMMER2_INODE_BYTES);
1214                         chain->data = (void *)&chain->u.ip->ip_data;
1215                         break;
1216                 case HAMMER2_BREF_TYPE_INDIRECT:
1217                         /*
1218                          * May or may not be embedded (chain->data may or
1219                          * may not be NULL)
1220                          */
1221                         if (chain->u.np->is_embedded) {
1222                                 chain->data = (void *)&chain->u.np->buf[0];
1223                         } else {
1224                                 KKASSERT(chain->data == NULL);
1225                         }
1226                         break;
1227                 default:
1228                         /* leave chain->data NULL */
1229                         KKASSERT(chain->data == NULL);
1230                         break;
1231                 }
1232         } else {
1233                 /*
1234                  * Potentially update the chain's key/keybits, but it will
1235                  * only be marked modified if WAS_MODIFIED is set (if it
1236                  * was modified at the time of its removal during a rename).
1237                  */
1238                 chain->bref.key = key;
1239                 chain->bref.keybits = keybits;
1240         }
1241
1242 again:
1243         /*
1244          * Locate a free blockref in the parent's array
1245          */
1246         switch(parent->bref.type) {
1247         case HAMMER2_BREF_TYPE_INODE:
1248                 KKASSERT(parent->data != NULL);
1249                 base = &parent->data->ipdata.u.blockset.blockref[0];
1250                 count = HAMMER2_SET_COUNT;
1251                 break;
1252         case HAMMER2_BREF_TYPE_INDIRECT:
1253                 KKASSERT(parent->data != NULL);
1254                 base = &parent->data->npdata.blockref[0];
1255                 count = parent->bytes / sizeof(hammer2_blockref_t);
1256                 break;
1257         case HAMMER2_BREF_TYPE_VOLUME:
1258                 KKASSERT(parent->data != NULL);
1259                 base = &hmp->voldata.sroot_blockset.blockref[0];
1260                 count = HAMMER2_SET_COUNT;
1261                 break;
1262         default:
1263                 panic("hammer2_chain_create: unrecognized blockref type: %d",
1264                       parent->bref.type);
1265                 count = 0;
1266                 break;
1267         }
1268
1269         /*
1270          * Scan for an unallocated bref, also skipping any slots occupied
1271          * by in-memory chain elements that may not yet have been updated
1272          * in the parent's bref array.
1273          */
1274         bzero(&dummy_chain, sizeof(dummy_chain));
1275         bref = NULL;
1276         for (i = 0; i < count; ++i) {
1277                 bref = &base[i];
1278                 dummy_chain.index = i;
1279                 if (bref->type == 0 &&
1280                     SPLAY_FIND(hammer2_chain_splay,
1281                                &parent->shead, &dummy_chain) == NULL) {
1282                         break;
1283                 }
1284         }
1285
1286         /*
1287          * If no free blockref count be found we must create an indirect
1288          * block and move a number of blockrefs into it.  With the parent
1289          * locked we can safely lock each child in order to move it without
1290          * causing a deadlock.
1291          *
1292          * This may return the new indirect block or the old parent depending
1293          * on where the key falls.
1294          */
1295         if (i == count) {
1296                 hammer2_chain_t *nparent;
1297
1298                 nparent = hammer2_chain_create_indirect(hmp, parent,
1299                                                         key, keybits);
1300                 if (nparent == NULL) {
1301                         if (allocated)
1302                                 hammer2_chain_free(hmp, chain);
1303                         chain = NULL;
1304                         goto done;
1305                 }
1306                 if (parent != nparent) {
1307                         if (unlock_parent)
1308                                 hammer2_chain_put(hmp, parent);
1309                         parent = nparent;
1310                         unlock_parent = 1;
1311                 }
1312                 goto again;
1313         }
1314
1315         /*
1316          * Link the chain into its parent.
1317          */
1318         if (chain->parent != NULL)
1319                 panic("hammer2: hammer2_chain_create: chain already connected");
1320         KKASSERT(chain->parent == NULL);
1321         chain->parent = parent;
1322         chain->index = i;
1323         if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, chain))
1324                 panic("hammer2_chain_link: collision");
1325         atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DELETED);
1326         KKASSERT(parent->refs > 1);
1327         atomic_add_int(&parent->refs, 1);
1328
1329         /*
1330          * Additional linkage for inodes.  Reuse the parent pointer to
1331          * find the parent directory.
1332          */
1333         if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
1334                 hammer2_chain_t *scan = parent;
1335                 while (scan->bref.type == HAMMER2_BREF_TYPE_INDIRECT)
1336                         scan = scan->parent;
1337                 if (scan->bref.type == HAMMER2_BREF_TYPE_INODE)
1338                         chain->u.ip->pip = scan->u.ip;
1339         }
1340
1341         /*
1342          * Mark the newly created or previously disconnected chain element
1343          * as modified and fully resolve the chain->data pointer.  The
1344          * WAS_MODIFIED bit will be set in both cases.
1345          *
1346          * Chain elements with embedded data will not issue I/O at this time.
1347          * A new block will be allocated for the buffer but not instantiated.
1348          *
1349          * Chain elements which do not use embedded data will allocate
1350          * the new block AND instantiate its buffer cache buffer, pointing
1351          * the data at the bp.
1352          */
1353         if (chain->flags & HAMMER2_CHAIN_WAS_MODIFIED) {
1354                 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_WAS_MODIFIED);
1355                 hammer2_chain_modify(hmp, chain);
1356         }
1357
1358 done:
1359         if (unlock_parent)
1360                 hammer2_chain_put(hmp, parent);
1361         return (chain);
1362 }
1363
1364 /*
1365  * Create an indirect block that covers one or more of the elements in the
1366  * current parent.  Either returns the existing parent with no locking or
1367  * ref changes or returns the new indirect block locked and referenced,
1368  * depending on what the specified key falls into.
1369  *
1370  * The key/keybits for the indirect mode only needs to follow three rules:
1371  *
1372  * (1) That all elements underneath it fit within its key space and
1373  *
1374  * (2) That all elements outside it are outside its key space.
1375  *
1376  * (3) When creating the new indirect block any elements in the current
1377  *     parent that fit within the new indirect block's keyspace must be
1378  *     moved into the new indirect block.
1379  *
1380  * (4) The keyspace chosen for the inserted indirect block CAN cover a wider
1381  *     keyspace the the current parent, but lookup/iteration rules will
1382  *     ensure (and must ensure) that rule (2) for all parents leading up
1383  *     to the nearest inode or the root volume header is adhered to.  This
1384  *     is accomplished by always recursing through matching keyspaces in
1385  *     the hammer2_chain_lookup() and hammer2_chain_next() API.
1386  *
1387  * The current implementation calculates the current worst-case keyspace by
1388  * iterating the current parent and then divides it into two halves, choosing
1389  * whichever half has the most elements (not necessarily the half containing
1390  * the requested key).
1391  *
1392  * We can also opt to use the half with the least number of elements.  This
1393  * causes lower-numbered keys (aka logical file offsets) to recurse through
1394  * fewer indirect blocks and higher-numbered keys to recurse through more.
1395  * This also has the risk of not moving enough elements to the new indirect
1396  * block and being forced to create several indirect blocks before the element
1397  * can be inserted.
1398  */
1399 static
1400 hammer2_chain_t *
1401 hammer2_chain_create_indirect(hammer2_mount_t *hmp, hammer2_chain_t *parent,
1402                               hammer2_key_t create_key, int create_bits)
1403 {
1404         hammer2_blockref_t *base;
1405         hammer2_blockref_t *bref;
1406         hammer2_chain_t *chain;
1407         hammer2_chain_t *ichain;
1408         hammer2_chain_t dummy;
1409         hammer2_key_t key = create_key;
1410         int keybits = create_bits;
1411         int locount = 0;
1412         int hicount = 0;
1413         int count;
1414         int nbytes;
1415         int i;
1416
1417         /*
1418          * Mark the parent modified so our base[] pointer remains valid
1419          * while we move entries.
1420          */
1421         hammer2_chain_modify(hmp, parent);
1422
1423         /*
1424          * Locate a free blockref in the parent's array
1425          */
1426         switch(parent->bref.type) {
1427         case HAMMER2_BREF_TYPE_INODE:
1428                 base = &parent->data->ipdata.u.blockset.blockref[0];
1429                 count = HAMMER2_SET_COUNT;
1430                 break;
1431         case HAMMER2_BREF_TYPE_INDIRECT:
1432                 base = &parent->data->npdata.blockref[0];
1433                 count = parent->bytes / sizeof(hammer2_blockref_t);
1434                 break;
1435         case HAMMER2_BREF_TYPE_VOLUME:
1436                 base = &hmp->voldata.sroot_blockset.blockref[0];
1437                 count = HAMMER2_SET_COUNT;
1438                 break;
1439         default:
1440                 panic("hammer2_chain_create_indirect: "
1441                       "unrecognized blockref type: %d",
1442                       parent->bref.type);
1443                 count = 0;
1444                 break;
1445         }
1446
1447         /*
1448          * Scan for an unallocated bref, also skipping any slots occupied
1449          * by in-memory chain elements that may not yet have been updated
1450          * in the parent's bref array.
1451          */
1452         bzero(&dummy, sizeof(dummy));
1453         for (i = 0; i < count; ++i) {
1454                 int nkeybits;
1455
1456                 bref = &base[i];
1457                 if (bref->type == 0) {
1458                         dummy.index = i;
1459                         chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead,
1460                                            &dummy);
1461                         if (chain == NULL)
1462                                 continue;
1463                         bref = &chain->bref;
1464                 }
1465
1466                 /*
1467                  * Expand our calculated key range (key, keybits) to fit
1468                  * the scanned key.  nkeybits represents the full range
1469                  * that we will later cut in half (two halves @ nkeybits - 1).
1470                  */
1471                 nkeybits = keybits;
1472                 if (nkeybits < bref->keybits)
1473                         nkeybits = bref->keybits;
1474                 while ((~(((hammer2_key_t)1 << nkeybits) - 1) &
1475                         (key ^ bref->key)) != 0) {
1476                         ++nkeybits;
1477                 }
1478
1479                 /*
1480                  * If the new key range is larger we have to determine
1481                  * which side of the new key range the existing keys fall
1482                  * under by checking the high bit, then collapsing the
1483                  * locount into the hicount or vise-versa.
1484                  */
1485                 if (keybits != nkeybits) {
1486                         if (((hammer2_key_t)1 << (nkeybits - 1)) & key) {
1487                                 hicount += locount;
1488                                 locount = 0;
1489                         } else {
1490                                 locount += hicount;
1491                                 hicount = 0;
1492                         }
1493                         keybits = nkeybits;
1494                 }
1495
1496                 /*
1497                  * The newly scanned key will be in the lower half or the
1498                  * higher half of the (new) key range.
1499                  */
1500                 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key)
1501                         ++hicount;
1502                 else
1503                         ++locount;
1504         }
1505
1506         /*
1507          * Adjust keybits to represent half of the full range calculated
1508          * above.
1509          */
1510         --keybits;
1511
1512         /*
1513          * Select whichever half contains the most elements.  Theoretically
1514          * we can select either side as long as it contains at least one
1515          * element (in order to ensure that a free slot is present to hold
1516          * the indirect block).
1517          */
1518         key &= ~(((hammer2_key_t)1 << keybits) - 1);
1519         if (hammer2_indirect_optimize) {
1520                 /*
1521                  * Insert node for least number of keys, this will arrange
1522                  * the first few blocks of a large file or the first few
1523                  * inodes in a directory with fewer indirect blocks when
1524                  * created linearly.
1525                  */
1526                 if (hicount < locount && hicount != 0)
1527                         key |= (hammer2_key_t)1 << keybits;
1528                 else
1529                         key &= ~(hammer2_key_t)1 << keybits;
1530         } else {
1531                 /*
1532                  * Insert node for most number of keys, best for heavily
1533                  * fragmented files.
1534                  */
1535                 if (hicount > locount)
1536                         key |= (hammer2_key_t)1 << keybits;
1537                 else
1538                         key &= ~(hammer2_key_t)1 << keybits;
1539         }
1540
1541         /*
1542          * How big should our new indirect block be?  It has to be at least
1543          * as large as its parent.
1544          */
1545         if (parent->bref.type == HAMMER2_BREF_TYPE_INODE)
1546                 nbytes = HAMMER2_IND_BYTES_MIN;
1547         else
1548                 nbytes = HAMMER2_IND_BYTES_MAX;
1549         if (nbytes < count * sizeof(hammer2_blockref_t))
1550                 nbytes = count * sizeof(hammer2_blockref_t);
1551
1552         /*
1553          * Ok, create our new indirect block
1554          */
1555         dummy.bref.type = HAMMER2_BREF_TYPE_INDIRECT;
1556         dummy.bref.key = key;
1557         dummy.bref.keybits = keybits;
1558         dummy.bref.data_off = hammer2_bytes_to_radix(nbytes);
1559         ichain = hammer2_chain_alloc(hmp, &dummy.bref);
1560         ichain->flags |= HAMMER2_CHAIN_INITIAL;
1561
1562         /*
1563          * Iterate the original parent and move the matching brefs into
1564          * the new indirect block.
1565          */
1566         for (i = 0; i < count; ++i) {
1567                 /*
1568                  * For keying purposes access the bref from the media or
1569                  * from our in-memory cache.  In cases where the in-memory
1570                  * cache overrides the media the keyrefs will be the same
1571                  * anyway so we can avoid checking the cache when the media
1572                  * has a key.
1573                  */
1574                 bref = &base[i];
1575                 if (bref->type == 0) {
1576                         dummy.index = i;
1577                         chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead,
1578                                            &dummy);
1579                         if (chain == NULL) {
1580                                 /*
1581                                  * Select index indirect block is placed in
1582                                  */
1583                                 if (ichain->index < 0)
1584                                         ichain->index = i;
1585                                 continue;
1586                         }
1587                         bref = &chain->bref;
1588                 }
1589
1590                 /*
1591                  * Skip keys not in the chosen half (low or high), only bit
1592                  * (keybits - 1) needs to be compared but for safety we
1593                  * will compare all msb bits plus that bit again.
1594                  */
1595                 if ((~(((hammer2_key_t)1 << keybits) - 1) &
1596                     (key ^ bref->key)) != 0) {
1597                         continue;
1598                 }
1599
1600                 /*
1601                  * This element is being moved, its slot is available
1602                  * for our indirect block.
1603                  */
1604                 if (ichain->index < 0)
1605                         ichain->index = i;
1606
1607                 /*
1608                  * Load the new indirect block by acquiring or allocating
1609                  * the related chain entries, then simply move it to the
1610                  * new parent (ichain).
1611                  *
1612                  * Flagging the new chain entry MOVED will cause a flush
1613                  * to synchronize its block into the new indirect block.
1614                  * The chain is unlocked after being moved but needs to
1615                  * retain a reference for the MOVED state
1616                  *
1617                  * We must still set SUBMODIFIED in the parent but we do
1618                  * that after the loop.
1619                  *
1620                  * XXX we really need a lock here but we don't need the
1621                  *     data.  NODATA feature needed.
1622                  */
1623                 chain = hammer2_chain_get(hmp, parent, i,
1624                                           HAMMER2_LOOKUP_NOLOCK);
1625                 SPLAY_REMOVE(hammer2_chain_splay, &parent->shead, chain);
1626                 if (SPLAY_INSERT(hammer2_chain_splay, &ichain->shead, chain))
1627                         panic("hammer2_chain_create_indirect: collision");
1628                 chain->parent = ichain;
1629                 bzero(&base[i], sizeof(base[i]));
1630                 atomic_add_int(&parent->refs, -1);
1631                 atomic_add_int(&ichain->refs, 1);
1632                 if (chain->flags & HAMMER2_CHAIN_MOVED) {
1633                         /* We don't need the ref from the chain_get */
1634                         hammer2_chain_drop(hmp, chain);
1635                 } else {
1636                         /* MOVED bit inherits the ref from the chain_get */
1637                         atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED);
1638                 }
1639                 KKASSERT(parent->refs > 0);
1640                 chain = NULL;
1641         }
1642
1643         /*
1644          * Insert the new indirect block into the parent now that we've
1645          * cleared out some entries in the parent.  We calculated a good
1646          * insertion index in the loop above (ichain->index).
1647          */
1648         KKASSERT(ichain->index >= 0);
1649         if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, ichain))
1650                 panic("hammer2_chain_create_indirect: ichain insertion");
1651         ichain->parent = parent;
1652         atomic_add_int(&parent->refs, 1);
1653
1654         /*
1655          * Mark the new indirect block modified after insertion, which
1656          * will propagate up through parent all the way to the root and
1657          * also allocate the physical block in ichain for our caller,
1658          * and assign ichain->data to a pre-zero'd space (because there
1659          * is not prior data to copy into it).
1660          *
1661          * We have to set SUBMODIFIED in ichain's flags manually so the
1662          * flusher knows it has to recurse through it to get to all of
1663          * our moved blocks.
1664          */
1665         hammer2_chain_modify(hmp, ichain);
1666         atomic_set_int(&ichain->flags, HAMMER2_CHAIN_SUBMODIFIED);
1667
1668         /*
1669          * Figure out what to return.
1670          */
1671         if (create_bits >= keybits) {
1672                 /*
1673                  * Key being created is way outside the key range,
1674                  * return the original parent.
1675                  */
1676                 hammer2_chain_put(hmp, ichain);
1677         } else if (~(((hammer2_key_t)1 << keybits) - 1) &
1678                    (create_key ^ key)) {
1679                 /*
1680                  * Key being created is outside the key range,
1681                  * return the original parent.
1682                  */
1683                 hammer2_chain_put(hmp, ichain);
1684         } else {
1685                 /*
1686                  * Otherwise its in the range, return the new parent.
1687                  */
1688                 parent = ichain;
1689         }
1690
1691         return(parent);
1692 }
1693
1694 /*
1695  * Physically delete the specified chain element.  Note that inodes with
1696  * open descriptors should not be deleted (as with other filesystems) until
1697  * the last open descriptor is closed.
1698  *
1699  * This routine will remove the chain element from its parent and potentially
1700  * also recurse upward and delete indirect blocks which become empty as a
1701  * side effect.
1702  *
1703  * The caller must pass a pointer to the chain's parent, also locked and
1704  * referenced.  (*parentp) will be modified in a manner similar to a lookup
1705  * or iteration when indirect blocks are also deleted as a side effect.
1706  */
1707 void
1708 hammer2_chain_delete(hammer2_mount_t *hmp, hammer2_chain_t *parent,
1709                      hammer2_chain_t *chain)
1710 {
1711         hammer2_blockref_t *base;
1712         int count;
1713
1714         if (chain->parent != parent)
1715                 panic("hammer2_chain_delete: parent mismatch");
1716
1717         /*
1718          * Mark the parent modified so our base[] pointer remains valid
1719          * while we move entries.
1720          *
1721          * Calculate the blockref reference in the parent
1722          */
1723         hammer2_chain_modify(hmp, parent);
1724
1725         switch(parent->bref.type) {
1726         case HAMMER2_BREF_TYPE_INODE:
1727                 base = &parent->data->ipdata.u.blockset.blockref[0];
1728                 count = HAMMER2_SET_COUNT;
1729                 break;
1730         case HAMMER2_BREF_TYPE_INDIRECT:
1731                 base = &parent->data->npdata.blockref[0];
1732                 count = parent->bytes / sizeof(hammer2_blockref_t);
1733                 break;
1734         case HAMMER2_BREF_TYPE_VOLUME:
1735                 base = &hmp->voldata.sroot_blockset.blockref[0];
1736                 count = HAMMER2_SET_COUNT;
1737                 break;
1738         default:
1739                 panic("hammer2_chain_delete: unrecognized blockref type: %d",
1740                       parent->bref.type);
1741                 count = 0;
1742                 break;
1743         }
1744
1745         /*
1746          * Disconnect the bref in the parent, remove the chain, and
1747          * disconnect in-memory fields from the parent.
1748          */
1749         KKASSERT(chain->index >= 0 && chain->index < count);
1750         base += chain->index;
1751         bzero(base, sizeof(*base));
1752
1753         SPLAY_REMOVE(hammer2_chain_splay, &parent->shead, chain);
1754         atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
1755         atomic_add_int(&parent->refs, -1);      /* for splay entry */
1756         chain->index = -1;
1757
1758         chain->parent = NULL;
1759         if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
1760                 chain->u.ip->pip = NULL;
1761
1762         /*
1763          * Nobody references the underlying object any more so we can
1764          * clear any pending modification(s) on it.  This can theoretically
1765          * recurse downward but even just clearing the bit on this item
1766          * will effectively recurse if someone is doing a rm -rf and greatly
1767          * reduce the I/O required.
1768          *
1769          * The MODIFIED1 bit is cleared but we have to remember the old state
1770          * in case this deletion is related to a rename.  The ref on the
1771          * chain is shared by both modified flags.
1772          */
1773         if (chain->flags & HAMMER2_CHAIN_MODIFIED1) {
1774                 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED1);
1775                 atomic_set_int(&chain->flags, HAMMER2_CHAIN_WAS_MODIFIED);
1776                 if ((chain->flags & HAMMER2_CHAIN_MODIFIED2) == 0)
1777                         hammer2_chain_drop(hmp, chain);
1778         }
1779 }
1780
1781 /*
1782  * Recursively flush the specified chain.  The chain is locked and
1783  * referenced by the caller and will remain so on return.
1784  *
1785  * This cannot be called with the volume header's vchain (yet).
1786  *
1787  * PASS1 - clear the MODIFIED1 bit (and set the MODIFIED2 bit XXX)
1788  *
1789  */
1790 static void
1791 hammer2_chain_flush_pass1(hammer2_mount_t *hmp, hammer2_chain_t *chain)
1792 {
1793         /*
1794          * Flush any children of this chain entry.
1795          */
1796         if (chain->flags & HAMMER2_CHAIN_SUBMODIFIED) {
1797                 hammer2_blockref_t *base;
1798                 hammer2_chain_t *scan;
1799                 hammer2_chain_t *next;
1800                 int count;
1801                 int submodified = 0;
1802
1803                 /*
1804                  * Modifications to the children will propagate up, forcing
1805                  * us to become modified and copy-on-write too.
1806                  *
1807                  * Clear SUBMODIFIED now, races during the flush will re-set
1808                  * it.
1809                  */
1810                 hammer2_chain_modify(hmp, chain);
1811                 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_SUBMODIFIED);
1812
1813                 /*
1814                  * The blockref in the parent's array must be repointed at
1815                  * the new block allocated by the child after its flush.
1816                  *
1817                  * Calculate the base of the array.
1818                  */
1819                 switch(chain->bref.type) {
1820                 case HAMMER2_BREF_TYPE_INODE:
1821                         KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
1822                         base = &chain->data->ipdata.u.blockset.blockref[0];
1823                         count = HAMMER2_SET_COUNT;
1824                         break;
1825                 case HAMMER2_BREF_TYPE_INDIRECT:
1826                         base = &chain->data->npdata.blockref[0];
1827                         count = chain->bytes / sizeof(hammer2_blockref_t);
1828                         break;
1829                 case HAMMER2_BREF_TYPE_VOLUME:
1830                         KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
1831                         base = &hmp->voldata.sroot_blockset.blockref[0];
1832                         count = HAMMER2_SET_COUNT;
1833                         break;
1834                 default:
1835                         base = NULL;
1836                         panic("hammer2_chain_get: unrecognized blockref type: %d",
1837                               chain->bref.type);
1838                 }
1839
1840                 /*
1841                  * Flush the children and update the blockrefs in the parent.
1842                  * Be careful of ripouts during the loop.
1843                  */
1844                 next = SPLAY_MIN(hammer2_chain_splay, &chain->shead);
1845                 while ((scan = next) != NULL) {
1846                         next = SPLAY_NEXT(hammer2_chain_splay, &chain->shead,
1847                                           scan);
1848                         if (scan->flags & (HAMMER2_CHAIN_SUBMODIFIED |
1849                                            HAMMER2_CHAIN_MODIFIED1 |
1850                                            HAMMER2_CHAIN_MOVED)) {
1851                                 hammer2_chain_ref(hmp, scan);
1852                                 hammer2_chain_lock(hmp, scan);
1853                                 hammer2_chain_flush_pass1(hmp, scan);
1854                                 if (scan->flags & (HAMMER2_CHAIN_SUBMODIFIED |
1855                                                    HAMMER2_CHAIN_MODIFIED1)) {
1856                                         submodified = 1;
1857                                 } else {
1858                                         KKASSERT(scan->index < count);
1859                                         base[scan->index] = scan->bref;
1860                                         if (scan->flags & HAMMER2_CHAIN_MOVED) {
1861                                                 atomic_clear_int(&scan->flags,
1862                                                          HAMMER2_CHAIN_MOVED);
1863                                                 hammer2_chain_drop(hmp, scan);
1864                                         }
1865                                 }
1866                                 hammer2_chain_put(hmp, scan);
1867                         }
1868                 }
1869                 if (submodified) {
1870                         atomic_set_int(&chain->flags,
1871                                        HAMMER2_CHAIN_SUBMODIFIED);
1872                 }
1873         }
1874
1875         /*
1876          * Flush this chain entry only if it is marked modified.
1877          */
1878         if ((chain->flags & HAMMER2_CHAIN_MODIFIED1) == 0)
1879                 return;
1880
1881         /*
1882          * Clear MODIFIED1 and set HAMMER2_CHAIN_MOVED.  The MODIFIED{1,2}
1883          * bits own a single parent ref and the MOVED bit owns its own
1884          * parent ref.
1885          */
1886         atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED1);
1887         if (chain->flags & HAMMER2_CHAIN_MOVED) {
1888                 if ((chain->flags & HAMMER2_CHAIN_MODIFIED2) == 0)
1889                         hammer2_chain_drop(hmp, chain);
1890         } else {
1891                 atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED);
1892                 if (chain->flags & HAMMER2_CHAIN_MODIFIED2)
1893                         hammer2_chain_ref(hmp, chain);
1894         }
1895
1896         /*
1897          * If this is part of a recursive flush we can go ahead and write
1898          * out the buffer cache buffer and pass a new bref back up the chain.
1899          *
1900          * This will never be a volume header.
1901          */
1902         if (chain != &hmp->vchain) {
1903                 hammer2_blockref_t *bref;
1904                 hammer2_off_t off_hi;
1905                 struct buf *bp;
1906                 size_t off_lo;
1907                 size_t bytes;
1908                 int error;
1909
1910                 KKASSERT(chain->data != NULL);
1911                 bref = &chain->bref;
1912
1913                 off_hi = bref->data_off & HAMMER2_OFF_MASK_HI;
1914                 off_lo = (size_t)bref->data_off & HAMMER2_OFF_MASK_LO;
1915                 bytes = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
1916                 KKASSERT(off_hi != 0);  /* not the root volume header */
1917
1918                 if (chain->bp) {
1919                         /*
1920                          * The data is mapped directly to the bp.  Dirty the
1921                          * bp so it gets flushed out by the kernel later on.
1922                          */
1923                         bdirty(chain->bp);
1924                 } else {
1925                         /*
1926                          * The data is embedded, we have to acquire the
1927                          * buffer cache buffer and copy the data into it.
1928                          */
1929                         bp = NULL;
1930                         error = bread(hmp->devvp, off_hi,
1931                                       HAMMER2_PBUFSIZE, &bp);
1932                         KKASSERT(error == 0); /* XXX */
1933
1934                         /*
1935                          * Copy the data to the buffer, mark the buffer
1936                          * dirty, and convert the chain to unmodified.
1937                          */
1938                         bcopy(chain->data, (char *)bp->b_data + off_lo, bytes);
1939                         bdwrite(bp);
1940                         bp = NULL;
1941
1942                         chain->bref.check.iscsi32.value =
1943                                         hammer2_icrc32(chain->data, bytes);
1944                 }
1945         }
1946         {
1947                 hammer2_blockref_t *bref;
1948
1949                 bref = &chain->bref;
1950
1951                 switch(bref->type) {
1952                 case HAMMER2_BREF_TYPE_VOLUME:
1953                         KKASSERT(chain->data != NULL);
1954                         KKASSERT(chain->bp == NULL);
1955
1956                         hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]=
1957                                 hammer2_icrc32(
1958                                         (char *)&hmp->voldata +
1959                                          HAMMER2_VOLUME_ICRC1_OFF,
1960                                         HAMMER2_VOLUME_ICRC1_SIZE);
1961                         hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]=
1962                                 hammer2_icrc32(
1963                                         (char *)&hmp->voldata +
1964                                          HAMMER2_VOLUME_ICRC0_OFF,
1965                                         HAMMER2_VOLUME_ICRC0_SIZE);
1966                         hmp->voldata.icrc_volheader =
1967                                 hammer2_icrc32(
1968                                         (char *)&hmp->voldata +
1969                                          HAMMER2_VOLUME_ICRCVH_OFF,
1970                                         HAMMER2_VOLUME_ICRCVH_SIZE);
1971                         break;
1972                 }
1973         }
1974 }
1975
1976 #if 0
1977 /*
1978  * PASS2 - not yet implemented (should be called only with the root chain?)
1979  */
1980 static void
1981 hammer2_chain_flush_pass2(hammer2_mount_t *hmp, hammer2_chain_t *chain)
1982 {
1983 }
1984 #endif
1985
1986 void
1987 hammer2_chain_flush(hammer2_mount_t *hmp, hammer2_chain_t *chain)
1988 {
1989         hammer2_chain_flush_pass1(hmp, chain);
1990 }