29a0530c7a5b6dd27f0db8298b979549447099e0
[dragonfly.git] / sys / vfs / hammer2 / hammer2_chain.c
1 /*
2  * Copyright (c) 2011-2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 /*
36  * This subsystem handles direct and indirect block searches, recursions,
37  * creation, and deletion.  Chains of blockrefs are tracked and modifications
38  * are flag for propagation... eventually all the way back to the volume
39  * header.
40  */
41
42 #include <sys/cdefs.h>
43 #include <sys/cdefs.h>
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/types.h>
47 #include <sys/lock.h>
48 #include <sys/uuid.h>
49
50 #include "hammer2.h"
51
52 SPLAY_GENERATE(hammer2_chain_splay, hammer2_chain, snode, hammer2_chain_cmp);
53
54 static int hammer2_indirect_optimize;   /* XXX SYSCTL */
55
56 static hammer2_chain_t *hammer2_chain_create_indirect(
57                         hammer2_mount_t *hmp, hammer2_chain_t *parent,
58                         hammer2_key_t key, int keybits);
59
60 /*
61  * Compare function for chain splay tree
62  */
63 int
64 hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2)
65 {
66         return(chain2->index - chain1->index);
67 }
68
69 /*
70  * Allocate a new disconnected chain element representing the specified
71  * bref.  The chain element is locked exclusively and refs is set to 1.
72  *
73  * This essentially allocates a system memory structure representing one
74  * of the media structure types, including inodes.
75  */
76 hammer2_chain_t *
77 hammer2_chain_alloc(hammer2_mount_t *hmp, hammer2_blockref_t *bref)
78 {
79         hammer2_chain_t *chain;
80         hammer2_inode_t *ip;
81         hammer2_indblock_t *np;
82         hammer2_data_t *dp;
83         u_int bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
84
85         /*
86          * Construct the appropriate system structure.
87          */
88         switch(bref->type) {
89         case HAMMER2_BREF_TYPE_INODE:
90                 ip = kmalloc(sizeof(*ip), hmp->minode, M_WAITOK | M_ZERO);
91                 chain = &ip->chain;
92                 chain->u.ip = ip;
93                 lockinit(&chain->lk, "inode", 0, LK_CANRECURSE);
94                 ip->hmp = hmp;
95                 break;
96         case HAMMER2_BREF_TYPE_INDIRECT:
97                 if (bytes == HAMMER2_PBUFSIZE) {
98                         np = kmalloc(sizeof(*np),
99                                      hmp->mchain,
100                                      M_WAITOK | M_ZERO);
101                         np->is_embedded = 0;
102                 } else {
103                         np = kmalloc(offsetof(struct hammer2_indblock,
104                                               buf[bytes]),
105                                      hmp->mchain,
106                                      M_WAITOK | M_ZERO);
107                         np->is_embedded = 1;
108                 }
109                 chain = &np->chain;
110                 chain->u.np = np;
111                 lockinit(&chain->lk, "iblk", 0, LK_CANRECURSE);
112                 break;
113         case HAMMER2_BREF_TYPE_DATA:
114                 dp = kmalloc(sizeof(*dp), hmp->mchain, M_WAITOK | M_ZERO);
115                 chain = &dp->chain;
116                 chain->u.dp = dp;
117                 lockinit(&chain->lk, "dblk", 0, LK_CANRECURSE);
118                 break;
119         case HAMMER2_BREF_TYPE_VOLUME:
120                 chain = NULL;
121                 panic("hammer2_chain_alloc volume type illegal for op");
122         default:
123                 chain = NULL;
124                 panic("hammer2_chain_alloc: unrecognized blockref type: %d",
125                       bref->type);
126         }
127         chain->bref = *bref;
128         chain->index = -1;      /* not yet assigned */
129         chain->refs = 1;
130         chain->bytes = bytes;
131         lockmgr(&chain->lk, LK_EXCLUSIVE);
132
133         return (chain);
134 }
135
136 /*
137  * Free a disconnected chain element
138  */
139 void
140 hammer2_chain_free(hammer2_mount_t *hmp, hammer2_chain_t *chain)
141 {
142         void *mem;
143
144         KKASSERT(chain->bp == NULL);
145         KKASSERT(chain->data == NULL);
146         KKASSERT(chain->bref.type != HAMMER2_BREF_TYPE_INODE ||
147                  chain->u.ip->vp == NULL);
148
149         if ((mem = chain->u.mem) != NULL) {
150                 chain->u.mem = NULL;
151                 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
152                         kfree(mem, hmp->minode);
153                 else
154                         kfree(mem, hmp->mchain);
155         }
156 }
157
158 /*
159  * Add a reference to a chain element (for shared access).  The chain
160  * element must already have at least 1 ref controlled by the caller.
161  */
162 void
163 hammer2_chain_ref(hammer2_mount_t *hmp, hammer2_chain_t *chain)
164 {
165         KKASSERT(chain->refs > 0);
166         atomic_add_int(&chain->refs, 1);
167 }
168
169 /*
170  * Drop the callers reference to the chain element.  If the ref count
171  * reaches zero the chain element and its related structure (typically an
172  * inode or indirect block) will be freed and the parent will be
173  * recursively dropped.
174  *
175  * Modified elements hold an additional reference so it should not be
176  * possible for the count on a modified element to drop to 0.
177  *
178  * The chain element must NOT be locked by the caller.
179  *
180  * The parent might or might not be locked by the caller but if so it
181  * will also be referenced so we shouldn't recurse upward.
182  */
183 void
184 hammer2_chain_drop(hammer2_mount_t *hmp, hammer2_chain_t *chain)
185 {
186         hammer2_chain_t *parent;
187         u_int refs;
188
189         while (chain) {
190                 refs = chain->refs;
191                 cpu_ccfence();
192                 KKASSERT(refs > 0);
193                 if (refs == 1) {
194                         KKASSERT(chain != &hmp->vchain);
195                         parent = chain->parent;
196                         if (parent)
197                                 lockmgr(&parent->lk, LK_EXCLUSIVE);
198                         if (atomic_cmpset_int(&chain->refs, 1, 0)) {
199                                 /*
200                                  * Succeeded, recurse and drop parent
201                                  */
202                                 if (!(chain->flags & HAMMER2_CHAIN_DELETED)) {
203                                         SPLAY_REMOVE(hammer2_chain_splay,
204                                                      &parent->shead, chain);
205                                         atomic_set_int(&chain->flags,
206                                                        HAMMER2_CHAIN_DELETED);
207                                         /* parent refs dropped via recursion */
208                                 }
209                                 chain->parent = NULL;
210                                 if (parent)
211                                         lockmgr(&parent->lk, LK_RELEASE);
212                                 hammer2_chain_free(hmp, chain);
213                                 chain = parent;
214                                 /* recurse on parent */
215                         } else {
216                                 if (parent)
217                                         lockmgr(&parent->lk, LK_RELEASE);
218                                 /* retry the same chain */
219                         }
220                 } else {
221                         if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) {
222                                 /*
223                                  * Succeeded, count did not reach zero so
224                                  * cut out of the loop.
225                                  */
226                                 break;
227                         }
228                         /* retry the same chain */
229                 }
230         }
231 }
232
233 /*
234  * Lock a chain element, acquiring its data with I/O if necessary.
235  *
236  * Returns 0 on success or an error code if the data could not be acquired.
237  * The chain element is locked either way.
238  *
239  * chain->data will be pointed either at the embedded data (e.g. for
240  * inodes), in which case the buffer cache buffer is released, or will
241  * point into the bp->b_data buffer with the bp left intact while locked.
242  */
243 int
244 hammer2_chain_lock(hammer2_mount_t *hmp, hammer2_chain_t *chain)
245 {
246         hammer2_blockref_t *bref;
247         hammer2_off_t off_hi;
248         size_t off_lo;
249         int error;
250         void *data;
251
252         /*
253          * Lock the element.  Under certain conditions this might end up
254          * being a recursive lock.
255          */
256         KKASSERT(chain->refs > 0);
257         lockmgr(&chain->lk, LK_EXCLUSIVE);
258
259         /*
260          * The volume header is a special case
261          */
262         if (chain->bref.type == HAMMER2_BREF_TYPE_VOLUME)
263                 return(0);
264
265         /*
266          * bp must be NULL, so if the data pointer is valid here it points
267          * to embedded data and no I/O is necessary (whether modified or not).
268          */
269         KKASSERT(chain->bp == NULL);
270         if (chain->data)
271                 return (0);
272
273         /*
274          * If data is NULL we must issue I/O.  Any error returns the error
275          * code but leaves the chain locked.
276          *
277          * If the chain was modified a new bref will have already been
278          * allocated and its related bp is probably still sitting in the
279          * buffer cache.
280          */
281         bref = &chain->bref;
282
283         off_hi = bref->data_off & HAMMER2_OFF_MASK_HI;
284         off_lo = (size_t)bref->data_off & HAMMER2_OFF_MASK_LO;
285         KKASSERT(off_hi != 0);
286         error = cluster_read(hmp->devvp,
287                              hmp->voldata.volu_size, off_hi,
288                              HAMMER2_PBUFSIZE,
289                              HAMMER2_PBUFSIZE, HAMMER2_PBUFSIZE*8,
290                              &chain->bp);
291
292         /*
293          * Even though this can be synthesized from bref->data_off we
294          * store it in the in-memory chain structure for convenience.
295          */
296         if (chain->bytes !=
297             (1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))) {
298                 panic("hammer2_chain_lock: chain->bytes mismatch");
299         }
300
301         if (error) {
302                 kprintf("hammer2_chain_get: I/O error %016jx: %d\n",
303                         (intmax_t)off_hi, error);
304                 bqrelse(chain->bp);
305                 chain->bp = NULL;
306                 return (error);
307         }
308
309         /*
310          * Setup the data pointer, either pointing it to an embedded data
311          * structure and copying the data from the buffer, or pointint it
312          * into the buffer.
313          *
314          * The buffer is not retained when copying to an embedded data
315          * structure in order to avoid potential deadlocks or recursions
316          * on the same physical buffer.
317          */
318         switch (bref->type) {
319         case HAMMER2_BREF_TYPE_VOLUME:
320                 /*
321                  * Copy data from bp to embedded buffer
322                  */
323                 KKASSERT(0);    /* not yet - have mount use this soon */
324                 KKASSERT(off_hi == 0);
325                 bcopy((char *)chain->bp->b_data + off_lo,
326                       &hmp->voldata, HAMMER2_PBUFSIZE);
327                 chain->data = (void *)&hmp->voldata;
328                 bqrelse(chain->bp);
329                 chain->bp = NULL;
330                 break;
331         case HAMMER2_BREF_TYPE_INODE:
332                 /*
333                  * Copy data from bp to embedded buffer.
334                  */
335                 bcopy((char *)chain->bp->b_data + off_lo,
336                       &chain->u.ip->ip_data,
337                       HAMMER2_INODE_BYTES);
338                 chain->data = (void *)&chain->u.ip->ip_data;
339                 bqrelse(chain->bp);
340                 chain->bp = NULL;
341                 break;
342         case HAMMER2_BREF_TYPE_INDIRECT:
343                 /*
344                  * Indirect node data may or may not be embedded depending
345                  * on how much there is.
346                  */
347                 if (chain->u.np->is_embedded) {
348                         bcopy((char *)chain->bp->b_data + off_lo,
349                               chain->u.np->buf,
350                               chain->bytes);
351                         chain->data = (void *)&chain->u.np->buf[0];
352                         bqrelse(chain->bp);
353                         chain->bp = NULL;
354                         break;
355                 }
356                 /* fall through */
357         default:
358                 /*
359                  * Leave bp intact
360                  */
361                 data = (char *)chain->bp->b_data + off_lo;
362                 chain->data = data;
363                 break;
364         }
365         return (0);
366 }
367
368 /*
369  * Resize the chain's physical storage allocation.  Chains can be resized
370  * smaller without reallocating the storage.  Resizing larger will reallocate
371  * the storage.
372  */
373 void
374 hammer2_chain_resize(hammer2_mount_t *hmp, hammer2_chain_t *chain, int nradix)
375 {
376         hammer2_chain_t *parent;
377         struct buf *nbp;
378         size_t obytes;
379         size_t nbytes;
380         void *ndata;
381         int error;
382
383         /*
384          * Only data blocks can be resized for now
385          */
386         KKASSERT(chain != &hmp->vchain);
387         KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
388                  chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT);
389
390         /*
391          * Nothing to do if the element is already the proper size
392          */
393         obytes = chain->bytes;
394         nbytes = 1 << nradix;
395         if (obytes == nbytes)
396                 return;
397
398         /*
399          * A deleted inode may still be active but unreachable via sync
400          * because it has been disconnected from the tree.  Do not allow
401          * deleted inodes to be marked as being modified because this will
402          * bump the refs and never get resolved by the sync, leaving the
403          * inode structure allocated after umount.
404          */
405         if ((chain->flags & HAMMER2_CHAIN_DELETED) &&
406             chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
407                 KKASSERT(chain->data != NULL);
408                 return;
409         }
410
411         /*
412          * Set MODIFIED1 and add a chain ref to prevent destruction.  Both
413          * modified flags share the same ref.
414          */
415         atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED1);
416         if ((chain->flags & HAMMER2_CHAIN_MODIFIED2) == 0)
417                 hammer2_chain_ref(hmp, chain);
418
419         if (nbytes < obytes) {
420                 /*
421                  * If we are making it smaller we don't have to reallocate
422                  * the block.
423                  */
424                 chain->bref.data_off &= ~ HAMMER2_OFF_MASK_RADIX;
425                 chain->bref.data_off |= (nradix & HAMMER2_OFF_MASK_RADIX);
426                 chain->bytes = nbytes;
427         } else {
428                 /*
429                  * Otherwise we do
430                  */
431                 if (chain != &hmp->vchain) {
432                         chain->bref.data_off =
433                           hammer2_freemap_alloc(hmp, chain->bref.type, nbytes);
434                         chain->bytes = nbytes;
435                 }
436                 /* XXX failed allocation */
437
438                 switch(chain->bref.type) {
439                 case HAMMER2_BREF_TYPE_VOLUME:          /* embedded */
440                 case HAMMER2_BREF_TYPE_INODE:           /* embedded */
441                         /*
442                          * data points to embedded structure, no copy needed
443                          */
444                         error = 0;
445                         break;
446                 case HAMMER2_BREF_TYPE_INDIRECT:
447                         panic("hammer2_chain_resize: "
448                               "cannot resize indirect block");
449                         /* NOT REACHED */
450                         break;
451                 case HAMMER2_BREF_TYPE_DATA:
452                         /*
453                          * data (if not NULL) points into original bp or
454                          * to embedded data.  Copy-on-write to new block.
455                          */
456                         KKASSERT(chain != &hmp->vchain);        /* safety */
457                         if (nbytes == HAMMER2_PBUFSIZE) {
458                                 nbp = getblk(hmp->devvp,
459                                              chain->bref.data_off &
460                                               HAMMER2_OFF_MASK_HI,
461                                              HAMMER2_PBUFSIZE, 0, 0);
462                                 vfs_bio_clrbuf(nbp);
463                                 error = 0;
464                         } else {
465                                 error = bread(hmp->devvp,
466                                              chain->bref.data_off &
467                                               HAMMER2_OFF_MASK_HI,
468                                              HAMMER2_PBUFSIZE, &nbp);
469                                 KKASSERT(error == 0);/* XXX handle error */
470                         }
471
472                         /*
473                          * The new block may be smaller or larger than the
474                          * old block, only copy what fits.
475                          */
476                         ndata = nbp->b_data + (chain->bref.data_off &
477                                                HAMMER2_OFF_MASK_LO);
478                         if (chain->data) {
479                                 if (nbytes < obytes)
480                                         bcopy(chain->data, ndata, nbytes);
481                                 else
482                                         bcopy(chain->data, ndata, obytes);
483                                 KKASSERT(chain->bp != NULL);
484                                 bqrelse(chain->bp);
485                         }
486                         chain->bp = nbp;
487                         chain->data = ndata;
488                         break;
489                 default:
490                         panic("hammer2_chain_modify: unknown bref type");
491                         break;
492
493                 }
494         }
495
496         /*
497          * Recursively mark the parent chain elements so flushes can find
498          * modified elements.
499          *
500          * NOTE: The flush code will modify a SUBMODIFIED-flagged chain
501          *       during the flush recursion after clearing the parent's
502          *       SUBMODIFIED bit.  We don't want to re-set the parent's
503          *       SUBMODIFIED bit in this case!
504          */
505         if ((chain->flags & HAMMER2_CHAIN_SUBMODIFIED) == 0) {
506                 parent = chain->parent;
507                 while (parent &&
508                        (parent->flags & HAMMER2_CHAIN_SUBMODIFIED) == 0) {
509                         atomic_set_int(&parent->flags,
510                                        HAMMER2_CHAIN_SUBMODIFIED);
511                         parent = parent->parent;
512                 }
513         }
514 }
515
516 /*
517  * Convert a locked chain that was retrieved read-only to read-write.
518  *
519  * If not already marked modified a new physical block will be allocated
520  * and assigned to the bref.
521  *
522  * allocated->modified (without calling hammer2_chain_lock()) results
523  * in chain->data typically being NULL.  In this situation chain->data
524  * is assigned and the target area is zero'd out.
525  *
526  * If the data is pointing into a bp it will be relocated to a new bp.
527  * If the data is embedded we leave it alone for now.
528  */
529 void
530 hammer2_chain_modify(hammer2_mount_t *hmp, hammer2_chain_t *chain)
531 {
532         hammer2_chain_t *parent;
533         struct buf *nbp;
534         void *ndata;
535         int error;
536
537         /*
538          * Setting the DIRTYBP flag will cause the buffer to be dirtied or
539          * written-out on unlock.  This bit is independent of the MODIFIED1
540          * bit because the chain may still need meta-data adjustments done
541          * by virtue of MODIFIED1 for its parent, and the buffer can be
542          * flushed out (possibly multiple times) by the OS before that.
543          */
544         atomic_set_int(&chain->flags, HAMMER2_CHAIN_DIRTYBP);
545
546         /*
547          * If the chain is already marked MODIFIED1 we can just return.
548          */
549         if (chain->flags & HAMMER2_CHAIN_MODIFIED1) {
550                 KKASSERT(chain->data != NULL);
551                 return;
552         }
553
554         /*
555          * A deleted inode may still be active but unreachable via sync
556          * because it has been disconnected from the tree.  Do not allow
557          * deleted inodes to be marked as being modified because this will
558          * bump the refs and never get resolved by the sync, leaving the
559          * inode structure allocated after umount.
560          */
561         if ((chain->flags & HAMMER2_CHAIN_DELETED) &&
562             chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
563                 KKASSERT(chain->data != NULL);
564                 return;
565         }
566
567         /*
568          * Set MODIFIED1 and add a chain ref to prevent destruction.  Both
569          * modified flags share the same ref.
570          */
571         atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED1);
572         if ((chain->flags & HAMMER2_CHAIN_MODIFIED2) == 0)
573                 hammer2_chain_ref(hmp, chain);
574
575         /*
576          * We must allocate the copy-on-write block.
577          *
578          * If the data is embedded no other action is required.
579          *
580          * If the data is not embedded we acquire and clear the
581          * new block.  If chain->data is not NULL we then do the
582          * copy-on-write.  chain->data will then be repointed to the new
583          * buffer and the old buffer will be released.
584          *
585          * For newly created elements with no prior allocation we go
586          * through the copy-on-write steps except without the copying part.
587          */
588         if (chain != &hmp->vchain) {
589                 if (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)
590                         kprintf("Replace %d\n", chain->bytes);
591                 chain->bref.data_off =
592                     hammer2_freemap_alloc(hmp, chain->bref.type, chain->bytes);
593                 /* XXX failed allocation */
594         }
595
596         switch(chain->bref.type) {
597         case HAMMER2_BREF_TYPE_VOLUME:          /* embedded */
598         case HAMMER2_BREF_TYPE_INODE:           /* embedded */
599                 /*
600                  * Inode and Volume data already points to the embedded
601                  * structure, no copy is needed
602                  */
603                 error = 0;
604                 break;
605         case HAMMER2_BREF_TYPE_INDIRECT:
606                 /*
607                  * If the indirect data is embedded we just leave it
608                  * in its embedded space, otherwise fall-through to
609                  * the bp-handling code.
610                  *
611                  * If this is a newly allocated block chain->data will
612                  * be NULL, so make sure it is properly assigned.  In
613                  * this case the embedded space has already been zero'd
614                  * by the kmalloc().
615                  */
616                 if (chain->u.np->is_embedded) {
617                         chain->data = (void *)&chain->u.np->buf[0];
618                         error = 0;
619                         break;
620                 }
621                 /* fallthrough */
622         case HAMMER2_BREF_TYPE_DATA:
623                 /*
624                  * data (if not NULL) points into original bp or to embedded
625                  * data, copy-on-write to the new block.
626                  *
627                  * data (if NULL) indicates that no prior copy exists, the
628                  * storage must be zero'd.
629                  */
630                 KKASSERT(chain != &hmp->vchain);        /* safety */
631                 if (chain->bytes == HAMMER2_PBUFSIZE) {
632                         nbp = getblk(hmp->devvp,
633                                      chain->bref.data_off & HAMMER2_OFF_MASK_HI,
634                                      HAMMER2_PBUFSIZE, 0, 0);
635                         /*
636                          * XXX want to set B_CACHE but not bother to
637                          * zero because it will be zero'd below?
638                          */
639                         vfs_bio_clrbuf(nbp);
640                         error = 0;
641                 } else {
642                         error = bread(hmp->devvp,
643                                      chain->bref.data_off & HAMMER2_OFF_MASK_HI,
644                                      HAMMER2_PBUFSIZE, &nbp);
645                         KKASSERT(error == 0);/* XXX handle error */
646                 }
647
648                 /*
649                  * Copy or zero-fill on write depending on whether
650                  * chain->data exists or not.
651                  */
652                 ndata = nbp->b_data + (chain->bref.data_off &
653                                        HAMMER2_OFF_MASK_LO);
654                 if (chain->data) {
655                         bcopy(chain->data, ndata, chain->bytes);
656                         KKASSERT(chain->bp != NULL);
657                         bqrelse(chain->bp);
658                 } else {
659                         bzero(ndata, chain->bytes);
660                 }
661                 chain->bp = nbp;
662                 chain->data = ndata;
663                 break;
664         default:
665                 panic("hammer2_chain_modify: unknown bref type");
666                 break;
667
668         }
669
670         /*
671          * Recursively mark the parent chain elements so flushes can find
672          * modified elements.
673          *
674          * NOTE: The flush code will modify a SUBMODIFIED-flagged chain
675          *       during the flush recursion after clearing the parent's
676          *       SUBMODIFIED bit.  We don't want to re-set the parent's
677          *       SUBMODIFIED bit in this case!
678          */
679         if ((chain->flags & HAMMER2_CHAIN_SUBMODIFIED) == 0) {
680                 parent = chain->parent;
681                 while (parent &&
682                        (parent->flags & HAMMER2_CHAIN_SUBMODIFIED) == 0) {
683                         atomic_set_int(&parent->flags,
684                                        HAMMER2_CHAIN_SUBMODIFIED);
685                         parent = parent->parent;
686                 }
687         }
688 }
689
690 /*
691  * Unlock a chain element without dropping its reference count.
692  * (see hammer2_chain_put() to do both).
693  *
694  * Non-embedded data references (chain->bp != NULL) are returned to the
695  * system and the data field is cleared in that case.  If modified the
696  * dirty buffer is still returned to the system, can be flushed to disk by
697  * the system at any time, and will be reconstituted/re-read as needed.
698  */
699 void
700 hammer2_chain_unlock(hammer2_mount_t *hmp, hammer2_chain_t *chain)
701 {
702         if (chain->bp) {
703                 chain->data = NULL;
704                 if (chain->flags & HAMMER2_CHAIN_DIRTYBP) {
705                         if (chain->flags & HAMMER2_CHAIN_IOFLUSH) {
706                                 atomic_clear_int(&chain->flags,
707                                                  HAMMER2_CHAIN_IOFLUSH);
708                                 chain->bp->b_flags |= B_RELBUF;
709                                 bawrite(chain->bp);
710                         } else {
711                                 bdwrite(chain->bp);
712                         }
713                 } else {
714                         if (chain->flags & HAMMER2_CHAIN_IOFLUSH) {
715                                 atomic_clear_int(&chain->flags,
716                                                  HAMMER2_CHAIN_IOFLUSH);
717                                 chain->bp->b_flags |= B_RELBUF;
718                                 brelse(chain->bp);
719                         } else {
720                                 /* bp might still be dirty */
721                                 bqrelse(chain->bp);
722                         }
723                 }
724                 chain->bp = NULL;
725         }
726         atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DIRTYBP);
727         lockmgr(&chain->lk, LK_RELEASE);
728 }
729
730 /*
731  * Locate an in-memory chain.  The parent must be locked.  The in-memory
732  * chain is returned or NULL if no in-memory chain is present.
733  *
734  * NOTE: A chain on-media might exist for this index when NULL is returned.
735  */
736 hammer2_chain_t *
737 hammer2_chain_find(hammer2_mount_t *hmp, hammer2_chain_t *parent, int index)
738 {
739         hammer2_chain_t dummy;
740         hammer2_chain_t *chain;
741
742         dummy.index = index;
743         chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead, &dummy);
744         return (chain);
745 }
746
747 /*
748  * Return a locked chain structure with all associated data acquired.
749  *
750  * Caller must lock the parent on call, the returned child will be locked.
751  */
752 hammer2_chain_t *
753 hammer2_chain_get(hammer2_mount_t *hmp, hammer2_chain_t *parent,
754                   int index, int flags)
755 {
756         hammer2_blockref_t *bref;
757         hammer2_chain_t *chain;
758         hammer2_chain_t dummy;
759
760         /*
761          * First see if we have a (possibly modified) chain element cached
762          * for this (parent, index).  Acquire the data if necessary.
763          *
764          * If chain->data is non-NULL the chain should already be marked
765          * modified.
766          */
767         dummy.index = index;
768         chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead, &dummy);
769         if (chain) {
770                 hammer2_chain_ref(hmp, chain);
771                 if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0)
772                         hammer2_chain_lock(hmp, chain);
773                 return(chain);
774         }
775
776         /*
777          * Otherwise lookup the bref and issue I/O (switch on the parent)
778          */
779         switch(parent->bref.type) {
780         case HAMMER2_BREF_TYPE_INODE:
781                 KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
782                 bref = &parent->data->ipdata.u.blockset.blockref[index];
783                 break;
784         case HAMMER2_BREF_TYPE_INDIRECT:
785                 KKASSERT(index >= 0 &&
786                          index < parent->bytes / sizeof(hammer2_blockref_t));
787                 bref = &parent->data->npdata.blockref[index];
788                 break;
789         case HAMMER2_BREF_TYPE_VOLUME:
790                 KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
791                 bref = &hmp->voldata.sroot_blockset.blockref[index];
792                 break;
793         default:
794                 bref = NULL;
795                 panic("hammer2_chain_get: unrecognized blockref type: %d",
796                       parent->bref.type);
797         }
798
799         /*
800          * Allocate a chain structure representing the existing media
801          * entry.  Thus the chain is *not* INITIAL and certainly not
802          * MODIFIED (yet).
803          */
804         chain = hammer2_chain_alloc(hmp, bref);
805
806         /*
807          * Link the chain into its parent.  Caller is expected to hold an
808          * exclusive lock on the parent.
809          */
810         chain->parent = parent;
811         chain->index = index;
812         if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, chain))
813                 panic("hammer2_chain_link: collision");
814         KKASSERT(parent->refs > 1);
815         atomic_add_int(&parent->refs, 1);       /* for splay entry */
816
817         /*
818          * Additional linkage for inodes.  Reuse the parent pointer to
819          * find the parent directory.
820          */
821         if (bref->type == HAMMER2_BREF_TYPE_INODE) {
822                 while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT)
823                         parent = parent->parent;
824                 if (parent->bref.type == HAMMER2_BREF_TYPE_INODE)
825                         chain->u.ip->pip = parent->u.ip;
826         }
827
828         /*
829          * Our new chain structure has already been referenced and locked
830          * but the lock code handles the I/O so call it to resolve the data.
831          * Then release one of our two exclusive locks.
832          *
833          * If NOLOCK is set the release will release the one-and-only lock.
834          */
835         if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0)
836                 hammer2_chain_lock(hmp, chain);
837         lockmgr(&chain->lk, LK_RELEASE);
838
839         return (chain);
840 }
841
842 /*
843  * Unlock and dereference a chain after use.  It is possible for this to
844  * recurse up the chain.
845  */
846 void
847 hammer2_chain_put(hammer2_mount_t *hmp, hammer2_chain_t *chain)
848 {
849         hammer2_chain_unlock(hmp, chain);
850         hammer2_chain_drop(hmp, chain);
851 }
852
853 /*
854  * Locate any key between key_beg and key_end inclusive.  (*parentp)
855  * typically points to an inode but can also point to a related indirect
856  * block and this function will recurse upwards and find the inode again.
857  *
858  * WARNING!  THIS DOES NOT RETURN KEYS IN LOGICAL KEY ORDER!  ANY KEY
859  *           WITHIN THE RANGE CAN BE RETURNED.  HOWEVER, AN ITERATION
860  *           WHICH PICKS UP WHERE WE LEFT OFF WILL CONTINUE THE SCAN.
861  *
862  * (*parentp) must be exclusively locked and referenced and can be an inode
863  * or an existing indirect block within the inode.
864  *
865  * On return (*parentp) will be modified to point at the deepest parent chain
866  * element encountered during the search, as a helper for an insertion or
867  * deletion.   The new (*parentp) will be locked and referenced and the old
868  * will be unlocked and dereferenced (no change if they are both the same).
869  *
870  * The matching chain will be returned exclusively locked and referenced.
871  *
872  * NULL is returned if no match was found, but (*parentp) will still
873  * potentially be adjusted.
874  *
875  * This function will also recurse up the chain if the key is not within the
876  * current parent's range.  (*parentp) can never be set to NULL.  An iteration
877  * can simply allow (*parentp) to float inside the loop.
878  */
879 hammer2_chain_t *
880 hammer2_chain_lookup(hammer2_mount_t *hmp, hammer2_chain_t **parentp,
881                      hammer2_key_t key_beg, hammer2_key_t key_end,
882                      int flags)
883 {
884         hammer2_chain_t *parent;
885         hammer2_chain_t *chain;
886         hammer2_chain_t *tmp;
887         hammer2_blockref_t *base;
888         hammer2_blockref_t *bref;
889         hammer2_key_t scan_beg;
890         hammer2_key_t scan_end;
891         int count = 0;
892         int i;
893
894         /*
895          * Recurse (*parentp) upward if necessary until the parent completely
896          * encloses the key range or we hit the inode.
897          */
898         parent = *parentp;
899         while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
900                 scan_beg = parent->bref.key;
901                 scan_end = scan_beg +
902                            ((hammer2_key_t)1 << parent->bref.keybits) - 1;
903                 if (key_beg >= scan_beg && key_end <= scan_end)
904                         break;
905                 hammer2_chain_unlock(hmp, parent);
906                 parent = parent->parent;
907                 hammer2_chain_ref(hmp, parent);         /* ref new parent */
908                 hammer2_chain_lock(hmp, parent);        /* lock new parent */
909                 hammer2_chain_drop(hmp, *parentp);      /* drop old parent */
910                 *parentp = parent;                      /* new parent */
911         }
912
913 again:
914         /*
915          * Locate the blockref array.  Currently we do a fully associative
916          * search through the array.
917          */
918         switch(parent->bref.type) {
919         case HAMMER2_BREF_TYPE_INODE:
920                 /*
921                  * Special shortcut for embedded data returns the inode
922                  * itself.  Callers must detect this condition and access
923                  * the embedded data (the strategy code does this for us).
924                  *
925                  * This is only applicable to regular files and softlinks.
926                  */
927                 if (parent->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA) {
928                         hammer2_chain_ref(hmp, parent);
929                         if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0)
930                                 hammer2_chain_lock(hmp, parent);
931                         return (parent);
932                 }
933                 base = &parent->data->ipdata.u.blockset.blockref[0];
934                 count = HAMMER2_SET_COUNT;
935                 break;
936         case HAMMER2_BREF_TYPE_INDIRECT:
937                 if (parent->data == NULL)
938                         panic("parent->data is NULL");
939                 base = &parent->data->npdata.blockref[0];
940                 count = parent->bytes / sizeof(hammer2_blockref_t);
941                 break;
942         case HAMMER2_BREF_TYPE_VOLUME:
943                 base = &hmp->voldata.sroot_blockset.blockref[0];
944                 count = HAMMER2_SET_COUNT;
945                 break;
946         default:
947                 panic("hammer2_chain_lookup: unrecognized blockref type: %d",
948                       parent->bref.type);
949                 base = NULL;    /* safety */
950                 count = 0;      /* safety */
951         }
952
953         /*
954          * If the element and key overlap we use the element.
955          */
956         bref = NULL;
957         for (i = 0; i < count; ++i) {
958                 tmp = hammer2_chain_find(hmp, parent, i);
959                 bref = (tmp) ? &tmp->bref : &base[i];
960                 if (bref->type == 0)
961                         continue;
962                 scan_beg = bref->key;
963                 scan_end = scan_beg + ((hammer2_key_t)1 << bref->keybits) - 1;
964                 if (key_beg <= scan_end && key_end >= scan_beg)
965                         break;
966         }
967         if (i == count) {
968                 if (key_beg == key_end)
969                         return (NULL);
970                 return (hammer2_chain_next(hmp, parentp, NULL,
971                                            key_beg, key_end, flags));
972         }
973
974         /*
975          * Acquire the new chain element.  If the chain element is an
976          * indirect block we must search recursively.
977          */
978         chain = hammer2_chain_get(hmp, parent, i, flags);
979         if (chain == NULL)
980                 return (NULL);
981
982         /*
983          * If the chain element is an indirect block it becomes the new
984          * parent and we loop on it.  We must fixup the chain we loop on
985          * if the caller passed flags to us that aren't sufficient for our
986          * needs.
987          */
988         if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
989                 hammer2_chain_put(hmp, parent);
990                 *parentp = parent = chain;
991                 if (flags & HAMMER2_LOOKUP_NOLOCK)
992                         hammer2_chain_lock(hmp, chain);
993                 goto again;
994         }
995
996         /*
997          * All done, return chain
998          */
999         return (chain);
1000 }
1001
1002 /*
1003  * After having issued a lookup we can iterate all matching keys.
1004  *
1005  * If chain is non-NULL we continue the iteration from just after it's index.
1006  *
1007  * If chain is NULL we assume the parent was exhausted and continue the
1008  * iteration at the next parent.
1009  */
1010 hammer2_chain_t *
1011 hammer2_chain_next(hammer2_mount_t *hmp, hammer2_chain_t **parentp,
1012                    hammer2_chain_t *chain,
1013                    hammer2_key_t key_beg, hammer2_key_t key_end,
1014                    int flags)
1015 {
1016         hammer2_chain_t *parent;
1017         hammer2_chain_t *tmp;
1018         hammer2_blockref_t *base;
1019         hammer2_blockref_t *bref;
1020         hammer2_key_t scan_beg;
1021         hammer2_key_t scan_end;
1022         int i;
1023         int count;
1024
1025         parent = *parentp;
1026
1027 again:
1028         /*
1029          * Calculate the next index and recalculate the parent if necessary.
1030          */
1031         if (chain) {
1032                 /*
1033                  * Continue iteration within current parent.  If not NULL
1034                  * the passed-in chain may or may not be locked, based on
1035                  * the LOOKUP_NOLOCK flag (passed in as returned from lookup
1036                  * or a prior next).
1037                  */
1038                 i = chain->index + 1;
1039                 if (flags & HAMMER2_LOOKUP_NOLOCK)
1040                         hammer2_chain_drop(hmp, chain);
1041                 else
1042                         hammer2_chain_put(hmp, chain);
1043
1044                 /*
1045                  * Any scan where the lookup returned degenerate data embedded
1046                  * in the inode has an invalid index and must terminate.
1047                  */
1048                 if (chain == parent)
1049                         return(NULL);
1050                 chain = NULL;
1051         } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT) {
1052                 /*
1053                  * We reached the end of the iteration.
1054                  */
1055                 return (NULL);
1056         } else {
1057                 /*
1058                  * Continue iteration with next parent unless the current
1059                  * parent covers the range.
1060                  */
1061                 hammer2_chain_t *nparent;
1062
1063                 if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT)
1064                         return (NULL);
1065
1066                 scan_beg = parent->bref.key;
1067                 scan_end = scan_beg +
1068                             ((hammer2_key_t)1 << parent->bref.keybits) - 1;
1069                 if (key_beg >= scan_beg && key_end <= scan_end)
1070                         return (NULL);
1071
1072                 i = parent->index + 1;
1073                 nparent = parent->parent;
1074                 hammer2_chain_ref(hmp, nparent);        /* ref new parent */
1075                 hammer2_chain_unlock(hmp, parent);
1076                 hammer2_chain_lock(hmp, nparent);       /* lock new parent */
1077                 hammer2_chain_drop(hmp, parent);        /* drop old parent */
1078                 *parentp = parent = nparent;
1079         }
1080
1081 again2:
1082         /*
1083          * Locate the blockref array.  Currently we do a fully associative
1084          * search through the array.
1085          */
1086         switch(parent->bref.type) {
1087         case HAMMER2_BREF_TYPE_INODE:
1088                 base = &parent->data->ipdata.u.blockset.blockref[0];
1089                 count = HAMMER2_SET_COUNT;
1090                 break;
1091         case HAMMER2_BREF_TYPE_INDIRECT:
1092                 base = &parent->data->npdata.blockref[0];
1093                 count = parent->bytes / sizeof(hammer2_blockref_t);
1094                 break;
1095         case HAMMER2_BREF_TYPE_VOLUME:
1096                 base = &hmp->voldata.sroot_blockset.blockref[0];
1097                 count = HAMMER2_SET_COUNT;
1098                 break;
1099         default:
1100                 panic("hammer2_chain_next: unrecognized blockref type: %d",
1101                       parent->bref.type);
1102                 base = NULL;    /* safety */
1103                 count = 0;      /* safety */
1104                 break;
1105         }
1106         KKASSERT(i <= count);
1107
1108         /*
1109          * Look for the key.  If we are unable to find a match and an exact
1110          * match was requested we return NULL.  If a range was requested we
1111          * run hammer2_chain_next() to iterate.
1112          */
1113         bref = NULL;
1114         while (i < count) {
1115                 tmp = hammer2_chain_find(hmp, parent, i);
1116                 bref = (tmp) ? &tmp->bref : &base[i];
1117                 if (bref->type == 0) {
1118                         ++i;
1119                         continue;
1120                 }
1121                 scan_beg = bref->key;
1122                 scan_end = scan_beg + ((hammer2_key_t)1 << bref->keybits) - 1;
1123                 if (key_beg <= scan_end && key_end >= scan_beg)
1124                         break;
1125                 ++i;
1126         }
1127
1128         /*
1129          * If we couldn't find a match recurse up a parent to continue the
1130          * search.
1131          */
1132         if (i == count)
1133                 goto again;
1134
1135         /*
1136          * Acquire the new chain element.  If the chain element is an
1137          * indirect block we must search recursively.
1138          */
1139         chain = hammer2_chain_get(hmp, parent, i, flags);
1140         if (chain == NULL)
1141                 return (NULL);
1142
1143         /*
1144          * If the chain element is an indirect block it becomes the new
1145          * parent and we loop on it.  We may have to lock the chain when
1146          * cycling it in as the new parent as it will not be locked if the
1147          * caller passed NOLOCK.
1148          */
1149         if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
1150                 hammer2_chain_put(hmp, parent);
1151                 *parentp = parent = chain;
1152                 if (flags & HAMMER2_LOOKUP_NOLOCK)
1153                         hammer2_chain_lock(hmp, chain);
1154                 i = 0;
1155                 goto again2;
1156         }
1157
1158         /*
1159          * All done, return chain
1160          */
1161         return (chain);
1162 }
1163
1164 /*
1165  * Create and return a new hammer2 system memory structure of the specified
1166  * key, type and size and insert it RELATIVE TO (PARENT).
1167  *
1168  * (parent) is typically either an inode or an indirect  block, acquired
1169  * acquired as a side effect of issuing a prior failed lookup.  parent
1170  * must be locked and held.  Do not pass the inode chain to this function
1171  * unless that is the chain returned by the failed lookup.
1172  *
1173  * Non-indirect types will automatically allocate indirect blocks as required
1174  * if the new item does not fit in the current (parent).
1175  *
1176  * Indirect types will move a portion of the existing blockref array in
1177  * (parent) into the new indirect type and then use one of the free slots
1178  * to emplace the new indirect type.
1179  *
1180  * A new locked, referenced chain element is returned of the specified type.
1181  * This element will also be marked as modified and contain a data area
1182  * ready for initialization.
1183  */
1184 hammer2_chain_t *
1185 hammer2_chain_create(hammer2_mount_t *hmp, hammer2_chain_t *parent,
1186                      hammer2_chain_t *chain,
1187                      hammer2_key_t key, int keybits, int type, size_t bytes)
1188 {
1189         hammer2_blockref_t dummy;
1190         hammer2_blockref_t *base;
1191         hammer2_blockref_t *bref;
1192         hammer2_chain_t dummy_chain;
1193         int unlock_parent = 0;
1194         int allocated = 0;
1195         int count;
1196         int i;
1197
1198         if (chain == NULL) {
1199                 /*
1200                  * First allocate media space and construct the dummy bref,
1201                  * then allocate the in-memory chain structure.
1202                  */
1203                 bzero(&dummy, sizeof(dummy));
1204                 dummy.type = type;
1205                 dummy.key = key;
1206                 dummy.keybits = keybits;
1207                 dummy.data_off = hammer2_bytes_to_radix(bytes);
1208                 chain = hammer2_chain_alloc(hmp, &dummy);
1209                 allocated = 1;
1210
1211                 /*
1212                  * We set the WAS_MODIFIED flag here so the chain gets
1213                  * marked as modified below.
1214                  */
1215                 chain->flags |= HAMMER2_CHAIN_INITIAL |
1216                                 HAMMER2_CHAIN_WAS_MODIFIED;
1217
1218                 /*
1219                  * Recalculate bytes to reflect the actual media block
1220                  * allocation.
1221                  */
1222                 bytes = (hammer2_off_t)1 <<
1223                         (int)(chain->bref.data_off & HAMMER2_OFF_MASK_RADIX);
1224                 chain->bytes = bytes;
1225
1226                 switch(type) {
1227                 case HAMMER2_BREF_TYPE_VOLUME:
1228                         panic("hammer2_chain_create: called with volume type");
1229                         break;
1230                 case HAMMER2_BREF_TYPE_INODE:
1231                         KKASSERT(bytes == HAMMER2_INODE_BYTES);
1232                         chain->data = (void *)&chain->u.ip->ip_data;
1233                         break;
1234                 case HAMMER2_BREF_TYPE_INDIRECT:
1235                         /*
1236                          * May or may not be embedded (chain->data may or
1237                          * may not be NULL)
1238                          */
1239                         if (chain->u.np->is_embedded) {
1240                                 chain->data = (void *)&chain->u.np->buf[0];
1241                         } else {
1242                                 KKASSERT(chain->data == NULL);
1243                         }
1244                         break;
1245                 default:
1246                         /* leave chain->data NULL */
1247                         KKASSERT(chain->data == NULL);
1248                         break;
1249                 }
1250         } else {
1251                 /*
1252                  * Potentially update the chain's key/keybits, but it will
1253                  * only be marked modified if WAS_MODIFIED is set (if it
1254                  * was modified at the time of its removal during a rename).
1255                  */
1256                 chain->bref.key = key;
1257                 chain->bref.keybits = keybits;
1258         }
1259
1260 again:
1261         /*
1262          * Locate a free blockref in the parent's array
1263          */
1264         switch(parent->bref.type) {
1265         case HAMMER2_BREF_TYPE_INODE:
1266                 KKASSERT(parent->data != NULL);
1267                 base = &parent->data->ipdata.u.blockset.blockref[0];
1268                 count = HAMMER2_SET_COUNT;
1269                 break;
1270         case HAMMER2_BREF_TYPE_INDIRECT:
1271                 KKASSERT(parent->data != NULL);
1272                 base = &parent->data->npdata.blockref[0];
1273                 count = parent->bytes / sizeof(hammer2_blockref_t);
1274                 break;
1275         case HAMMER2_BREF_TYPE_VOLUME:
1276                 KKASSERT(parent->data != NULL);
1277                 base = &hmp->voldata.sroot_blockset.blockref[0];
1278                 count = HAMMER2_SET_COUNT;
1279                 break;
1280         default:
1281                 panic("hammer2_chain_create: unrecognized blockref type: %d",
1282                       parent->bref.type);
1283                 count = 0;
1284                 break;
1285         }
1286
1287         /*
1288          * Scan for an unallocated bref, also skipping any slots occupied
1289          * by in-memory chain elements that may not yet have been updated
1290          * in the parent's bref array.
1291          */
1292         bzero(&dummy_chain, sizeof(dummy_chain));
1293         bref = NULL;
1294         for (i = 0; i < count; ++i) {
1295                 bref = &base[i];
1296                 dummy_chain.index = i;
1297                 if (bref->type == 0 &&
1298                     SPLAY_FIND(hammer2_chain_splay,
1299                                &parent->shead, &dummy_chain) == NULL) {
1300                         break;
1301                 }
1302         }
1303
1304         /*
1305          * If no free blockref count be found we must create an indirect
1306          * block and move a number of blockrefs into it.  With the parent
1307          * locked we can safely lock each child in order to move it without
1308          * causing a deadlock.
1309          *
1310          * This may return the new indirect block or the old parent depending
1311          * on where the key falls.
1312          */
1313         if (i == count) {
1314                 hammer2_chain_t *nparent;
1315
1316                 nparent = hammer2_chain_create_indirect(hmp, parent,
1317                                                         key, keybits);
1318                 if (nparent == NULL) {
1319                         if (allocated)
1320                                 hammer2_chain_free(hmp, chain);
1321                         chain = NULL;
1322                         goto done;
1323                 }
1324                 if (parent != nparent) {
1325                         if (unlock_parent)
1326                                 hammer2_chain_put(hmp, parent);
1327                         parent = nparent;
1328                         unlock_parent = 1;
1329                 }
1330                 goto again;
1331         }
1332
1333         /*
1334          * Link the chain into its parent.
1335          */
1336         if (chain->parent != NULL)
1337                 panic("hammer2: hammer2_chain_create: chain already connected");
1338         KKASSERT(chain->parent == NULL);
1339         chain->parent = parent;
1340         chain->index = i;
1341         if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, chain))
1342                 panic("hammer2_chain_link: collision");
1343         atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DELETED);
1344         KKASSERT(parent->refs > 1);
1345         atomic_add_int(&parent->refs, 1);
1346
1347         /*
1348          * Additional linkage for inodes.  Reuse the parent pointer to
1349          * find the parent directory.
1350          */
1351         if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
1352                 hammer2_chain_t *scan = parent;
1353                 while (scan->bref.type == HAMMER2_BREF_TYPE_INDIRECT)
1354                         scan = scan->parent;
1355                 if (scan->bref.type == HAMMER2_BREF_TYPE_INODE)
1356                         chain->u.ip->pip = scan->u.ip;
1357         }
1358
1359         /*
1360          * Mark the newly created or previously disconnected chain element
1361          * as modified and fully resolve the chain->data pointer.  The
1362          * WAS_MODIFIED bit will be set in both cases.
1363          *
1364          * Chain elements with embedded data will not issue I/O at this time.
1365          * A new block will be allocated for the buffer but not instantiated.
1366          *
1367          * Chain elements which do not use embedded data will allocate
1368          * the new block AND instantiate its buffer cache buffer, pointing
1369          * the data at the bp.
1370          */
1371         if (chain->flags & HAMMER2_CHAIN_WAS_MODIFIED) {
1372                 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_WAS_MODIFIED);
1373                 hammer2_chain_modify(hmp, chain);
1374         }
1375
1376 done:
1377         if (unlock_parent)
1378                 hammer2_chain_put(hmp, parent);
1379         return (chain);
1380 }
1381
1382 /*
1383  * Create an indirect block that covers one or more of the elements in the
1384  * current parent.  Either returns the existing parent with no locking or
1385  * ref changes or returns the new indirect block locked and referenced,
1386  * depending on what the specified key falls into.
1387  *
1388  * The key/keybits for the indirect mode only needs to follow three rules:
1389  *
1390  * (1) That all elements underneath it fit within its key space and
1391  *
1392  * (2) That all elements outside it are outside its key space.
1393  *
1394  * (3) When creating the new indirect block any elements in the current
1395  *     parent that fit within the new indirect block's keyspace must be
1396  *     moved into the new indirect block.
1397  *
1398  * (4) The keyspace chosen for the inserted indirect block CAN cover a wider
1399  *     keyspace the the current parent, but lookup/iteration rules will
1400  *     ensure (and must ensure) that rule (2) for all parents leading up
1401  *     to the nearest inode or the root volume header is adhered to.  This
1402  *     is accomplished by always recursing through matching keyspaces in
1403  *     the hammer2_chain_lookup() and hammer2_chain_next() API.
1404  *
1405  * The current implementation calculates the current worst-case keyspace by
1406  * iterating the current parent and then divides it into two halves, choosing
1407  * whichever half has the most elements (not necessarily the half containing
1408  * the requested key).
1409  *
1410  * We can also opt to use the half with the least number of elements.  This
1411  * causes lower-numbered keys (aka logical file offsets) to recurse through
1412  * fewer indirect blocks and higher-numbered keys to recurse through more.
1413  * This also has the risk of not moving enough elements to the new indirect
1414  * block and being forced to create several indirect blocks before the element
1415  * can be inserted.
1416  */
1417 static
1418 hammer2_chain_t *
1419 hammer2_chain_create_indirect(hammer2_mount_t *hmp, hammer2_chain_t *parent,
1420                               hammer2_key_t create_key, int create_bits)
1421 {
1422         hammer2_blockref_t *base;
1423         hammer2_blockref_t *bref;
1424         hammer2_chain_t *chain;
1425         hammer2_chain_t *ichain;
1426         hammer2_chain_t dummy;
1427         hammer2_key_t key = create_key;
1428         int keybits = create_bits;
1429         int locount = 0;
1430         int hicount = 0;
1431         int count;
1432         int nbytes;
1433         int i;
1434
1435         /*
1436          * Mark the parent modified so our base[] pointer remains valid
1437          * while we move entries.
1438          */
1439         hammer2_chain_modify(hmp, parent);
1440
1441         /*
1442          * Locate a free blockref in the parent's array
1443          */
1444         switch(parent->bref.type) {
1445         case HAMMER2_BREF_TYPE_INODE:
1446                 base = &parent->data->ipdata.u.blockset.blockref[0];
1447                 count = HAMMER2_SET_COUNT;
1448                 break;
1449         case HAMMER2_BREF_TYPE_INDIRECT:
1450                 base = &parent->data->npdata.blockref[0];
1451                 count = parent->bytes / sizeof(hammer2_blockref_t);
1452                 break;
1453         case HAMMER2_BREF_TYPE_VOLUME:
1454                 base = &hmp->voldata.sroot_blockset.blockref[0];
1455                 count = HAMMER2_SET_COUNT;
1456                 break;
1457         default:
1458                 panic("hammer2_chain_create_indirect: "
1459                       "unrecognized blockref type: %d",
1460                       parent->bref.type);
1461                 count = 0;
1462                 break;
1463         }
1464
1465         /*
1466          * Scan for an unallocated bref, also skipping any slots occupied
1467          * by in-memory chain elements that may not yet have been updated
1468          * in the parent's bref array.
1469          */
1470         bzero(&dummy, sizeof(dummy));
1471         for (i = 0; i < count; ++i) {
1472                 int nkeybits;
1473
1474                 bref = &base[i];
1475                 if (bref->type == 0) {
1476                         dummy.index = i;
1477                         chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead,
1478                                            &dummy);
1479                         if (chain == NULL)
1480                                 continue;
1481                         bref = &chain->bref;
1482                 }
1483
1484                 /*
1485                  * Expand our calculated key range (key, keybits) to fit
1486                  * the scanned key.  nkeybits represents the full range
1487                  * that we will later cut in half (two halves @ nkeybits - 1).
1488                  */
1489                 nkeybits = keybits;
1490                 if (nkeybits < bref->keybits)
1491                         nkeybits = bref->keybits;
1492                 while ((~(((hammer2_key_t)1 << nkeybits) - 1) &
1493                         (key ^ bref->key)) != 0) {
1494                         ++nkeybits;
1495                 }
1496
1497                 /*
1498                  * If the new key range is larger we have to determine
1499                  * which side of the new key range the existing keys fall
1500                  * under by checking the high bit, then collapsing the
1501                  * locount into the hicount or vise-versa.
1502                  */
1503                 if (keybits != nkeybits) {
1504                         if (((hammer2_key_t)1 << (nkeybits - 1)) & key) {
1505                                 hicount += locount;
1506                                 locount = 0;
1507                         } else {
1508                                 locount += hicount;
1509                                 hicount = 0;
1510                         }
1511                         keybits = nkeybits;
1512                 }
1513
1514                 /*
1515                  * The newly scanned key will be in the lower half or the
1516                  * higher half of the (new) key range.
1517                  */
1518                 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key)
1519                         ++hicount;
1520                 else
1521                         ++locount;
1522         }
1523
1524         /*
1525          * Adjust keybits to represent half of the full range calculated
1526          * above.
1527          */
1528         --keybits;
1529
1530         /*
1531          * Select whichever half contains the most elements.  Theoretically
1532          * we can select either side as long as it contains at least one
1533          * element (in order to ensure that a free slot is present to hold
1534          * the indirect block).
1535          */
1536         key &= ~(((hammer2_key_t)1 << keybits) - 1);
1537         if (hammer2_indirect_optimize) {
1538                 /*
1539                  * Insert node for least number of keys, this will arrange
1540                  * the first few blocks of a large file or the first few
1541                  * inodes in a directory with fewer indirect blocks when
1542                  * created linearly.
1543                  */
1544                 if (hicount < locount && hicount != 0)
1545                         key |= (hammer2_key_t)1 << keybits;
1546                 else
1547                         key &= ~(hammer2_key_t)1 << keybits;
1548         } else {
1549                 /*
1550                  * Insert node for most number of keys, best for heavily
1551                  * fragmented files.
1552                  */
1553                 if (hicount > locount)
1554                         key |= (hammer2_key_t)1 << keybits;
1555                 else
1556                         key &= ~(hammer2_key_t)1 << keybits;
1557         }
1558
1559         /*
1560          * How big should our new indirect block be?  It has to be at least
1561          * as large as its parent.
1562          */
1563         if (parent->bref.type == HAMMER2_BREF_TYPE_INODE)
1564                 nbytes = HAMMER2_IND_BYTES_MIN;
1565         else
1566                 nbytes = HAMMER2_IND_BYTES_MAX;
1567         if (nbytes < count * sizeof(hammer2_blockref_t))
1568                 nbytes = count * sizeof(hammer2_blockref_t);
1569
1570         /*
1571          * Ok, create our new indirect block
1572          */
1573         dummy.bref.type = HAMMER2_BREF_TYPE_INDIRECT;
1574         dummy.bref.key = key;
1575         dummy.bref.keybits = keybits;
1576         dummy.bref.data_off = hammer2_bytes_to_radix(nbytes);
1577         ichain = hammer2_chain_alloc(hmp, &dummy.bref);
1578         ichain->flags |= HAMMER2_CHAIN_INITIAL;
1579
1580         /*
1581          * Iterate the original parent and move the matching brefs into
1582          * the new indirect block.
1583          */
1584         for (i = 0; i < count; ++i) {
1585                 /*
1586                  * For keying purposes access the bref from the media or
1587                  * from our in-memory cache.  In cases where the in-memory
1588                  * cache overrides the media the keyrefs will be the same
1589                  * anyway so we can avoid checking the cache when the media
1590                  * has a key.
1591                  */
1592                 bref = &base[i];
1593                 if (bref->type == 0) {
1594                         dummy.index = i;
1595                         chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead,
1596                                            &dummy);
1597                         if (chain == NULL) {
1598                                 /*
1599                                  * Select index indirect block is placed in
1600                                  */
1601                                 if (ichain->index < 0)
1602                                         ichain->index = i;
1603                                 continue;
1604                         }
1605                         bref = &chain->bref;
1606                 }
1607
1608                 /*
1609                  * Skip keys not in the chosen half (low or high), only bit
1610                  * (keybits - 1) needs to be compared but for safety we
1611                  * will compare all msb bits plus that bit again.
1612                  */
1613                 if ((~(((hammer2_key_t)1 << keybits) - 1) &
1614                     (key ^ bref->key)) != 0) {
1615                         continue;
1616                 }
1617
1618                 /*
1619                  * This element is being moved, its slot is available
1620                  * for our indirect block.
1621                  */
1622                 if (ichain->index < 0)
1623                         ichain->index = i;
1624
1625                 /*
1626                  * Load the new indirect block by acquiring or allocating
1627                  * the related chain entries, then simply move it to the
1628                  * new parent (ichain).
1629                  *
1630                  * Flagging the new chain entry MOVED will cause a flush
1631                  * to synchronize its block into the new indirect block.
1632                  * The chain is unlocked after being moved but needs to
1633                  * retain a reference for the MOVED state
1634                  *
1635                  * We must still set SUBMODIFIED in the parent but we do
1636                  * that after the loop.
1637                  *
1638                  * XXX we really need a lock here but we don't need the
1639                  *     data.  NODATA feature needed.
1640                  */
1641                 chain = hammer2_chain_get(hmp, parent, i,
1642                                           HAMMER2_LOOKUP_NOLOCK);
1643                 SPLAY_REMOVE(hammer2_chain_splay, &parent->shead, chain);
1644                 if (SPLAY_INSERT(hammer2_chain_splay, &ichain->shead, chain))
1645                         panic("hammer2_chain_create_indirect: collision");
1646                 chain->parent = ichain;
1647                 bzero(&base[i], sizeof(base[i]));
1648                 atomic_add_int(&parent->refs, -1);
1649                 atomic_add_int(&ichain->refs, 1);
1650                 if (chain->flags & HAMMER2_CHAIN_MOVED) {
1651                         /* We don't need the ref from the chain_get */
1652                         hammer2_chain_drop(hmp, chain);
1653                 } else {
1654                         /* MOVED bit inherits the ref from the chain_get */
1655                         atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED);
1656                 }
1657                 KKASSERT(parent->refs > 0);
1658                 chain = NULL;
1659         }
1660
1661         /*
1662          * Insert the new indirect block into the parent now that we've
1663          * cleared out some entries in the parent.  We calculated a good
1664          * insertion index in the loop above (ichain->index).
1665          */
1666         KKASSERT(ichain->index >= 0);
1667         if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, ichain))
1668                 panic("hammer2_chain_create_indirect: ichain insertion");
1669         ichain->parent = parent;
1670         atomic_add_int(&parent->refs, 1);
1671
1672         /*
1673          * Mark the new indirect block modified after insertion, which
1674          * will propagate up through parent all the way to the root and
1675          * also allocate the physical block in ichain for our caller,
1676          * and assign ichain->data to a pre-zero'd space (because there
1677          * is not prior data to copy into it).
1678          *
1679          * We have to set SUBMODIFIED in ichain's flags manually so the
1680          * flusher knows it has to recurse through it to get to all of
1681          * our moved blocks.
1682          */
1683         hammer2_chain_modify(hmp, ichain);
1684         atomic_set_int(&ichain->flags, HAMMER2_CHAIN_SUBMODIFIED);
1685
1686         /*
1687          * Figure out what to return.
1688          */
1689         if (create_bits >= keybits) {
1690                 /*
1691                  * Key being created is way outside the key range,
1692                  * return the original parent.
1693                  */
1694                 hammer2_chain_put(hmp, ichain);
1695         } else if (~(((hammer2_key_t)1 << keybits) - 1) &
1696                    (create_key ^ key)) {
1697                 /*
1698                  * Key being created is outside the key range,
1699                  * return the original parent.
1700                  */
1701                 hammer2_chain_put(hmp, ichain);
1702         } else {
1703                 /*
1704                  * Otherwise its in the range, return the new parent.
1705                  */
1706                 parent = ichain;
1707         }
1708
1709         return(parent);
1710 }
1711
1712 /*
1713  * Physically delete the specified chain element.  Note that inodes with
1714  * open descriptors should not be deleted (as with other filesystems) until
1715  * the last open descriptor is closed.
1716  *
1717  * This routine will remove the chain element from its parent and potentially
1718  * also recurse upward and delete indirect blocks which become empty as a
1719  * side effect.
1720  *
1721  * The caller must pass a pointer to the chain's parent, also locked and
1722  * referenced.  (*parentp) will be modified in a manner similar to a lookup
1723  * or iteration when indirect blocks are also deleted as a side effect.
1724  */
1725 void
1726 hammer2_chain_delete(hammer2_mount_t *hmp, hammer2_chain_t *parent,
1727                      hammer2_chain_t *chain)
1728 {
1729         hammer2_blockref_t *base;
1730         int count;
1731
1732         if (chain->parent != parent)
1733                 panic("hammer2_chain_delete: parent mismatch");
1734
1735         /*
1736          * Mark the parent modified so our base[] pointer remains valid
1737          * while we move entries.
1738          *
1739          * Calculate the blockref reference in the parent
1740          */
1741         hammer2_chain_modify(hmp, parent);
1742
1743         switch(parent->bref.type) {
1744         case HAMMER2_BREF_TYPE_INODE:
1745                 base = &parent->data->ipdata.u.blockset.blockref[0];
1746                 count = HAMMER2_SET_COUNT;
1747                 break;
1748         case HAMMER2_BREF_TYPE_INDIRECT:
1749                 base = &parent->data->npdata.blockref[0];
1750                 count = parent->bytes / sizeof(hammer2_blockref_t);
1751                 break;
1752         case HAMMER2_BREF_TYPE_VOLUME:
1753                 base = &hmp->voldata.sroot_blockset.blockref[0];
1754                 count = HAMMER2_SET_COUNT;
1755                 break;
1756         default:
1757                 panic("hammer2_chain_delete: unrecognized blockref type: %d",
1758                       parent->bref.type);
1759                 count = 0;
1760                 break;
1761         }
1762
1763         /*
1764          * Disconnect the bref in the parent, remove the chain, and
1765          * disconnect in-memory fields from the parent.
1766          */
1767         KKASSERT(chain->index >= 0 && chain->index < count);
1768         base += chain->index;
1769         bzero(base, sizeof(*base));
1770
1771         SPLAY_REMOVE(hammer2_chain_splay, &parent->shead, chain);
1772         atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
1773         atomic_add_int(&parent->refs, -1);      /* for splay entry */
1774         chain->index = -1;
1775
1776         chain->parent = NULL;
1777         if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
1778                 chain->u.ip->pip = NULL;
1779
1780         /*
1781          * Nobody references the underlying object any more so we can
1782          * clear any pending modification(s) on it.  This can theoretically
1783          * recurse downward but even just clearing the bit on this item
1784          * will effectively recurse if someone is doing a rm -rf and greatly
1785          * reduce the I/O required.
1786          *
1787          * The MODIFIED1 bit is cleared but we have to remember the old state
1788          * in case this deletion is related to a rename.  The ref on the
1789          * chain is shared by both modified flags.
1790          */
1791         if (chain->flags & HAMMER2_CHAIN_MODIFIED1) {
1792                 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED1);
1793                 atomic_set_int(&chain->flags, HAMMER2_CHAIN_WAS_MODIFIED);
1794                 if ((chain->flags & HAMMER2_CHAIN_MODIFIED2) == 0)
1795                         hammer2_chain_drop(hmp, chain);
1796         }
1797 }
1798
1799 /*
1800  * Recursively flush the specified chain.  The chain is locked and
1801  * referenced by the caller and will remain so on return.
1802  *
1803  * This cannot be called with the volume header's vchain (yet).
1804  *
1805  * PASS1 - clear the MODIFIED1 bit (and set the MODIFIED2 bit XXX)
1806  *
1807  */
1808 static void
1809 hammer2_chain_flush_pass1(hammer2_mount_t *hmp, hammer2_chain_t *chain)
1810 {
1811         /*
1812          * Flush any children of this chain entry.
1813          */
1814         if (chain->flags & HAMMER2_CHAIN_SUBMODIFIED) {
1815                 hammer2_blockref_t *base;
1816                 hammer2_chain_t *scan;
1817                 hammer2_chain_t *next;
1818                 int count;
1819                 int submodified = 0;
1820
1821                 /*
1822                  * Modifications to the children will propagate up, forcing
1823                  * us to become modified and copy-on-write too.
1824                  *
1825                  * Clear SUBMODIFIED now, races during the flush will re-set
1826                  * it.
1827                  */
1828                 hammer2_chain_modify(hmp, chain);
1829                 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_SUBMODIFIED);
1830
1831                 /*
1832                  * The blockref in the parent's array must be repointed at
1833                  * the new block allocated by the child after its flush.
1834                  *
1835                  * Calculate the base of the array.
1836                  */
1837                 switch(chain->bref.type) {
1838                 case HAMMER2_BREF_TYPE_INODE:
1839                         KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
1840                         base = &chain->data->ipdata.u.blockset.blockref[0];
1841                         count = HAMMER2_SET_COUNT;
1842                         break;
1843                 case HAMMER2_BREF_TYPE_INDIRECT:
1844                         base = &chain->data->npdata.blockref[0];
1845                         count = chain->bytes / sizeof(hammer2_blockref_t);
1846                         break;
1847                 case HAMMER2_BREF_TYPE_VOLUME:
1848                         KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
1849                         base = &hmp->voldata.sroot_blockset.blockref[0];
1850                         count = HAMMER2_SET_COUNT;
1851                         break;
1852                 default:
1853                         base = NULL;
1854                         panic("hammer2_chain_get: unrecognized blockref type: %d",
1855                               chain->bref.type);
1856                 }
1857
1858                 /*
1859                  * Flush the children and update the blockrefs in the parent.
1860                  * Be careful of ripouts during the loop.
1861                  */
1862                 next = SPLAY_MIN(hammer2_chain_splay, &chain->shead);
1863                 while ((scan = next) != NULL) {
1864                         next = SPLAY_NEXT(hammer2_chain_splay, &chain->shead,
1865                                           scan);
1866                         if (scan->flags & (HAMMER2_CHAIN_SUBMODIFIED |
1867                                            HAMMER2_CHAIN_MODIFIED1 |
1868                                            HAMMER2_CHAIN_MOVED)) {
1869                                 hammer2_chain_ref(hmp, scan);
1870                                 hammer2_chain_lock(hmp, scan);
1871                                 hammer2_chain_flush_pass1(hmp, scan);
1872                                 if (scan->flags & (HAMMER2_CHAIN_SUBMODIFIED |
1873                                                    HAMMER2_CHAIN_MODIFIED1)) {
1874                                         submodified = 1;
1875                                 } else {
1876                                         KKASSERT(scan->index < count);
1877                                         base[scan->index] = scan->bref;
1878                                         if (scan->flags & HAMMER2_CHAIN_MOVED) {
1879                                                 atomic_clear_int(&scan->flags,
1880                                                          HAMMER2_CHAIN_MOVED);
1881                                                 hammer2_chain_drop(hmp, scan);
1882                                         }
1883                                 }
1884                                 hammer2_chain_put(hmp, scan);
1885                         }
1886                 }
1887                 if (submodified) {
1888                         atomic_set_int(&chain->flags,
1889                                        HAMMER2_CHAIN_SUBMODIFIED);
1890                 }
1891         }
1892
1893         /*
1894          * Flush this chain entry only if it is marked modified.
1895          */
1896         if ((chain->flags & HAMMER2_CHAIN_MODIFIED1) == 0)
1897                 return;
1898
1899         /*
1900          * Clear MODIFIED1 and set HAMMER2_CHAIN_MOVED.  The MODIFIED{1,2}
1901          * bits own a single parent ref and the MOVED bit owns its own
1902          * parent ref.
1903          */
1904         atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED1);
1905         if (chain->flags & HAMMER2_CHAIN_MOVED) {
1906                 if ((chain->flags & HAMMER2_CHAIN_MODIFIED2) == 0)
1907                         hammer2_chain_drop(hmp, chain);
1908         } else {
1909                 atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED);
1910                 if (chain->flags & HAMMER2_CHAIN_MODIFIED2)
1911                         hammer2_chain_ref(hmp, chain);
1912         }
1913
1914         /*
1915          * If this is part of a recursive flush we can go ahead and write
1916          * out the buffer cache buffer and pass a new bref back up the chain.
1917          *
1918          * This will never be a volume header.
1919          */
1920         if (chain != &hmp->vchain) {
1921                 hammer2_blockref_t *bref;
1922                 hammer2_off_t off_hi;
1923                 struct buf *bp;
1924                 size_t off_lo;
1925                 size_t bytes;
1926                 int error;
1927
1928                 KKASSERT(chain->data != NULL);
1929                 bref = &chain->bref;
1930
1931                 off_hi = bref->data_off & HAMMER2_OFF_MASK_HI;
1932                 off_lo = (size_t)bref->data_off & HAMMER2_OFF_MASK_LO;
1933                 bytes = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
1934                 KKASSERT(off_hi != 0);  /* not the root volume header */
1935
1936                 if (chain->bp == NULL) {
1937                         /*
1938                          * The data is embedded, we have to acquire the
1939                          * buffer cache buffer and copy the data into it.
1940                          */
1941                         bp = NULL;
1942                         error = bread(hmp->devvp, off_hi,
1943                                       HAMMER2_PBUFSIZE, &bp);
1944                         KKASSERT(error == 0); /* XXX */
1945
1946                         /*
1947                          * Copy the data to the buffer, mark the buffer
1948                          * dirty, and convert the chain to unmodified.
1949                          */
1950                         bcopy(chain->data, (char *)bp->b_data + off_lo, bytes);
1951                         bdwrite(bp);
1952                         bp = NULL;
1953
1954                         chain->bref.check.iscsi32.value =
1955                                         hammer2_icrc32(chain->data, bytes);
1956                 }
1957         }
1958         {
1959                 hammer2_blockref_t *bref;
1960
1961                 bref = &chain->bref;
1962
1963                 switch(bref->type) {
1964                 case HAMMER2_BREF_TYPE_VOLUME:
1965                         KKASSERT(chain->data != NULL);
1966                         KKASSERT(chain->bp == NULL);
1967
1968                         hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]=
1969                                 hammer2_icrc32(
1970                                         (char *)&hmp->voldata +
1971                                          HAMMER2_VOLUME_ICRC1_OFF,
1972                                         HAMMER2_VOLUME_ICRC1_SIZE);
1973                         hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]=
1974                                 hammer2_icrc32(
1975                                         (char *)&hmp->voldata +
1976                                          HAMMER2_VOLUME_ICRC0_OFF,
1977                                         HAMMER2_VOLUME_ICRC0_SIZE);
1978                         hmp->voldata.icrc_volheader =
1979                                 hammer2_icrc32(
1980                                         (char *)&hmp->voldata +
1981                                          HAMMER2_VOLUME_ICRCVH_OFF,
1982                                         HAMMER2_VOLUME_ICRCVH_SIZE);
1983                         break;
1984                 }
1985         }
1986 }
1987
1988 #if 0
1989 /*
1990  * PASS2 - not yet implemented (should be called only with the root chain?)
1991  */
1992 static void
1993 hammer2_chain_flush_pass2(hammer2_mount_t *hmp, hammer2_chain_t *chain)
1994 {
1995 }
1996 #endif
1997
1998 void
1999 hammer2_chain_flush(hammer2_mount_t *hmp, hammer2_chain_t *chain)
2000 {
2001         hammer2_chain_flush_pass1(hmp, chain);
2002 }