hammer2 - Start adding ioctl infrastructure, start writing hammer2 utility
[dragonfly.git] / sys / vfs / hammer2 / hammer2_chain.c
1 /*
2  * Copyright (c) 2011-2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 /*
36  * This subsystem handles direct and indirect block searches, recursions,
37  * creation, and deletion.  Chains of blockrefs are tracked and modifications
38  * are flag for propagation... eventually all the way back to the volume
39  * header.
40  */
41
42 #include <sys/cdefs.h>
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/types.h>
46 #include <sys/lock.h>
47 #include <sys/uuid.h>
48
49 #include "hammer2.h"
50
51 static int hammer2_indirect_optimize;   /* XXX SYSCTL */
52
53 static hammer2_chain_t *hammer2_chain_create_indirect(
54                         hammer2_mount_t *hmp, hammer2_chain_t *parent,
55                         hammer2_key_t key, int keybits);
56
57 /*
58  * Splay tree
59  */
60 SPLAY_GENERATE(hammer2_chain_splay, hammer2_chain, snode, hammer2_chain_cmp);
61
62 int
63 hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2)
64 {
65         return(chain2->index - chain1->index);
66 }
67
68 /*
69  * Recursively mark the parent chain elements so flushes can find
70  * modified elements.
71  *
72  * NOTE: The flush code will modify a SUBMODIFIED-flagged chain
73  *       during the flush recursion after clearing the parent's
74  *       SUBMODIFIED bit.  We don't want to re-set the parent's
75  *       SUBMODIFIED bit in this case!
76  *
77  * XXX rename of parent can create a SMP race
78  */
79 static void
80 hammer2_chain_parent_setsubmod(hammer2_mount_t *hmp, hammer2_chain_t *chain)
81 {
82         hammer2_chain_t *parent;
83
84         if ((chain->flags & HAMMER2_CHAIN_SUBMODIFIED) == 0) {
85                 parent = chain->parent;
86                 while (parent &&
87                        (parent->flags & HAMMER2_CHAIN_SUBMODIFIED) == 0) {
88                         atomic_set_int(&parent->flags,
89                                        HAMMER2_CHAIN_SUBMODIFIED);
90                         parent = parent->parent;
91                 }
92         }
93 }
94
95 /*
96  * Allocate a new disconnected chain element representing the specified
97  * bref.  The chain element is locked exclusively and refs is set to 1.
98  *
99  * This essentially allocates a system memory structure representing one
100  * of the media structure types, including inodes.
101  */
102 hammer2_chain_t *
103 hammer2_chain_alloc(hammer2_mount_t *hmp, hammer2_blockref_t *bref)
104 {
105         hammer2_chain_t *chain;
106         hammer2_inode_t *ip;
107         hammer2_indblock_t *np;
108         hammer2_data_t *dp;
109         u_int bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
110
111         /*
112          * Construct the appropriate system structure.
113          */
114         switch(bref->type) {
115         case HAMMER2_BREF_TYPE_INODE:
116                 ip = kmalloc(sizeof(*ip), hmp->minode, M_WAITOK | M_ZERO);
117                 chain = &ip->chain;
118                 chain->u.ip = ip;
119                 lockinit(&chain->lk, "inode", 0, LK_CANRECURSE);
120                 ip->hmp = hmp;
121                 break;
122         case HAMMER2_BREF_TYPE_INDIRECT:
123                 np = kmalloc(sizeof(*np), hmp->mchain, M_WAITOK | M_ZERO);
124                 chain = &np->chain;
125                 chain->u.np = np;
126                 lockinit(&chain->lk, "iblk", 0, LK_CANRECURSE);
127                 break;
128         case HAMMER2_BREF_TYPE_DATA:
129                 dp = kmalloc(sizeof(*dp), hmp->mchain, M_WAITOK | M_ZERO);
130                 chain = &dp->chain;
131                 chain->u.dp = dp;
132                 lockinit(&chain->lk, "dblk", 0, LK_CANRECURSE);
133                 break;
134         case HAMMER2_BREF_TYPE_VOLUME:
135                 chain = NULL;
136                 panic("hammer2_chain_alloc volume type illegal for op");
137         default:
138                 chain = NULL;
139                 panic("hammer2_chain_alloc: unrecognized blockref type: %d",
140                       bref->type);
141         }
142         chain->bref = *bref;
143         chain->index = -1;      /* not yet assigned */
144         chain->refs = 1;
145         chain->bytes = bytes;
146         lockmgr(&chain->lk, LK_EXCLUSIVE);
147
148         return (chain);
149 }
150
151 /*
152  * Free a disconnected chain element
153  */
154 void
155 hammer2_chain_free(hammer2_mount_t *hmp, hammer2_chain_t *chain)
156 {
157         void *mem;
158
159         if (chain->bref.type == HAMMER2_BREF_TYPE_INODE ||
160             chain->bref.type == HAMMER2_BREF_TYPE_VOLUME) {
161                 chain->data = NULL;
162         }
163
164         KKASSERT(chain->bp == NULL);
165         KKASSERT(chain->data == NULL);
166         KKASSERT(chain->bref.type != HAMMER2_BREF_TYPE_INODE ||
167                  chain->u.ip->vp == NULL);
168
169         if ((mem = chain->u.mem) != NULL) {
170                 chain->u.mem = NULL;
171                 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
172                         kfree(mem, hmp->minode);
173                 else
174                         kfree(mem, hmp->mchain);
175         }
176 }
177
178 /*
179  * Add a reference to a chain element (for shared access).  The chain
180  * element must already have at least 1 ref controlled by the caller.
181  */
182 void
183 hammer2_chain_ref(hammer2_mount_t *hmp, hammer2_chain_t *chain)
184 {
185         KKASSERT(chain->refs > 0);
186         atomic_add_int(&chain->refs, 1);
187 }
188
189 /*
190  * Drop the callers reference to the chain element.  If the ref count
191  * reaches zero the chain element and its related structure (typically an
192  * inode or indirect block) will be freed and the parent will be
193  * recursively dropped.
194  *
195  * Modified elements hold an additional reference so it should not be
196  * possible for the count on a modified element to drop to 0.
197  *
198  * The chain element must NOT be locked by the caller.
199  *
200  * The parent might or might not be locked by the caller but if so it
201  * will also be referenced so we shouldn't recurse upward.
202  */
203 void
204 hammer2_chain_drop(hammer2_mount_t *hmp, hammer2_chain_t *chain)
205 {
206         hammer2_chain_t *parent;
207         u_int refs;
208
209         while (chain) {
210                 refs = chain->refs;
211                 cpu_ccfence();
212                 KKASSERT(refs > 0);
213                 if (refs == 1) {
214                         KKASSERT(chain != &hmp->vchain);
215                         parent = chain->parent;
216                         if (parent)
217                                 lockmgr(&parent->lk, LK_EXCLUSIVE);
218                         if (atomic_cmpset_int(&chain->refs, 1, 0)) {
219                                 /*
220                                  * Succeeded, recurse and drop parent
221                                  */
222                                 if (!(chain->flags & HAMMER2_CHAIN_DELETED)) {
223                                         SPLAY_REMOVE(hammer2_chain_splay,
224                                                      &parent->shead, chain);
225                                         atomic_set_int(&chain->flags,
226                                                        HAMMER2_CHAIN_DELETED);
227                                         /* parent refs dropped via recursion */
228                                 }
229                                 chain->parent = NULL;
230                                 if (parent)
231                                         lockmgr(&parent->lk, LK_RELEASE);
232                                 hammer2_chain_free(hmp, chain);
233                                 chain = parent;
234                                 /* recurse on parent */
235                         } else {
236                                 if (parent)
237                                         lockmgr(&parent->lk, LK_RELEASE);
238                                 /* retry the same chain */
239                         }
240                 } else {
241                         if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) {
242                                 /*
243                                  * Succeeded, count did not reach zero so
244                                  * cut out of the loop.
245                                  */
246                                 break;
247                         }
248                         /* retry the same chain */
249                 }
250         }
251 }
252
253 /*
254  * Ref and lock a chain element, acquiring its data with I/O if necessary,
255  * and specify how you would like the data to be resolved.
256  *
257  * Returns 0 on success or an error code if the data could not be acquired.
258  * The chain element is locked either way.
259  *
260  * The lock is allowed to recurse, multiple locking ops will aggregate
261  * the requested resolve types.  Once data is assigned it will not be
262  * removed until the last unlock.
263  *
264  * HAMMER2_RESOLVE_NEVER - Do not resolve the data element.
265  *                         (typically used to avoid device/logical buffer
266  *                          aliasing for data)
267  *
268  * HAMMER2_RESOLVE_MAYBE - Do not resolve data elements for chains in
269  *                         the INITIAL-create state (indirect blocks only).
270  *
271  *                         Do not resolve data elements for DATA chains.
272  *                         (typically used to avoid device/logical buffer
273  *                          aliasing for data)
274  *
275  * HAMMER2_RESOLVE_ALWAYS- Always resolve the data element.
276  *
277  *
278  * NOTE: Embedded elements (volume header, inodes) are always resolved
279  *       regardless.
280  *
281  * NOTE: Specifying HAMMER2_RESOLVE_ALWAYS on a newly-created non-embedded
282  *       element will instantiate and zero its buffer, and flush it on
283  *       release.
284  *
285  * NOTE: (data) elements are normally locked RESOLVE_NEVER or RESOLVE_MAYBE
286  *       so as not to instantiate a device buffer, which could alias against
287  *       a logical file buffer.  However, if ALWAYS is specified the
288  *       device buffer will be instantiated anyway.
289  */
290 int
291 hammer2_chain_lock(hammer2_mount_t *hmp, hammer2_chain_t *chain, int how)
292 {
293         hammer2_blockref_t *bref;
294         hammer2_off_t pbase;
295         hammer2_off_t peof;
296         size_t boff;
297         size_t bbytes;
298         int error;
299         char *bdata;
300
301         /*
302          * Lock the element.  Under certain conditions this might end up
303          * being a recursive lock.
304          */
305         KKASSERT(chain->refs > 0);
306         atomic_add_int(&chain->refs, 1);
307         lockmgr(&chain->lk, LK_EXCLUSIVE);
308
309         /*
310          * If we already have a valid data pointer no further action is
311          * necessary.
312          */
313         if (chain->data)
314                 return (0);
315
316         /*
317          * Do we have to resolve the data?
318          */
319         switch(how) {
320         case HAMMER2_RESOLVE_NEVER:
321                 return(0);
322         case HAMMER2_RESOLVE_MAYBE:
323                 if (chain->flags & HAMMER2_CHAIN_INITIAL)
324                         return(0);
325                 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA)
326                         return(0);
327                 /* fall through */
328         case HAMMER2_RESOLVE_ALWAYS:
329                 break;
330         }
331
332         /*
333          * We must resolve to a device buffer, either by issuing I/O or
334          * by creating a zero-fill element.  We do not mark the buffer
335          * dirty when creating a zero-fill element (the hammer2_chain_modify()
336          * API must still be used to do that).
337          *
338          * The device buffer is variable-sized in powers of 2 down
339          * to HAMMER2_MINALLOCSIZE (typically 1K).  A 64K physical storage
340          * chunk always contains buffers of the same size. (XXX)
341          *
342          * The minimum physical IO size may be larger than the variable
343          * block size.
344          */
345         bref = &chain->bref;
346
347         if ((bbytes = chain->bytes) < HAMMER2_MINIOSIZE)
348                 bbytes = HAMMER2_MINIOSIZE;
349         pbase = bref->data_off & ~(hammer2_off_t)(bbytes - 1);
350         peof = (pbase + HAMMER2_PBUFSIZE64) & ~HAMMER2_PBUFMASK64;
351         boff = bref->data_off & HAMMER2_OFF_MASK & (bbytes - 1);
352         KKASSERT(pbase != 0);
353
354         /*
355          * The getblk() optimization can only be used on newly created
356          * elements if the physical block size matches the request.
357          */
358         if ((chain->flags & HAMMER2_CHAIN_INITIAL) &&
359             chain->bytes == bbytes) {
360                 chain->bp = getblk(hmp->devvp, pbase, bbytes, 0, 0);
361                 error = 0;
362         } else if (hammer2_cluster_enable) {
363                 error = cluster_read(hmp->devvp, peof, pbase, bbytes,
364                                      HAMMER2_PBUFSIZE, HAMMER2_PBUFSIZE,
365                                      &chain->bp);
366         } else {
367                 error = bread(hmp->devvp, pbase, bbytes, &chain->bp);
368         }
369
370         if (error) {
371                 kprintf("hammer2_chain_get: I/O error %016jx: %d\n",
372                         (intmax_t)pbase, error);
373                 bqrelse(chain->bp);
374                 chain->bp = NULL;
375                 return (error);
376         }
377
378         /*
379          * Zero the data area if the chain is in the INITIAL-create state
380          */
381         bdata = (char *)chain->bp->b_data + boff;
382         if (chain->flags & HAMMER2_CHAIN_INITIAL)
383                 bzero(bdata, chain->bytes);
384
385         /*
386          * Setup the data pointer, either pointing it to an embedded data
387          * structure and copying the data from the buffer, or pointing it
388          * into the buffer.
389          *
390          * The buffer is not retained when copying to an embedded data
391          * structure in order to avoid potential deadlocks or recursions
392          * on the same physical buffer.
393          */
394         switch (bref->type) {
395         case HAMMER2_BREF_TYPE_VOLUME:
396                 /*
397                  * Copy data from bp to embedded buffer
398                  */
399                 panic("hammer2_chain_lock: called on unresolved volume header");
400 #if 0
401                 /* NOT YET */
402                 KKASSERT(pbase == 0);
403                 KKASSERT(chain->bytes == HAMMER2_PBUFSIZE);
404                 bcopy(bdata, &hmp->voldata, chain->bytes);
405                 chain->data = (void *)&hmp->voldata;
406                 bqrelse(chain->bp);
407                 chain->bp = NULL;
408 #endif
409                 break;
410         case HAMMER2_BREF_TYPE_INODE:
411                 /*
412                  * Copy data from bp to embedded buffer, do not retain the
413                  * device buffer.
414                  */
415                 bcopy(bdata, &chain->u.ip->ip_data, chain->bytes);
416                 chain->data = (void *)&chain->u.ip->ip_data;
417                 bqrelse(chain->bp);
418                 chain->bp = NULL;
419                 break;
420         case HAMMER2_BREF_TYPE_INDIRECT:
421         case HAMMER2_BREF_TYPE_DATA:
422         default:
423                 /*
424                  * Point data at the device buffer and leave bp intact.
425                  */
426                 chain->data = (void *)bdata;
427                 break;
428         }
429         return (0);
430 }
431
432 /*
433  * Unlock and deref a chain element.
434  *
435  * On the last lock release any non-embedded data (chain->bp) will be
436  * retired.
437  */
438 void
439 hammer2_chain_unlock(hammer2_mount_t *hmp, hammer2_chain_t *chain)
440 {
441         long *counterp;
442
443         /*
444          * Undo a recursive lock
445          */
446         if (lockcountnb(&chain->lk) > 1) {
447                 KKASSERT(chain->refs > 1);
448                 atomic_add_int(&chain->refs, -1);
449                 lockmgr(&chain->lk, LK_RELEASE);
450                 return;
451         }
452
453         /*
454          * Shortcut the case if the data is embedded or not resolved.
455          * Do NOT null-out pointers to embedded data (e.g. inode).
456          */
457         if (chain->bp == NULL) {
458                 lockmgr(&chain->lk, LK_RELEASE);
459                 hammer2_chain_drop(hmp, chain);
460                 return;
461         }
462
463         /*
464          * Statistics
465          */
466         if ((chain->flags & HAMMER2_CHAIN_DIRTYBP) == 0) {
467                 ;
468         } else if (chain->flags & HAMMER2_CHAIN_IOFLUSH) {
469                 switch(chain->bref.type) {
470                 case HAMMER2_BREF_TYPE_DATA:
471                         counterp = &hammer2_ioa_file_write;
472                         break;
473                 case HAMMER2_BREF_TYPE_INODE:
474                         counterp = &hammer2_ioa_meta_write;
475                         break;
476                 case HAMMER2_BREF_TYPE_INDIRECT:
477                         counterp = &hammer2_ioa_indr_write;
478                         break;
479                 default:
480                         counterp = &hammer2_ioa_volu_write;
481                         break;
482                 }
483                 ++*counterp;
484         } else {
485                 switch(chain->bref.type) {
486                 case HAMMER2_BREF_TYPE_DATA:
487                         counterp = &hammer2_iod_file_write;
488                         break;
489                 case HAMMER2_BREF_TYPE_INODE:
490                         counterp = &hammer2_iod_meta_write;
491                         break;
492                 case HAMMER2_BREF_TYPE_INDIRECT:
493                         counterp = &hammer2_iod_indr_write;
494                         break;
495                 default:
496                         counterp = &hammer2_iod_volu_write;
497                         break;
498                 }
499                 ++*counterp;
500         }
501
502         /*
503          * Clean out the bp.
504          *
505          * If a device buffer was used for data be sure to destroy the
506          * buffer when we are done to avoid aliases (XXX what about the
507          * underlying VM pages?).
508          */
509         if (chain->bref.type == HAMMER2_BREF_TYPE_DATA)
510                 chain->bp->b_flags |= B_RELBUF;
511
512         chain->data = NULL;
513         if (chain->flags & HAMMER2_CHAIN_DIRTYBP) {
514                 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DIRTYBP);
515                 if (chain->flags & HAMMER2_CHAIN_IOFLUSH) {
516                         atomic_clear_int(&chain->flags,
517                                          HAMMER2_CHAIN_IOFLUSH);
518                         chain->bp->b_flags |= B_RELBUF;
519                         cluster_awrite(chain->bp);
520                 } else {
521                         chain->bp->b_flags |= B_CLUSTEROK;
522                         bdwrite(chain->bp);
523                 }
524         } else {
525                 if (chain->flags & HAMMER2_CHAIN_IOFLUSH) {
526                         atomic_clear_int(&chain->flags,
527                                          HAMMER2_CHAIN_IOFLUSH);
528                         chain->bp->b_flags |= B_RELBUF;
529                         brelse(chain->bp);
530                 } else {
531                         /* bp might still be dirty */
532                         bqrelse(chain->bp);
533                 }
534         }
535         chain->bp = NULL;
536         lockmgr(&chain->lk, LK_RELEASE);
537         hammer2_chain_drop(hmp, chain);
538 }
539
540 /*
541  * Resize the chain's physical storage allocation.  Chains can be resized
542  * smaller without reallocating the storage.  Resizing larger will reallocate
543  * the storage.
544  *
545  * Must be passed a locked chain.  If you want the resize to copy the data
546  * you should lock the chain with RESOLVE_MAYBE or RESOLVE_ALWAYS, otherwise
547  * the resize operation will not copy the data.
548  *
549  * This function is mostly used with DATA blocks locked RESOLVE_NEVER in order
550  * to avoid instantiating a device buffer that conflicts with the vnode
551  * data buffer.
552  *
553  * XXX flags currently ignored, uses chain->bp to detect data/no-data.
554  */
555 void
556 hammer2_chain_resize(hammer2_mount_t *hmp, hammer2_chain_t *chain,
557                      int nradix, int flags)
558 {
559         struct buf *nbp;
560         hammer2_off_t pbase;
561         size_t obytes;
562         size_t nbytes;
563         size_t bbytes;
564         int boff;
565         char *bdata;
566         int error;
567
568         /*
569          * Only data and indirect blocks can be resized for now
570          */
571         KKASSERT(chain != &hmp->vchain);
572         KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
573                  chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT);
574
575         /*
576          * Nothing to do if the element is already the proper size
577          */
578         obytes = chain->bytes;
579         nbytes = 1U << nradix;
580         if (obytes == nbytes)
581                 return;
582
583         /*
584          * Set MODIFIED and add a chain ref to prevent destruction.  Both
585          * modified flags share the same ref.
586          */
587         if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) {
588                 atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
589                 hammer2_chain_ref(hmp, chain);
590         }
591
592         /*
593          * Relocate the block, even if making it smaller (because different
594          * block sizes may be in different regions).
595          */
596         chain->bref.data_off = hammer2_freemap_alloc(hmp, chain->bref.type,
597                                                      nbytes);
598         chain->bytes = nbytes;
599
600         /*
601          * The device buffer may be larger than the allocation size.
602          */
603         if ((bbytes = chain->bytes) < HAMMER2_MINIOSIZE)
604                 bbytes = HAMMER2_MINIOSIZE;
605         pbase = chain->bref.data_off & ~(hammer2_off_t)(bbytes - 1);
606         boff = chain->bref.data_off & HAMMER2_OFF_MASK & (bbytes - 1);
607
608         /*
609          * Only copy the data if resolved, otherwise the caller is
610          * responsible.
611          */
612         if (chain->bp) {
613                 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
614                          chain->bref.type == HAMMER2_BREF_TYPE_DATA);
615                 KKASSERT(chain != &hmp->vchain);        /* safety */
616
617                 /*
618                  * The getblk() optimization can only be used if the
619                  * physical block size matches the request.
620                  */
621                 if (nbytes == bbytes) {
622                         nbp = getblk(hmp->devvp, pbase, bbytes, 0, 0);
623                         error = 0;
624                 } else {
625                         error = bread(hmp->devvp, pbase, bbytes, &nbp);
626                         KKASSERT(error == 0);
627                 }
628                 bdata = (char *)nbp->b_data + boff;
629
630                 if (nbytes < obytes) {
631                         bcopy(chain->data, bdata, nbytes);
632                 } else {
633                         bcopy(chain->data, bdata, obytes);
634                         bzero(bdata + obytes, nbytes - obytes);
635                 }
636
637                 /*
638                  * NOTE: The INITIAL state of the chain is left intact.
639                  *
640                  * NOTE: Because of the reallocation we have to set DIRTYBP
641                  *       if INITIAL is not set.
642                  */
643                 chain->bp->b_flags |= B_RELBUF;
644                 brelse(chain->bp);
645                 chain->bp = nbp;
646                 chain->data = (void *)bdata;
647                 if ((chain->flags & HAMMER2_CHAIN_INITIAL) == 0)
648                         atomic_set_int(&chain->flags, HAMMER2_CHAIN_DIRTYBP);
649         }
650         hammer2_chain_parent_setsubmod(hmp, chain);
651 }
652
653 /*
654  * Convert a locked chain that was retrieved read-only to read-write.
655  *
656  * If not already marked modified a new physical block will be allocated
657  * and assigned to the bref.
658  *
659  * Non-data blocks - The chain should be locked to at least the RESOLVE_MAYBE
660  *                   level or the COW operation will not work.
661  *
662  * Data blocks     - The chain is usually locked RESOLVE_NEVER so as not to
663  *                   run the data through the device buffers.
664  */
665 void
666 hammer2_chain_modify(hammer2_mount_t *hmp, hammer2_chain_t *chain, int flags)
667 {
668         struct buf *nbp;
669         int error;
670         hammer2_off_t pbase;
671         size_t bbytes;
672         size_t boff;
673         void *bdata;
674
675         /*
676          * If the chain is already marked MODIFIED we can just return.
677          */
678         if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
679                 if ((flags & HAMMER2_MODIFY_OPTDATA) == 0 &&
680                     chain->bp == NULL) {
681                         goto skip1;
682                 }
683                 return;
684         }
685
686         /*
687          * Set MODIFIED and add a chain ref to prevent destruction.  Both
688          * modified flags share the same ref.
689          */
690         atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
691         hammer2_chain_ref(hmp, chain);
692
693         /*
694          * We must allocate the copy-on-write block.
695          *
696          * If the data is embedded no other action is required.
697          *
698          * If the data is not embedded we acquire and clear the
699          * new block.  If chain->data is not NULL we then do the
700          * copy-on-write.  chain->data will then be repointed to the new
701          * buffer and the old buffer will be released.
702          *
703          * For newly created elements with no prior allocation we go
704          * through the copy-on-write steps except without the copying part.
705          */
706         if (chain != &hmp->vchain) {
707                 if ((hammer2_debug & 0x0001) &&
708                     (chain->bref.data_off & HAMMER2_OFF_MASK)) {
709                         kprintf("Replace %d\n", chain->bytes);
710                 }
711                 chain->bref.data_off =
712                         hammer2_freemap_alloc(hmp, chain->bref.type,
713                                               chain->bytes);
714                 /* XXX failed allocation */
715         }
716
717         /*
718          * If data instantiation is optional and the chain has no current
719          * data association (typical for DATA and newly-created INDIRECT
720          * elements), don't instantiate the buffer now.
721          */
722         if ((flags & HAMMER2_MODIFY_OPTDATA) && chain->bp == NULL)
723                 goto skip2;
724
725 skip1:
726         /*
727          * Setting the DIRTYBP flag will cause the buffer to be dirtied or
728          * written-out on unlock.  This bit is independent of the MODIFIED
729          * bit because the chain may still need meta-data adjustments done
730          * by virtue of MODIFIED for its parent, and the buffer can be
731          * flushed out (possibly multiple times) by the OS before that.
732          *
733          * Clearing the INITIAL flag (for indirect blocks) indicates that
734          * a zero-fill buffer has been instantiated.
735          */
736         atomic_set_int(&chain->flags, HAMMER2_CHAIN_DIRTYBP);
737         atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
738
739         /*
740          * We currently should never instantiate a device buffer for a
741          * data chain.
742          */
743         KKASSERT(chain->bref.type != HAMMER2_BREF_TYPE_DATA);
744
745         /*
746          * Execute COW operation
747          */
748         switch(chain->bref.type) {
749         case HAMMER2_BREF_TYPE_VOLUME:
750         case HAMMER2_BREF_TYPE_INODE:
751                 /*
752                  * The data is embedded, no copy-on-write operation is
753                  * needed.
754                  */
755                 KKASSERT(chain->bp == NULL);
756                 break;
757         case HAMMER2_BREF_TYPE_DATA:
758         case HAMMER2_BREF_TYPE_INDIRECT:
759                 /*
760                  * Perform the copy-on-write operation
761                  */
762                 KKASSERT(chain != &hmp->vchain);        /* safety */
763                 /*
764                  * The device buffer may be larger than the allocation size.
765                  */
766                 if ((bbytes = chain->bytes) < HAMMER2_MINIOSIZE)
767                         bbytes = HAMMER2_MINIOSIZE;
768                 pbase = chain->bref.data_off & ~(hammer2_off_t)(bbytes - 1);
769                 boff = chain->bref.data_off & HAMMER2_OFF_MASK & (bbytes - 1);
770
771                 /*
772                  * The getblk() optimization can only be used if the
773                  * physical block size matches the request.
774                  */
775                 if (chain->bytes == bbytes) {
776                         nbp = getblk(hmp->devvp, pbase, bbytes, 0, 0);
777                         error = 0;
778                 } else {
779                         error = bread(hmp->devvp, pbase, bbytes, &nbp);
780                         KKASSERT(error == 0);
781                 }
782                 bdata = (char *)nbp->b_data + boff;
783
784                 /*
785                  * Copy or zero-fill on write depending on whether
786                  * chain->data exists or not.
787                  */
788                 if (chain->data) {
789                         bcopy(chain->data, bdata, chain->bytes);
790                         KKASSERT(chain->bp != NULL);
791                 } else {
792                         bzero(bdata, chain->bytes);
793                 }
794                 if (chain->bp) {
795                         chain->bp->b_flags |= B_RELBUF;
796                         brelse(chain->bp);
797                 }
798                 chain->bp = nbp;
799                 chain->data = bdata;
800                 break;
801         default:
802                 panic("hammer2_chain_modify: illegal non-embedded type %d",
803                       chain->bref.type);
804                 break;
805
806         }
807 skip2:
808         if ((flags & HAMMER2_MODIFY_NOSUB) == 0)
809                 hammer2_chain_parent_setsubmod(hmp, chain);
810 }
811
812 /*
813  * Mark the volume as having been modified.  This short-cut version
814  * does not have to lock the volume's chain, which allows the ioctl
815  * code to make adjustments to connections without deadlocking.
816  */
817 void
818 hammer2_modify_volume(hammer2_mount_t *hmp)
819 {
820         hammer2_voldata_lock(hmp);
821         atomic_set_int(&hmp->vchain.flags, HAMMER2_CHAIN_MODIFIED_AUX);
822         hammer2_voldata_unlock(hmp);
823 }
824
825 /*
826  * Locate an in-memory chain.  The parent must be locked.  The in-memory
827  * chain is returned or NULL if no in-memory chain is present.
828  *
829  * NOTE: A chain on-media might exist for this index when NULL is returned.
830  */
831 hammer2_chain_t *
832 hammer2_chain_find(hammer2_mount_t *hmp, hammer2_chain_t *parent, int index)
833 {
834         hammer2_chain_t dummy;
835         hammer2_chain_t *chain;
836
837         dummy.index = index;
838         chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead, &dummy);
839         return (chain);
840 }
841
842 /*
843  * Return a locked chain structure with all associated data acquired.
844  *
845  * Caller must lock the parent on call, the returned child will be locked.
846  */
847 hammer2_chain_t *
848 hammer2_chain_get(hammer2_mount_t *hmp, hammer2_chain_t *parent,
849                   int index, int flags)
850 {
851         hammer2_blockref_t *bref;
852         hammer2_chain_t *chain;
853         hammer2_chain_t dummy;
854         int how;
855
856         /*
857          * Figure out how to lock.  MAYBE can be used to optimized
858          * the initial-create state for indirect blocks.
859          */
860         if (flags & (HAMMER2_LOOKUP_NODATA | HAMMER2_LOOKUP_NOLOCK))
861                 how = HAMMER2_RESOLVE_NEVER;
862         else
863                 how = HAMMER2_RESOLVE_MAYBE;
864
865         /*
866          * First see if we have a (possibly modified) chain element cached
867          * for this (parent, index).  Acquire the data if necessary.
868          *
869          * If chain->data is non-NULL the chain should already be marked
870          * modified.
871          */
872         dummy.index = index;
873         chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead, &dummy);
874         if (chain) {
875                 if (flags & HAMMER2_LOOKUP_NOLOCK)
876                         hammer2_chain_ref(hmp, chain);
877                 else
878                         hammer2_chain_lock(hmp, chain, how);
879                 return(chain);
880         }
881
882         /*
883          * the get function must always succeed, panic if there's no
884          * data to index.
885          */
886         if (parent->flags & HAMMER2_CHAIN_INITIAL) {
887                 panic("hammer2_chain_get: Missing bref(1)");
888                 /* NOT REACHED */
889         }
890
891         /*
892          * Otherwise lookup the bref and issue I/O (switch on the parent)
893          */
894         switch(parent->bref.type) {
895         case HAMMER2_BREF_TYPE_INODE:
896                 KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
897                 bref = &parent->data->ipdata.u.blockset.blockref[index];
898                 break;
899         case HAMMER2_BREF_TYPE_INDIRECT:
900                 KKASSERT(parent->data != NULL);
901                 KKASSERT(index >= 0 &&
902                          index < parent->bytes / sizeof(hammer2_blockref_t));
903                 bref = &parent->data->npdata.blockref[index];
904                 break;
905         case HAMMER2_BREF_TYPE_VOLUME:
906                 KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
907                 bref = &hmp->voldata.sroot_blockset.blockref[index];
908                 break;
909         default:
910                 bref = NULL;
911                 panic("hammer2_chain_get: unrecognized blockref type: %d",
912                       parent->bref.type);
913         }
914         if (bref->type == 0) {
915                 panic("hammer2_chain_get: Missing bref(2)");
916                 /* NOT REACHED */
917         }
918
919         /*
920          * Allocate a chain structure representing the existing media
921          * entry.
922          *
923          * The locking operation we do later will issue I/O to read it.
924          */
925         chain = hammer2_chain_alloc(hmp, bref);
926
927         /*
928          * Link the chain into its parent.  Caller is expected to hold an
929          * exclusive lock on the parent.
930          */
931         chain->parent = parent;
932         chain->index = index;
933         if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, chain))
934                 panic("hammer2_chain_link: collision");
935         KKASSERT(parent->refs > 0);
936         atomic_add_int(&parent->refs, 1);       /* for splay entry */
937
938         /*
939          * Additional linkage for inodes.  Reuse the parent pointer to
940          * find the parent directory.
941          */
942         if (bref->type == HAMMER2_BREF_TYPE_INODE) {
943                 while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT)
944                         parent = parent->parent;
945                 if (parent->bref.type == HAMMER2_BREF_TYPE_INODE)
946                         chain->u.ip->pip = parent->u.ip;
947         }
948
949         /*
950          * Our new chain structure has already been referenced and locked
951          * but the lock code handles the I/O so call it to resolve the data.
952          * Then release one of our two exclusive locks.
953          *
954          * If NOLOCK is set the release will release the one-and-only lock.
955          */
956         if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0) {
957                 hammer2_chain_lock(hmp, chain, how);    /* recusive lock */
958                 hammer2_chain_drop(hmp, chain);         /* excess ref */
959         }
960         lockmgr(&chain->lk, LK_RELEASE);                /* from alloc */
961
962         return (chain);
963 }
964
965 /*
966  * Locate any key between key_beg and key_end inclusive.  (*parentp)
967  * typically points to an inode but can also point to a related indirect
968  * block and this function will recurse upwards and find the inode again.
969  *
970  * WARNING!  THIS DOES NOT RETURN KEYS IN LOGICAL KEY ORDER!  ANY KEY
971  *           WITHIN THE RANGE CAN BE RETURNED.  HOWEVER, AN ITERATION
972  *           WHICH PICKS UP WHERE WE LEFT OFF WILL CONTINUE THE SCAN.
973  *
974  * (*parentp) must be exclusively locked and referenced and can be an inode
975  * or an existing indirect block within the inode.
976  *
977  * On return (*parentp) will be modified to point at the deepest parent chain
978  * element encountered during the search, as a helper for an insertion or
979  * deletion.   The new (*parentp) will be locked and referenced and the old
980  * will be unlocked and dereferenced (no change if they are both the same).
981  *
982  * The matching chain will be returned exclusively locked and referenced.
983  *
984  * NULL is returned if no match was found, but (*parentp) will still
985  * potentially be adjusted.
986  *
987  * This function will also recurse up the chain if the key is not within the
988  * current parent's range.  (*parentp) can never be set to NULL.  An iteration
989  * can simply allow (*parentp) to float inside the loop.
990  */
991 hammer2_chain_t *
992 hammer2_chain_lookup(hammer2_mount_t *hmp, hammer2_chain_t **parentp,
993                      hammer2_key_t key_beg, hammer2_key_t key_end,
994                      int flags)
995 {
996         hammer2_chain_t *parent;
997         hammer2_chain_t *chain;
998         hammer2_chain_t *tmp;
999         hammer2_blockref_t *base;
1000         hammer2_blockref_t *bref;
1001         hammer2_key_t scan_beg;
1002         hammer2_key_t scan_end;
1003         int count = 0;
1004         int i;
1005
1006         /*
1007          * Recurse (*parentp) upward if necessary until the parent completely
1008          * encloses the key range or we hit the inode.
1009          */
1010         parent = *parentp;
1011         while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
1012                 scan_beg = parent->bref.key;
1013                 scan_end = scan_beg +
1014                            ((hammer2_key_t)1 << parent->bref.keybits) - 1;
1015                 if (key_beg >= scan_beg && key_end <= scan_end)
1016                         break;
1017                 hammer2_chain_ref(hmp, parent);         /* ref old parent */
1018                 hammer2_chain_unlock(hmp, parent);      /* unlock old parent */
1019                 parent = parent->parent;
1020                                                         /* lock new parent */
1021                 hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_MAYBE);
1022                 hammer2_chain_drop(hmp, *parentp);      /* drop old parent */
1023                 *parentp = parent;                      /* new parent */
1024         }
1025
1026 again:
1027         /*
1028          * Locate the blockref array.  Currently we do a fully associative
1029          * search through the array.
1030          */
1031         switch(parent->bref.type) {
1032         case HAMMER2_BREF_TYPE_INODE:
1033                 /*
1034                  * Special shortcut for embedded data returns the inode
1035                  * itself.  Callers must detect this condition and access
1036                  * the embedded data (the strategy code does this for us).
1037                  *
1038                  * This is only applicable to regular files and softlinks.
1039                  */
1040                 if (parent->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA) {
1041                         if (flags & HAMMER2_LOOKUP_NOLOCK)
1042                                 hammer2_chain_ref(hmp, parent);
1043                         else
1044                                 hammer2_chain_lock(hmp, parent,
1045                                                    HAMMER2_RESOLVE_ALWAYS);
1046                         return (parent);
1047                 }
1048                 base = &parent->data->ipdata.u.blockset.blockref[0];
1049                 count = HAMMER2_SET_COUNT;
1050                 break;
1051         case HAMMER2_BREF_TYPE_INDIRECT:
1052                 /*
1053                  * Optimize indirect blocks in the INITIAL state to avoid
1054                  * I/O.
1055                  */
1056                 if (parent->flags & HAMMER2_CHAIN_INITIAL) {
1057                         base = NULL;
1058                 } else {
1059                         if (parent->data == NULL)
1060                                 panic("parent->data is NULL");
1061                         base = &parent->data->npdata.blockref[0];
1062                 }
1063                 count = parent->bytes / sizeof(hammer2_blockref_t);
1064                 break;
1065         case HAMMER2_BREF_TYPE_VOLUME:
1066                 base = &hmp->voldata.sroot_blockset.blockref[0];
1067                 count = HAMMER2_SET_COUNT;
1068                 break;
1069         default:
1070                 panic("hammer2_chain_lookup: unrecognized blockref type: %d",
1071                       parent->bref.type);
1072                 base = NULL;    /* safety */
1073                 count = 0;      /* safety */
1074         }
1075
1076         /*
1077          * If the element and key overlap we use the element.
1078          */
1079         bref = NULL;
1080         for (i = 0; i < count; ++i) {
1081                 tmp = hammer2_chain_find(hmp, parent, i);
1082                 if (tmp) {
1083                         bref = &tmp->bref;
1084                         KKASSERT(bref->type != 0);
1085                 } else if (base == NULL || base[i].type == 0) {
1086                         continue;
1087                 } else {
1088                         bref = &base[i];
1089                 }
1090                 scan_beg = bref->key;
1091                 scan_end = scan_beg + ((hammer2_key_t)1 << bref->keybits) - 1;
1092                 if (key_beg <= scan_end && key_end >= scan_beg)
1093                         break;
1094         }
1095         if (i == count) {
1096                 if (key_beg == key_end)
1097                         return (NULL);
1098                 return (hammer2_chain_next(hmp, parentp, NULL,
1099                                            key_beg, key_end, flags));
1100         }
1101
1102         /*
1103          * Acquire the new chain element.  If the chain element is an
1104          * indirect block we must search recursively.
1105          */
1106         chain = hammer2_chain_get(hmp, parent, i, flags);
1107         if (chain == NULL)
1108                 return (NULL);
1109
1110         /*
1111          * If the chain element is an indirect block it becomes the new
1112          * parent and we loop on it.
1113          *
1114          * The parent always has to be locked with at least RESOLVE_MAYBE,
1115          * so it might need a fixup if the caller passed incompatible flags.
1116          */
1117         if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
1118                 hammer2_chain_unlock(hmp, parent);
1119                 *parentp = parent = chain;
1120                 if (flags & HAMMER2_LOOKUP_NOLOCK) {
1121                         hammer2_chain_lock(hmp, chain, HAMMER2_RESOLVE_MAYBE);
1122                         hammer2_chain_drop(hmp, chain); /* excess ref */
1123                 } else if (flags & HAMMER2_LOOKUP_NODATA) {
1124                         hammer2_chain_lock(hmp, chain, HAMMER2_RESOLVE_MAYBE);
1125                         hammer2_chain_unlock(hmp, chain);
1126                 }
1127                 goto again;
1128         }
1129
1130         /*
1131          * All done, return chain
1132          */
1133         return (chain);
1134 }
1135
1136 /*
1137  * After having issued a lookup we can iterate all matching keys.
1138  *
1139  * If chain is non-NULL we continue the iteration from just after it's index.
1140  *
1141  * If chain is NULL we assume the parent was exhausted and continue the
1142  * iteration at the next parent.
1143  *
1144  * parent must be locked on entry and remains locked throughout.  chain's
1145  * lock status must match flags.
1146  */
1147 hammer2_chain_t *
1148 hammer2_chain_next(hammer2_mount_t *hmp, hammer2_chain_t **parentp,
1149                    hammer2_chain_t *chain,
1150                    hammer2_key_t key_beg, hammer2_key_t key_end,
1151                    int flags)
1152 {
1153         hammer2_chain_t *parent;
1154         hammer2_chain_t *tmp;
1155         hammer2_blockref_t *base;
1156         hammer2_blockref_t *bref;
1157         hammer2_key_t scan_beg;
1158         hammer2_key_t scan_end;
1159         int i;
1160         int count;
1161
1162         parent = *parentp;
1163
1164 again:
1165         /*
1166          * Calculate the next index and recalculate the parent if necessary.
1167          */
1168         if (chain) {
1169                 /*
1170                  * Continue iteration within current parent.  If not NULL
1171                  * the passed-in chain may or may not be locked, based on
1172                  * the LOOKUP_NOLOCK flag (passed in as returned from lookup
1173                  * or a prior next).
1174                  */
1175                 i = chain->index + 1;
1176                 if (flags & HAMMER2_LOOKUP_NOLOCK)
1177                         hammer2_chain_drop(hmp, chain);
1178                 else
1179                         hammer2_chain_unlock(hmp, chain);
1180
1181                 /*
1182                  * Any scan where the lookup returned degenerate data embedded
1183                  * in the inode has an invalid index and must terminate.
1184                  */
1185                 if (chain == parent)
1186                         return(NULL);
1187                 chain = NULL;
1188         } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT) {
1189                 /*
1190                  * We reached the end of the iteration.
1191                  */
1192                 return (NULL);
1193         } else {
1194                 /*
1195                  * Continue iteration with next parent unless the current
1196                  * parent covers the range.
1197                  */
1198                 hammer2_chain_t *nparent;
1199
1200                 scan_beg = parent->bref.key;
1201                 scan_end = scan_beg +
1202                             ((hammer2_key_t)1 << parent->bref.keybits) - 1;
1203                 if (key_beg >= scan_beg && key_end <= scan_end)
1204                         return (NULL);
1205
1206                 i = parent->index + 1;
1207                 nparent = parent->parent;
1208                 hammer2_chain_ref(hmp, nparent);        /* ref new parent */
1209                 hammer2_chain_unlock(hmp, parent);      /* unlock old parent */
1210                                                         /* lock new parent */
1211                 hammer2_chain_lock(hmp, nparent, HAMMER2_RESOLVE_MAYBE);
1212                 hammer2_chain_drop(hmp, nparent);       /* drop excess ref */
1213                 *parentp = parent = nparent;
1214         }
1215
1216 again2:
1217         /*
1218          * Locate the blockref array.  Currently we do a fully associative
1219          * search through the array.
1220          */
1221         switch(parent->bref.type) {
1222         case HAMMER2_BREF_TYPE_INODE:
1223                 base = &parent->data->ipdata.u.blockset.blockref[0];
1224                 count = HAMMER2_SET_COUNT;
1225                 break;
1226         case HAMMER2_BREF_TYPE_INDIRECT:
1227                 if (parent->flags & HAMMER2_CHAIN_INITIAL) {
1228                         base = NULL;
1229                 } else {
1230                         KKASSERT(parent->data != NULL);
1231                         base = &parent->data->npdata.blockref[0];
1232                 }
1233                 count = parent->bytes / sizeof(hammer2_blockref_t);
1234                 break;
1235         case HAMMER2_BREF_TYPE_VOLUME:
1236                 base = &hmp->voldata.sroot_blockset.blockref[0];
1237                 count = HAMMER2_SET_COUNT;
1238                 break;
1239         default:
1240                 panic("hammer2_chain_next: unrecognized blockref type: %d",
1241                       parent->bref.type);
1242                 base = NULL;    /* safety */
1243                 count = 0;      /* safety */
1244                 break;
1245         }
1246         KKASSERT(i <= count);
1247
1248         /*
1249          * Look for the key.  If we are unable to find a match and an exact
1250          * match was requested we return NULL.  If a range was requested we
1251          * run hammer2_chain_next() to iterate.
1252          */
1253         bref = NULL;
1254         while (i < count) {
1255                 tmp = hammer2_chain_find(hmp, parent, i);
1256                 if (tmp) {
1257                         bref = &tmp->bref;
1258                 } else if (base == NULL || base[i].type == 0) {
1259                         ++i;
1260                         continue;
1261                 } else {
1262                         bref = &base[i];
1263                 }
1264                 scan_beg = bref->key;
1265                 scan_end = scan_beg + ((hammer2_key_t)1 << bref->keybits) - 1;
1266                 if (key_beg <= scan_end && key_end >= scan_beg)
1267                         break;
1268                 ++i;
1269         }
1270
1271         /*
1272          * If we couldn't find a match recurse up a parent to continue the
1273          * search.
1274          */
1275         if (i == count)
1276                 goto again;
1277
1278         /*
1279          * Acquire the new chain element.  If the chain element is an
1280          * indirect block we must search recursively.
1281          */
1282         chain = hammer2_chain_get(hmp, parent, i, flags);
1283         if (chain == NULL)
1284                 return (NULL);
1285
1286         /*
1287          * If the chain element is an indirect block it becomes the new
1288          * parent and we loop on it.
1289          *
1290          * The parent always has to be locked with at least RESOLVE_MAYBE,
1291          * so it might need a fixup if the caller passed incompatible flags.
1292          */
1293         if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
1294                 hammer2_chain_unlock(hmp, parent);
1295                 *parentp = parent = chain;
1296                 chain = NULL;
1297                 if (flags & HAMMER2_LOOKUP_NOLOCK) {
1298                         hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_MAYBE);
1299                         hammer2_chain_drop(hmp, parent);        /* excess ref */
1300                 } else if (flags & HAMMER2_LOOKUP_NODATA) {
1301                         hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_MAYBE);
1302                         hammer2_chain_unlock(hmp, parent);
1303                 }
1304                 i = 0;
1305                 goto again2;
1306         }
1307
1308         /*
1309          * All done, return chain
1310          */
1311         return (chain);
1312 }
1313
1314 /*
1315  * Create and return a new hammer2 system memory structure of the specified
1316  * key, type and size and insert it RELATIVE TO (PARENT).
1317  *
1318  * (parent) is typically either an inode or an indirect block, acquired
1319  * acquired as a side effect of issuing a prior failed lookup.  parent
1320  * must be locked and held.  Do not pass the inode chain to this function
1321  * unless that is the chain returned by the failed lookup.
1322  *
1323  * Non-indirect types will automatically allocate indirect blocks as required
1324  * if the new item does not fit in the current (parent).
1325  *
1326  * Indirect types will move a portion of the existing blockref array in
1327  * (parent) into the new indirect type and then use one of the free slots
1328  * to emplace the new indirect type.
1329  *
1330  * A new locked, referenced chain element is returned of the specified type.
1331  * The element may or may not have a data area associated with it:
1332  *
1333  *      VOLUME          not allowed here
1334  *      INODE           embedded data are will be set-up
1335  *      INDIRECT        not allowed here
1336  *      DATA            no data area will be set-up (caller is expected
1337  *                      to have logical buffers, we don't want to alias
1338  *                      the data onto device buffers!).
1339  */
1340 hammer2_chain_t *
1341 hammer2_chain_create(hammer2_mount_t *hmp, hammer2_chain_t *parent,
1342                      hammer2_chain_t *chain,
1343                      hammer2_key_t key, int keybits, int type, size_t bytes)
1344 {
1345         hammer2_blockref_t dummy;
1346         hammer2_blockref_t *base;
1347         hammer2_chain_t dummy_chain;
1348         int unlock_parent = 0;
1349         int allocated = 0;
1350         int count;
1351         int i;
1352
1353         if (chain == NULL) {
1354                 /*
1355                  * First allocate media space and construct the dummy bref,
1356                  * then allocate the in-memory chain structure.
1357                  */
1358                 bzero(&dummy, sizeof(dummy));
1359                 dummy.type = type;
1360                 dummy.key = key;
1361                 dummy.keybits = keybits;
1362                 dummy.data_off = hammer2_bytes_to_radix(bytes);
1363                 chain = hammer2_chain_alloc(hmp, &dummy);
1364                 allocated = 1;
1365
1366                 /*
1367                  * We do NOT set INITIAL here (yet).  INITIAL is only
1368                  * used for indirect blocks.
1369                  *
1370                  * Recalculate bytes to reflect the actual media block
1371                  * allocation.
1372                  */
1373                 bytes = (hammer2_off_t)1 <<
1374                         (int)(chain->bref.data_off & HAMMER2_OFF_MASK_RADIX);
1375                 chain->bytes = bytes;
1376
1377                 switch(type) {
1378                 case HAMMER2_BREF_TYPE_VOLUME:
1379                         panic("hammer2_chain_create: called with volume type");
1380                         break;
1381                 case HAMMER2_BREF_TYPE_INODE:
1382                         KKASSERT(bytes == HAMMER2_INODE_BYTES);
1383                         chain->data = (void *)&chain->u.ip->ip_data;
1384                         break;
1385                 case HAMMER2_BREF_TYPE_INDIRECT:
1386                         panic("hammer2_chain_create: cannot be used to"
1387                               "create indirect block");
1388                         break;
1389                 case HAMMER2_BREF_TYPE_DATA:
1390                 default:
1391                         /* leave chain->data NULL */
1392                         KKASSERT(chain->data == NULL);
1393                         break;
1394                 }
1395         } else {
1396                 /*
1397                  * Potentially update the chain's key/keybits.
1398                  */
1399                 chain->bref.key = key;
1400                 chain->bref.keybits = keybits;
1401         }
1402
1403 again:
1404         /*
1405          * Locate a free blockref in the parent's array
1406          */
1407         switch(parent->bref.type) {
1408         case HAMMER2_BREF_TYPE_INODE:
1409                 KKASSERT(parent->data != NULL);
1410                 base = &parent->data->ipdata.u.blockset.blockref[0];
1411                 count = HAMMER2_SET_COUNT;
1412                 break;
1413         case HAMMER2_BREF_TYPE_INDIRECT:
1414                 if (parent->flags & HAMMER2_CHAIN_INITIAL) {
1415                         base = NULL;
1416                 } else {
1417                         KKASSERT(parent->data != NULL);
1418                         base = &parent->data->npdata.blockref[0];
1419                 }
1420                 count = parent->bytes / sizeof(hammer2_blockref_t);
1421                 break;
1422         case HAMMER2_BREF_TYPE_VOLUME:
1423                 KKASSERT(parent->data != NULL);
1424                 base = &hmp->voldata.sroot_blockset.blockref[0];
1425                 count = HAMMER2_SET_COUNT;
1426                 break;
1427         default:
1428                 panic("hammer2_chain_create: unrecognized blockref type: %d",
1429                       parent->bref.type);
1430                 count = 0;
1431                 break;
1432         }
1433
1434         /*
1435          * Scan for an unallocated bref, also skipping any slots occupied
1436          * by in-memory chain elements that may not yet have been updated
1437          * in the parent's bref array.
1438          */
1439         bzero(&dummy_chain, sizeof(dummy_chain));
1440         for (i = 0; i < count; ++i) {
1441                 if (base == NULL) {
1442                         dummy_chain.index = i;
1443                         if (SPLAY_FIND(hammer2_chain_splay,
1444                                        &parent->shead, &dummy_chain) == NULL) {
1445                                 break;
1446                         }
1447                 } else if (base[i].type == 0) {
1448                         dummy_chain.index = i;
1449                         if (SPLAY_FIND(hammer2_chain_splay,
1450                                        &parent->shead, &dummy_chain) == NULL) {
1451                                 break;
1452                         }
1453                 }
1454         }
1455
1456         /*
1457          * If no free blockref count be found we must create an indirect
1458          * block and move a number of blockrefs into it.  With the parent
1459          * locked we can safely lock each child in order to move it without
1460          * causing a deadlock.
1461          *
1462          * This may return the new indirect block or the old parent depending
1463          * on where the key falls.
1464          */
1465         if (i == count) {
1466                 hammer2_chain_t *nparent;
1467
1468                 nparent = hammer2_chain_create_indirect(hmp, parent,
1469                                                         key, keybits);
1470                 if (nparent == NULL) {
1471                         if (allocated)
1472                                 hammer2_chain_free(hmp, chain);
1473                         chain = NULL;
1474                         goto done;
1475                 }
1476                 if (parent != nparent) {
1477                         if (unlock_parent)
1478                                 hammer2_chain_unlock(hmp, parent);
1479                         parent = nparent;
1480                         unlock_parent = 1;
1481                 }
1482                 goto again;
1483         }
1484
1485         /*
1486          * Link the chain into its parent.
1487          */
1488         if (chain->parent != NULL)
1489                 panic("hammer2: hammer2_chain_create: chain already connected");
1490         KKASSERT(chain->parent == NULL);
1491         chain->parent = parent;
1492         chain->index = i;
1493         if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, chain))
1494                 panic("hammer2_chain_link: collision");
1495         atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DELETED);
1496         KKASSERT(parent->refs > 0);
1497         atomic_add_int(&parent->refs, 1);
1498
1499         /*
1500          * Additional linkage for inodes.  Reuse the parent pointer to
1501          * find the parent directory.
1502          */
1503         if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
1504                 hammer2_chain_t *scan = parent;
1505                 while (scan->bref.type == HAMMER2_BREF_TYPE_INDIRECT)
1506                         scan = scan->parent;
1507                 if (scan->bref.type == HAMMER2_BREF_TYPE_INODE)
1508                         chain->u.ip->pip = scan->u.ip;
1509         }
1510
1511         /*
1512          * (allocated) indicates that this is a newly-created chain element
1513          * rather than a renamed chain element.  In this situation we want
1514          * to place the chain element in the MODIFIED state.
1515          *
1516          * The data area will be set up as follows:
1517          *
1518          *      VOLUME          not allowed here.
1519          *
1520          *      INODE           embedded data are will be set-up.
1521          *
1522          *      INDIRECT        not allowed here.
1523          *
1524          *      DATA            no data area will be set-up (caller is expected
1525          *                      to have logical buffers, we don't want to alias
1526          *                      the data onto device buffers!).
1527          */
1528         if (allocated) {
1529                 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
1530                         hammer2_chain_modify(hmp, chain,
1531                                              HAMMER2_MODIFY_OPTDATA);
1532                 } else if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
1533                         /* not supported in this function */
1534                         panic("hammer2_chain_create: bad type");
1535                         atomic_set_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
1536                         hammer2_chain_modify(hmp, chain,
1537                                              HAMMER2_MODIFY_OPTDATA);
1538                 } else {
1539                         hammer2_chain_modify(hmp, chain, 0);
1540                 }
1541         } else {
1542                 /*
1543                  * When reconnecting inodes we have to call setsubmod()
1544                  * to ensure that its state propagates up the newly
1545                  * connected parent.
1546                  *
1547                  * We cannot depend on the chain being in a MODIFIED
1548                  * state, or it might already be in that state, so
1549                  * even if the parent calls hammer2_chain_modify()
1550                  * MOVED might not get set.  Thus we have to set it
1551                  * here, too.
1552                  */
1553                 if ((chain->flags & HAMMER2_CHAIN_MOVED) == 0) {
1554                         hammer2_chain_ref(hmp, chain);
1555                         atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED);
1556                 }
1557                 hammer2_chain_parent_setsubmod(hmp, chain);
1558         }
1559
1560 done:
1561         if (unlock_parent)
1562                 hammer2_chain_unlock(hmp, parent);
1563         return (chain);
1564 }
1565
1566 /*
1567  * Create an indirect block that covers one or more of the elements in the
1568  * current parent.  Either returns the existing parent with no locking or
1569  * ref changes or returns the new indirect block locked and referenced,
1570  * depending on what the specified key falls into.
1571  *
1572  * The key/keybits for the indirect mode only needs to follow three rules:
1573  *
1574  * (1) That all elements underneath it fit within its key space and
1575  *
1576  * (2) That all elements outside it are outside its key space.
1577  *
1578  * (3) When creating the new indirect block any elements in the current
1579  *     parent that fit within the new indirect block's keyspace must be
1580  *     moved into the new indirect block.
1581  *
1582  * (4) The keyspace chosen for the inserted indirect block CAN cover a wider
1583  *     keyspace the the current parent, but lookup/iteration rules will
1584  *     ensure (and must ensure) that rule (2) for all parents leading up
1585  *     to the nearest inode or the root volume header is adhered to.  This
1586  *     is accomplished by always recursing through matching keyspaces in
1587  *     the hammer2_chain_lookup() and hammer2_chain_next() API.
1588  *
1589  * The current implementation calculates the current worst-case keyspace by
1590  * iterating the current parent and then divides it into two halves, choosing
1591  * whichever half has the most elements (not necessarily the half containing
1592  * the requested key).
1593  *
1594  * We can also opt to use the half with the least number of elements.  This
1595  * causes lower-numbered keys (aka logical file offsets) to recurse through
1596  * fewer indirect blocks and higher-numbered keys to recurse through more.
1597  * This also has the risk of not moving enough elements to the new indirect
1598  * block and being forced to create several indirect blocks before the element
1599  * can be inserted.
1600  */
1601 static
1602 hammer2_chain_t *
1603 hammer2_chain_create_indirect(hammer2_mount_t *hmp, hammer2_chain_t *parent,
1604                               hammer2_key_t create_key, int create_bits)
1605 {
1606         hammer2_blockref_t *base;
1607         hammer2_blockref_t *bref;
1608         hammer2_chain_t *chain;
1609         hammer2_chain_t *ichain;
1610         hammer2_chain_t dummy;
1611         hammer2_key_t key = create_key;
1612         int keybits = create_bits;
1613         int locount = 0;
1614         int hicount = 0;
1615         int count;
1616         int nbytes;
1617         int i;
1618
1619         /*
1620          * Calculate the base blockref pointer or NULL if the chain
1621          * is known to be empty.
1622          */
1623         hammer2_chain_modify(hmp, parent, HAMMER2_MODIFY_OPTDATA);
1624         if (parent->flags & HAMMER2_CHAIN_INITIAL) {
1625                 base = NULL;
1626
1627                 /*
1628                  * We still need to calculate the count for SPLAY lookups
1629                  */
1630                 switch(parent->bref.type) {
1631                 case HAMMER2_BREF_TYPE_INODE:
1632                         count = HAMMER2_SET_COUNT;
1633                         break;
1634                 case HAMMER2_BREF_TYPE_INDIRECT:
1635                         count = parent->bytes / sizeof(hammer2_blockref_t);
1636                         break;
1637                 case HAMMER2_BREF_TYPE_VOLUME:
1638                         count = HAMMER2_SET_COUNT;
1639                         break;
1640                 default:
1641                         panic("hammer2_chain_create_indirect: "
1642                               "unrecognized blockref type: %d",
1643                               parent->bref.type);
1644                         count = 0;
1645                         break;
1646                 }
1647         } else {
1648                 /*
1649                  * Locate a free blockref in the parent's array
1650                  */
1651                 switch(parent->bref.type) {
1652                 case HAMMER2_BREF_TYPE_INODE:
1653                         base = &parent->data->ipdata.u.blockset.blockref[0];
1654                         count = HAMMER2_SET_COUNT;
1655                         break;
1656                 case HAMMER2_BREF_TYPE_INDIRECT:
1657                         base = &parent->data->npdata.blockref[0];
1658                         count = parent->bytes / sizeof(hammer2_blockref_t);
1659                         break;
1660                 case HAMMER2_BREF_TYPE_VOLUME:
1661                         base = &hmp->voldata.sroot_blockset.blockref[0];
1662                         count = HAMMER2_SET_COUNT;
1663                         break;
1664                 default:
1665                         panic("hammer2_chain_create_indirect: "
1666                               "unrecognized blockref type: %d",
1667                               parent->bref.type);
1668                         count = 0;
1669                         break;
1670                 }
1671         }
1672
1673         /*
1674          * Scan for an unallocated bref, also skipping any slots occupied
1675          * by in-memory chain elements that may not yet have been updated
1676          * in the parent's bref array.
1677          */
1678         bzero(&dummy, sizeof(dummy));
1679         for (i = 0; i < count; ++i) {
1680                 int nkeybits;
1681
1682                 /*
1683                  * Optimize the case where the parent is still in its
1684                  * initially created state.
1685                  */
1686                 if (base == NULL || base[i].type == 0) {
1687                         dummy.index = i;
1688                         chain = SPLAY_FIND(hammer2_chain_splay,
1689                                            &parent->shead, &dummy);
1690                         if (chain == NULL)
1691                                 continue;
1692                         bref = &chain->bref;
1693                 } else {
1694                         bref = &base[i];
1695                 }
1696
1697                 /*
1698                  * Expand our calculated key range (key, keybits) to fit
1699                  * the scanned key.  nkeybits represents the full range
1700                  * that we will later cut in half (two halves @ nkeybits - 1).
1701                  */
1702                 nkeybits = keybits;
1703                 if (nkeybits < bref->keybits)
1704                         nkeybits = bref->keybits;
1705                 while ((~(((hammer2_key_t)1 << nkeybits) - 1) &
1706                         (key ^ bref->key)) != 0) {
1707                         ++nkeybits;
1708                 }
1709
1710                 /*
1711                  * If the new key range is larger we have to determine
1712                  * which side of the new key range the existing keys fall
1713                  * under by checking the high bit, then collapsing the
1714                  * locount into the hicount or vise-versa.
1715                  */
1716                 if (keybits != nkeybits) {
1717                         if (((hammer2_key_t)1 << (nkeybits - 1)) & key) {
1718                                 hicount += locount;
1719                                 locount = 0;
1720                         } else {
1721                                 locount += hicount;
1722                                 hicount = 0;
1723                         }
1724                         keybits = nkeybits;
1725                 }
1726
1727                 /*
1728                  * The newly scanned key will be in the lower half or the
1729                  * higher half of the (new) key range.
1730                  */
1731                 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key)
1732                         ++hicount;
1733                 else
1734                         ++locount;
1735         }
1736
1737         /*
1738          * Adjust keybits to represent half of the full range calculated
1739          * above.
1740          */
1741         --keybits;
1742
1743         /*
1744          * Select whichever half contains the most elements.  Theoretically
1745          * we can select either side as long as it contains at least one
1746          * element (in order to ensure that a free slot is present to hold
1747          * the indirect block).
1748          */
1749         key &= ~(((hammer2_key_t)1 << keybits) - 1);
1750         if (hammer2_indirect_optimize) {
1751                 /*
1752                  * Insert node for least number of keys, this will arrange
1753                  * the first few blocks of a large file or the first few
1754                  * inodes in a directory with fewer indirect blocks when
1755                  * created linearly.
1756                  */
1757                 if (hicount < locount && hicount != 0)
1758                         key |= (hammer2_key_t)1 << keybits;
1759                 else
1760                         key &= ~(hammer2_key_t)1 << keybits;
1761         } else {
1762                 /*
1763                  * Insert node for most number of keys, best for heavily
1764                  * fragmented files.
1765                  */
1766                 if (hicount > locount)
1767                         key |= (hammer2_key_t)1 << keybits;
1768                 else
1769                         key &= ~(hammer2_key_t)1 << keybits;
1770         }
1771
1772         /*
1773          * How big should our new indirect block be?  It has to be at least
1774          * as large as its parent.
1775          */
1776         if (parent->bref.type == HAMMER2_BREF_TYPE_INODE)
1777                 nbytes = HAMMER2_IND_BYTES_MIN;
1778         else
1779                 nbytes = HAMMER2_IND_BYTES_MAX;
1780         if (nbytes < count * sizeof(hammer2_blockref_t))
1781                 nbytes = count * sizeof(hammer2_blockref_t);
1782
1783         /*
1784          * Ok, create our new indirect block
1785          */
1786         dummy.bref.type = HAMMER2_BREF_TYPE_INDIRECT;
1787         dummy.bref.key = key;
1788         dummy.bref.keybits = keybits;
1789         dummy.bref.data_off = hammer2_bytes_to_radix(nbytes);
1790         ichain = hammer2_chain_alloc(hmp, &dummy.bref);
1791         atomic_set_int(&ichain->flags, HAMMER2_CHAIN_INITIAL);
1792
1793         /*
1794          * Iterate the original parent and move the matching brefs into
1795          * the new indirect block.
1796          */
1797         for (i = 0; i < count; ++i) {
1798                 /*
1799                  * For keying purposes access the bref from the media or
1800                  * from our in-memory cache.  In cases where the in-memory
1801                  * cache overrides the media the keyrefs will be the same
1802                  * anyway so we can avoid checking the cache when the media
1803                  * has a key.
1804                  */
1805                 if (base == NULL || base[i].type == 0) {
1806                         dummy.index = i;
1807                         chain = SPLAY_FIND(hammer2_chain_splay,
1808                                            &parent->shead, &dummy);
1809                         if (chain == NULL) {
1810                                 /*
1811                                  * Select index indirect block is placed in
1812                                  */
1813                                 if (ichain->index < 0)
1814                                         ichain->index = i;
1815                                 continue;
1816                         }
1817                         bref = &chain->bref;
1818                 } else {
1819                         bref = &base[i];
1820                 }
1821
1822                 /*
1823                  * Skip keys not in the chosen half (low or high), only bit
1824                  * (keybits - 1) needs to be compared but for safety we
1825                  * will compare all msb bits plus that bit again.
1826                  */
1827                 if ((~(((hammer2_key_t)1 << keybits) - 1) &
1828                     (key ^ bref->key)) != 0) {
1829                         continue;
1830                 }
1831
1832                 /*
1833                  * This element is being moved, its slot is available
1834                  * for our indirect block.
1835                  */
1836                 if (ichain->index < 0)
1837                         ichain->index = i;
1838
1839                 /*
1840                  * Load the new indirect block by acquiring or allocating
1841                  * the related chain entries, then simply move it to the
1842                  * new parent (ichain).
1843                  *
1844                  * Flagging the new chain entry MOVED will cause a flush
1845                  * to synchronize its block into the new indirect block.
1846                  * The chain is unlocked after being moved but needs to
1847                  * retain a reference for the MOVED state
1848                  *
1849                  * We must still set SUBMODIFIED in the parent but we do
1850                  * that after the loop.
1851                  *
1852                  * XXX we really need a lock here but we don't need the
1853                  *     data.  NODATA feature needed.
1854                  */
1855                 chain = hammer2_chain_get(hmp, parent, i,
1856                                           HAMMER2_LOOKUP_NODATA);
1857                 SPLAY_REMOVE(hammer2_chain_splay, &parent->shead, chain);
1858                 if (SPLAY_INSERT(hammer2_chain_splay, &ichain->shead, chain))
1859                         panic("hammer2_chain_create_indirect: collision");
1860                 chain->parent = ichain;
1861                 if (base)
1862                         bzero(&base[i], sizeof(base[i]));
1863                 atomic_add_int(&parent->refs, -1);
1864                 atomic_add_int(&ichain->refs, 1);
1865                 if ((chain->flags & HAMMER2_CHAIN_MOVED) == 0) {
1866                         hammer2_chain_ref(hmp, chain);
1867                         atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED);
1868                 }
1869                 hammer2_chain_unlock(hmp, chain);
1870                 KKASSERT(parent->refs > 0);
1871                 chain = NULL;
1872         }
1873
1874         /*
1875          * Insert the new indirect block into the parent now that we've
1876          * cleared out some entries in the parent.  We calculated a good
1877          * insertion index in the loop above (ichain->index).
1878          */
1879         KKASSERT(ichain->index >= 0);
1880         if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, ichain))
1881                 panic("hammer2_chain_create_indirect: ichain insertion");
1882         ichain->parent = parent;
1883         atomic_add_int(&parent->refs, 1);
1884
1885         /*
1886          * Mark the new indirect block modified after insertion, which
1887          * will propagate up through parent all the way to the root and
1888          * also allocate the physical block in ichain for our caller,
1889          * and assign ichain->data to a pre-zero'd space (because there
1890          * is not prior data to copy into it).
1891          *
1892          * We have to set SUBMODIFIED in ichain's flags manually so the
1893          * flusher knows it has to recurse through it to get to all of
1894          * our moved blocks, then call setsubmod() to set the bit
1895          * recursively.
1896          */
1897         hammer2_chain_modify(hmp, ichain, HAMMER2_MODIFY_OPTDATA);
1898         atomic_set_int(&ichain->flags, HAMMER2_CHAIN_SUBMODIFIED);
1899         hammer2_chain_parent_setsubmod(hmp, ichain);
1900
1901         /*
1902          * Figure out what to return.
1903          */
1904         if (create_bits >= keybits) {
1905                 /*
1906                  * Key being created is way outside the key range,
1907                  * return the original parent.
1908                  */
1909                 hammer2_chain_unlock(hmp, ichain);
1910         } else if (~(((hammer2_key_t)1 << keybits) - 1) &
1911                    (create_key ^ key)) {
1912                 /*
1913                  * Key being created is outside the key range,
1914                  * return the original parent.
1915                  */
1916                 hammer2_chain_unlock(hmp, ichain);
1917         } else {
1918                 /*
1919                  * Otherwise its in the range, return the new parent.
1920                  */
1921                 parent = ichain;
1922         }
1923
1924         return(parent);
1925 }
1926
1927 /*
1928  * Physically delete the specified chain element.  Note that inodes with
1929  * open descriptors should not be deleted (as with other filesystems) until
1930  * the last open descriptor is closed.
1931  *
1932  * This routine will remove the chain element from its parent and potentially
1933  * also recurse upward and delete indirect blocks which become empty as a
1934  * side effect.
1935  *
1936  * The caller must pass a pointer to the chain's parent, also locked and
1937  * referenced.  (*parentp) will be modified in a manner similar to a lookup
1938  * or iteration when indirect blocks are also deleted as a side effect.
1939  */
1940 void
1941 hammer2_chain_delete(hammer2_mount_t *hmp, hammer2_chain_t *parent,
1942                      hammer2_chain_t *chain)
1943 {
1944         hammer2_blockref_t *base;
1945         int count;
1946
1947         if (chain->parent != parent)
1948                 panic("hammer2_chain_delete: parent mismatch");
1949
1950         /*
1951          * Mark the parent modified so our base[] pointer remains valid
1952          * while we move entries.  For the optimized indirect block
1953          * case mark the parent moved instead.
1954          *
1955          * Calculate the blockref reference in the parent
1956          */
1957         switch(parent->bref.type) {
1958         case HAMMER2_BREF_TYPE_INODE:
1959                 hammer2_chain_modify(hmp, parent, 0);
1960                 base = &parent->data->ipdata.u.blockset.blockref[0];
1961                 count = HAMMER2_SET_COUNT;
1962                 break;
1963         case HAMMER2_BREF_TYPE_INDIRECT:
1964                 hammer2_chain_modify(hmp, parent, HAMMER2_MODIFY_OPTDATA);
1965                 if (parent->flags & HAMMER2_CHAIN_INITIAL)
1966                         base = NULL;
1967                 else
1968                         base = &parent->data->npdata.blockref[0];
1969                 count = parent->bytes / sizeof(hammer2_blockref_t);
1970                 break;
1971         case HAMMER2_BREF_TYPE_VOLUME:
1972                 hammer2_chain_modify(hmp, parent, 0);
1973                 base = &hmp->voldata.sroot_blockset.blockref[0];
1974                 count = HAMMER2_SET_COUNT;
1975                 break;
1976         default:
1977                 panic("hammer2_chain_delete: unrecognized blockref type: %d",
1978                       parent->bref.type);
1979                 count = 0;
1980                 break;
1981         }
1982
1983         /*
1984          * Disconnect the bref in the parent, remove the chain, and
1985          * disconnect in-memory fields from the parent.
1986          */
1987         KKASSERT(chain->index >= 0 && chain->index < count);
1988         if (base)
1989                 bzero(&base[chain->index], sizeof(*base));
1990
1991         SPLAY_REMOVE(hammer2_chain_splay, &parent->shead, chain);
1992         atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
1993         atomic_add_int(&parent->refs, -1);      /* for splay entry */
1994         chain->index = -1;
1995         chain->parent = NULL;
1996
1997         /*
1998          * If this is an inode clear the pip.
1999          */
2000         if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
2001                 chain->u.ip->pip = NULL;
2002
2003         /*
2004          * The chain is still likely referenced, possibly even by a vnode
2005          * (if an inode), so defer further action until the chain gets
2006          * dropped.
2007          */
2008 }
2009
2010 /*
2011  * Recursively flush the specified chain.  The chain is locked and
2012  * referenced by the caller and will remain so on return.  The chain
2013  * will remain referenced throughout but can temporarily lose its
2014  * lock during the recursion to avoid unnecessarily stalling user
2015  * processes.
2016  *
2017  *
2018  */
2019 TAILQ_HEAD(flush_deferral_list, hammer2_chain);
2020
2021 struct hammer2_flush_info {
2022         struct flush_deferral_list flush_list;
2023         int             depth;
2024 };
2025
2026 typedef struct hammer2_flush_info hammer2_flush_info_t;
2027
2028 static void
2029 hammer2_chain_flush_pass1(hammer2_mount_t *hmp, hammer2_chain_t *parent,
2030                           hammer2_flush_info_t *info)
2031 {
2032         hammer2_blockref_t *bref;
2033         hammer2_off_t pbase;
2034         size_t bbytes;
2035         size_t boff;
2036         char *bdata;
2037         struct buf *bp;
2038         int error;
2039
2040         /*
2041          * If we hit the stack recursion depth limit defer the operation.
2042          * The controller of the info structure will execute the deferral
2043          * list and then retry.
2044          *
2045          * This is only applicable if SUBMODIFIED is set.  After a reflush
2046          * SUBMODIFIED will probably be cleared and we want to drop through
2047          * to finish processing the current element so our direct parent
2048          * can process the results.
2049          */
2050         if (info->depth == HAMMER2_FLUSH_DEPTH_LIMIT &&
2051             (parent->flags & HAMMER2_CHAIN_SUBMODIFIED)) {
2052                 if ((parent->flags & HAMMER2_CHAIN_DEFERRED) == 0 &&
2053                     ((parent->flags & (HAMMER2_CHAIN_SUBMODIFIED |
2054                                        HAMMER2_CHAIN_MODIFIED |
2055                                        HAMMER2_CHAIN_MODIFIED_AUX |
2056                                        HAMMER2_CHAIN_MOVED)) != 0)) {
2057                         hammer2_chain_ref(hmp, parent);
2058                         TAILQ_INSERT_TAIL(&info->flush_list,
2059                                           parent, flush_node);
2060                         atomic_set_int(&parent->flags, HAMMER2_CHAIN_DEFERRED);
2061                 }
2062                 return;
2063         }
2064
2065         if (hammer2_debug & 0x0008)
2066                 kprintf("%*.*sCHAIN type=%d@%08jx %p/%d %04x {\n",
2067                         info->depth, info->depth, "",
2068                         parent->bref.type, parent->bref.data_off,
2069                         parent, parent->refs, parent->flags);
2070
2071         /*
2072          * Flush any children of this parent.
2073          *
2074          * NOTE: If we use a while() here an active filesystem can
2075          *       prevent the flush from ever finishing.
2076          */
2077         if (parent->flags & HAMMER2_CHAIN_SUBMODIFIED) {
2078                 hammer2_blockref_t *base;
2079                 hammer2_chain_t *chain;
2080                 hammer2_chain_t *next;
2081                 int count;
2082                 int submodified = 0;
2083                 int submoved = 0;
2084
2085                 /*
2086                  * Clear SUBMODIFIED now.  Flag any races during the flush
2087                  * with the (submodified) local variable and re-arm it
2088                  * as necessary after the loop is done.
2089                  *
2090                  * Delaying the setting of the parent to MODIFIED can reduce
2091                  * unnecessary I/O.
2092                  *
2093                  * Modifications to the children will propagate up, forcing
2094                  * us to become modified and copy-on-write too.  Be sure
2095                  * to modify parent (as a side effect of the recursive
2096                  * flush) ONLY if it is actually being modified by the
2097                  * recursive flush.
2098                  */
2099                 atomic_clear_int(&parent->flags, HAMMER2_CHAIN_SUBMODIFIED);
2100
2101                 /*
2102                  * Flush the children and update the blockrefs in the parent.
2103                  * Be careful of ripouts during the loop.
2104                  */
2105                 next = SPLAY_MIN(hammer2_chain_splay, &parent->shead);
2106                 while ((chain = next) != NULL) {
2107                         next = SPLAY_NEXT(hammer2_chain_splay,
2108                                           &parent->shead, chain);
2109                         /*
2110                          * We only recurse if SUBMODIFIED (internal node)
2111                          * or MODIFIED (internal node or leaf) is set.
2112                          * However, we must still track whether any MOVED
2113                          * entries are present to determine if the parent's
2114                          * blockref's need updating or not.
2115                          */
2116                         if (chain->flags & HAMMER2_CHAIN_MOVED)
2117                                 submoved = 1;
2118                         if ((chain->flags & (HAMMER2_CHAIN_SUBMODIFIED |
2119                                              HAMMER2_CHAIN_MODIFIED |
2120                                             HAMMER2_CHAIN_MODIFIED_AUX)) == 0) {
2121                                 continue;
2122                         }
2123
2124                         /*
2125                          * Propagate the DESTROYED flag if found set, then
2126                          * recurse the flush.
2127                          */
2128                         hammer2_chain_lock(hmp, chain, HAMMER2_RESOLVE_MAYBE);
2129                         if ((parent->flags & HAMMER2_CHAIN_DESTROYED) &&
2130                             (chain->flags & HAMMER2_CHAIN_DESTROYED) == 0) {
2131                                 atomic_set_int(&chain->flags,
2132                                                HAMMER2_CHAIN_DESTROYED |
2133                                                HAMMER2_CHAIN_SUBMODIFIED);
2134                         }
2135                         ++info->depth;
2136                         hammer2_chain_flush_pass1(hmp, chain, info);
2137                         --info->depth;
2138
2139                         /*
2140                          * No point loading blockrefs yet if the
2141                          * child (recursively) is still dirty.
2142                          */
2143                         if (chain->flags & (HAMMER2_CHAIN_SUBMODIFIED |
2144                                            HAMMER2_CHAIN_MODIFIED |
2145                                            HAMMER2_CHAIN_MODIFIED_AUX)) {
2146                                 submodified = 1;
2147                                 if (hammer2_debug & 0x0008)
2148                                         kprintf("s");
2149                         }
2150                         if (chain->flags & HAMMER2_CHAIN_MOVED) {
2151                                 if (hammer2_debug & 0x0008)
2152                                         kprintf("m");
2153                                 submoved = 1;
2154                         }
2155                         if (hammer2_debug & 0x0008)
2156                                 kprintf("\n");
2157                         hammer2_chain_unlock(hmp, chain);
2158                 }
2159
2160                 if (submodified ||
2161                     (parent->flags & HAMMER2_CHAIN_SUBMODIFIED)) {
2162                         /*
2163                          * No point loading up the blockrefs if submodified
2164                          * got re-set.
2165                          *
2166                          * NOTE: Even though we cleared the SUBMODIFIED flag
2167                          *       it can still get re-set by operations
2168                          *       occuring under our chain, so check both.
2169                          */
2170                         atomic_set_int(&parent->flags,
2171                                        HAMMER2_CHAIN_SUBMODIFIED);
2172                 } else if (submoved) {
2173                         /*
2174                          * Ok, we can modify the blockrefs in this parent
2175                          * entry.  Mark it modified.  Calculate the
2176                          * blockref array after marking it modified (since
2177                          * that may change the underlying data ptr).
2178                          *
2179                          * NOTE: We only do this if submoved != 0, otherwise
2180                          *       there may not be any changes and setting
2181                          *       the parent modified will re-arm the MOVED
2182                          *       bit recursively, resulting in O(N^2)
2183                          *       flushes.
2184                          *
2185                          * NOTE: We don't want hammer2_chain_modify() to
2186                          *       recursively set the SUBMODIFIED flag
2187                          *       upward in this case!
2188                          */
2189                         hammer2_chain_modify(hmp, parent, HAMMER2_MODIFY_NOSUB);
2190
2191                         switch(parent->bref.type) {
2192                         case HAMMER2_BREF_TYPE_INODE:
2193                                 base = &parent->data->ipdata.u.blockset.
2194                                         blockref[0];
2195                                 count = HAMMER2_SET_COUNT;
2196                                 break;
2197                         case HAMMER2_BREF_TYPE_INDIRECT:
2198                                 base = &parent->data->npdata.blockref[0];
2199                                 count = parent->bytes /
2200                                         sizeof(hammer2_blockref_t);
2201                                 break;
2202                         case HAMMER2_BREF_TYPE_VOLUME:
2203                                 base = &hmp->voldata.sroot_blockset.blockref[0];
2204                                 count = HAMMER2_SET_COUNT;
2205                                 break;
2206                         default:
2207                                 base = NULL;
2208                                 panic("hammer2_chain_get: "
2209                                       "unrecognized blockref type: %d",
2210                                       parent->bref.type);
2211                         }
2212
2213                         /*
2214                          * Update the blockrefs.
2215                          */
2216                         next = SPLAY_MIN(hammer2_chain_splay, &parent->shead);
2217                         while ((chain = next) != NULL) {
2218                                 next = SPLAY_NEXT(hammer2_chain_splay,
2219                                                   &parent->shead, chain);
2220                                 KKASSERT(chain->index >= 0 &&
2221                                          chain->index < count);
2222                                 hammer2_chain_lock(hmp, chain,
2223                                                    HAMMER2_RESOLVE_NEVER);
2224                                 base[chain->index] = chain->bref;
2225                                 if (chain->flags & HAMMER2_CHAIN_MOVED) {
2226                                         atomic_clear_int(&chain->flags,
2227                                                  HAMMER2_CHAIN_MOVED);
2228                                         hammer2_chain_drop(hmp, chain);
2229                                 }
2230                                 hammer2_chain_unlock(hmp, chain);
2231                         }
2232                 }
2233         }
2234
2235         /*
2236          * If destroying the object we unconditonally clear the MODIFIED
2237          * and MOVED bits, and we destroy the buffer without writing it
2238          * out.
2239          *
2240          * We don't bother updating the hash/crc or the parent bref.
2241          *
2242          * XXX allocations for unflushed data can be returned to the
2243          *     free pool.
2244          */
2245         if (parent->flags & HAMMER2_CHAIN_DESTROYED) {
2246                 if (parent->flags & HAMMER2_CHAIN_MODIFIED) {
2247                         if (parent->bp) {
2248                                 parent->bp->b_flags |= B_INVAL|B_RELBUF;
2249                         }
2250                         atomic_clear_int(&parent->flags,
2251                                          HAMMER2_CHAIN_MODIFIED);
2252                         hammer2_chain_drop(hmp, parent);
2253                 }
2254                 if (parent->flags & HAMMER2_CHAIN_MODIFIED_AUX) {
2255                         atomic_clear_int(&parent->flags,
2256                                          HAMMER2_CHAIN_MODIFIED_AUX);
2257                 }
2258                 if (parent->flags & HAMMER2_CHAIN_MOVED) {
2259                         atomic_clear_int(&parent->flags,
2260                                          HAMMER2_CHAIN_MOVED);
2261                         hammer2_chain_drop(hmp, parent);
2262                 }
2263                 return;
2264         }
2265
2266         /*
2267          * Flush this chain entry only if it is marked modified.
2268          */
2269         if ((parent->flags & (HAMMER2_CHAIN_MODIFIED |
2270                               HAMMER2_CHAIN_MODIFIED_AUX)) == 0) {
2271                 goto done;
2272         }
2273
2274         /*
2275          * Clear MODIFIED and set HAMMER2_CHAIN_MOVED.  The caller
2276          * will re-test the MOVED bit.
2277          *
2278          * bits own a single parent ref and the MOVED bit owns its own
2279          * parent ref.
2280          */
2281         if (parent->flags & HAMMER2_CHAIN_MODIFIED) {
2282                 atomic_clear_int(&parent->flags, HAMMER2_CHAIN_MODIFIED);
2283                 if (parent->flags & HAMMER2_CHAIN_MOVED) {
2284                         hammer2_chain_drop(hmp, parent);
2285                 } else {
2286                         /* inherit ref from the MODIFIED we cleared */
2287                         atomic_set_int(&parent->flags, HAMMER2_CHAIN_MOVED);
2288                 }
2289         }
2290         atomic_clear_int(&parent->flags, HAMMER2_CHAIN_MODIFIED_AUX);
2291
2292         /*
2293          * If this is part of a recursive flush we can go ahead and write
2294          * out the buffer cache buffer and pass a new bref back up the chain.
2295          *
2296          * This will never be a volume header.
2297          */
2298         switch(parent->bref.type) {
2299         case HAMMER2_BREF_TYPE_VOLUME:
2300                 /*
2301                  * The volume header is flushed manually by the syncer, not
2302                  * here.
2303                  */
2304                 break;
2305         case HAMMER2_BREF_TYPE_DATA:
2306                 /*
2307                  * Data elements have already been flushed via the logical
2308                  * file buffer cache.  Their hash was set in the bref by
2309                  * the vop_write code.
2310                  *
2311                  * Make sure the buffer(s) have been flushed out here.
2312                  */
2313 #if 1
2314                 bbytes = parent->bytes;
2315                 pbase = parent->bref.data_off & ~(hammer2_off_t)(bbytes - 1);
2316                 boff = parent->bref.data_off & HAMMER2_OFF_MASK & (bbytes - 1);
2317
2318                 bp = getblk(hmp->devvp, pbase, bbytes, GETBLK_NOWAIT, 0);
2319                 if (bp) {
2320                         if ((bp->b_flags & (B_CACHE | B_DIRTY)) ==
2321                             (B_CACHE | B_DIRTY)) {
2322                                 kprintf("x");
2323                                 cluster_awrite(bp);
2324                         } else {
2325                                 bp->b_flags |= B_RELBUF;
2326                                 brelse(bp);
2327                         }
2328                 }
2329 #endif
2330                 break;
2331         case HAMMER2_BREF_TYPE_INDIRECT:
2332                 /*
2333                  * Indirect blocks may be in an INITIAL state.
2334                  */
2335                 break;
2336         default:
2337                 /*
2338                  * Embedded elements have to be flushed out.
2339                  */
2340                 KKASSERT(parent->data != NULL);
2341                 bref = &parent->bref;
2342
2343                 KKASSERT((bref->data_off & HAMMER2_OFF_MASK) != 0);
2344
2345                 if (parent->bp == NULL) {
2346                         /*
2347                          * The data is embedded, we have to acquire the
2348                          * buffer cache buffer and copy the data into it.
2349                          */
2350                         if ((bbytes = parent->bytes) < HAMMER2_MINIOSIZE)
2351                                 bbytes = HAMMER2_MINIOSIZE;
2352                         pbase = bref->data_off & ~(hammer2_off_t)(bbytes - 1);
2353                         boff = bref->data_off & HAMMER2_OFF_MASK & (bbytes - 1);
2354
2355                         /*
2356                          * The getblk() optimization can only be used if the
2357                          * physical block size matches the request.
2358                          */
2359                         if (parent->bytes == bbytes) {
2360                                 bp = getblk(hmp->devvp, pbase, bbytes, 0, 0);
2361                                 error = 0;
2362                         } else {
2363                                 error = bread(hmp->devvp, pbase, bbytes, &bp);
2364                                 KKASSERT(error == 0);
2365                         }
2366                         bdata = (char *)bp->b_data + boff;
2367
2368                         /*
2369                          * Copy the data to the buffer, mark the buffer
2370                          * dirty, and convert the parent to unmodified.
2371                          */
2372                         bcopy(parent->data, bdata, parent->bytes);
2373                         bp->b_flags |= B_CLUSTEROK;
2374                         bdwrite(bp);
2375                         bp = NULL;
2376                         parent->bref.check.iscsi32.value =
2377                                 hammer2_icrc32(parent->data, parent->bytes);
2378                         if (parent->bref.type == HAMMER2_BREF_TYPE_INODE)
2379                                 ++hammer2_iod_meta_write;
2380                         else
2381                                 ++hammer2_iod_indr_write;
2382                 } else {
2383                         parent->bref.check.iscsi32.value =
2384                                 hammer2_icrc32(parent->data, parent->bytes);
2385                 }
2386         }
2387
2388         /*
2389          * Special handling
2390          */
2391         bref = &parent->bref;
2392
2393         switch(bref->type) {
2394         case HAMMER2_BREF_TYPE_VOLUME:
2395                 KKASSERT(parent->data != NULL);
2396                 KKASSERT(parent->bp == NULL);
2397
2398                 hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]=
2399                         hammer2_icrc32(
2400                                 (char *)&hmp->voldata +
2401                                  HAMMER2_VOLUME_ICRC1_OFF,
2402                                 HAMMER2_VOLUME_ICRC1_SIZE);
2403                 hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]=
2404                         hammer2_icrc32(
2405                                 (char *)&hmp->voldata +
2406                                  HAMMER2_VOLUME_ICRC0_OFF,
2407                                 HAMMER2_VOLUME_ICRC0_SIZE);
2408                 hmp->voldata.icrc_volheader =
2409                         hammer2_icrc32(
2410                                 (char *)&hmp->voldata +
2411                                  HAMMER2_VOLUME_ICRCVH_OFF,
2412                                 HAMMER2_VOLUME_ICRCVH_SIZE);
2413                 break;
2414         }
2415 done:
2416         if (hammer2_debug & 0x0008) {
2417                 kprintf("%*.*s} %p/%d %04x ",
2418                         info->depth, info->depth, "",
2419                         parent, parent->refs, parent->flags);
2420         }
2421 }
2422
2423 #if 0
2424 /*
2425  * PASS2 - not yet implemented (should be called only with the root chain?)
2426  */
2427 static void
2428 hammer2_chain_flush_pass2(hammer2_mount_t *hmp, hammer2_chain_t *chain)
2429 {
2430 }
2431 #endif
2432
2433 /*
2434  * Stand-alone flush.  If the chain is unable to completely flush we have
2435  * to be sure that SUBMODIFIED propagates up the parent chain.
2436  *
2437  * This routine can be called from several places but the most important
2438  * is from the hammer2_vop_reclaim() function.  We want to try to completely
2439  * clean out the inode structure to prevent disconnected inodes from
2440  * building up and blowing out the kmalloc pool.
2441  */
2442 void
2443 hammer2_chain_flush(hammer2_mount_t *hmp, hammer2_chain_t *chain)
2444 {
2445         hammer2_chain_t *parent;
2446         hammer2_chain_t *scan;
2447         hammer2_blockref_t *base;
2448         hammer2_flush_info_t info;
2449         int count;
2450         int reflush;
2451
2452         /*
2453          * Execute the recursive flush and handle deferrals.
2454          *
2455          * Chains can be ridiculously long (thousands deep), so to
2456          * avoid blowing out the kernel stack the recursive flush has a
2457          * depth limit.  Elements at the limit are placed on a list
2458          * for re-execution after the stack has been popped.
2459          */
2460         bzero(&info, sizeof(info));
2461         TAILQ_INIT(&info.flush_list);
2462         reflush = 1;
2463
2464         while (reflush) {
2465                 /*
2466                  * Primary recursion
2467                  */
2468                 hammer2_chain_flush_pass1(hmp, chain, &info);
2469                 reflush = 0;
2470
2471                 while ((scan = TAILQ_FIRST(&info.flush_list)) != NULL) {
2472                         /*
2473                          * Secondary recursion.  Note that a reference is
2474                          * retained from the element's presence on the
2475                          * deferral list.
2476                          */
2477                         KKASSERT(scan->flags & HAMMER2_CHAIN_DEFERRED);
2478                         TAILQ_REMOVE(&info.flush_list, scan, flush_node);
2479                         atomic_clear_int(&scan->flags, HAMMER2_CHAIN_DEFERRED);
2480
2481                         /*
2482                          * Now that we've popped back up we can do a secondary
2483                          * recursion on the deferred elements.
2484                          */
2485                         if (hammer2_debug & 0x0040)
2486                                 kprintf("defered flush %p\n", scan);
2487                         hammer2_chain_lock(hmp, scan, HAMMER2_RESOLVE_MAYBE);
2488                         hammer2_chain_flush(hmp, scan);
2489                         hammer2_chain_unlock(hmp, scan);
2490
2491                         /*
2492                          * Only flag a reflush if SUBMODIFIED is no longer
2493                          * set.  If SUBMODIFIED is set the element will just
2494                          * wind up on our flush_list again.
2495                          */
2496                         if ((scan->flags & (HAMMER2_CHAIN_SUBMODIFIED |
2497                                             HAMMER2_CHAIN_MODIFIED |
2498                                             HAMMER2_CHAIN_MODIFIED_AUX)) == 0) {
2499                                 reflush = 1;
2500                         }
2501                         hammer2_chain_drop(hmp, scan);
2502                 }
2503                 if ((hammer2_debug & 0x0040) && reflush)
2504                         kprintf("reflush %p\n", chain);
2505         }
2506
2507         /*
2508          * The SUBMODIFIED bit must propagate upward if the chain could not
2509          * be completely flushed.
2510          */
2511         if (chain->flags & (HAMMER2_CHAIN_SUBMODIFIED |
2512                             HAMMER2_CHAIN_MODIFIED |
2513                             HAMMER2_CHAIN_MODIFIED_AUX |
2514                             HAMMER2_CHAIN_MOVED)) {
2515                 hammer2_chain_parent_setsubmod(hmp, chain);
2516         }
2517
2518         /*
2519          * If the only thing left is a simple bref update try to
2520          * pro-actively update the parent, otherwise return early.
2521          */
2522         parent = chain->parent;
2523         if (parent == NULL ||
2524             chain->bref.type != HAMMER2_BREF_TYPE_INODE ||
2525             (chain->flags & (HAMMER2_CHAIN_SUBMODIFIED |
2526                              HAMMER2_CHAIN_MODIFIED |
2527                              HAMMER2_CHAIN_MODIFIED_AUX |
2528                              HAMMER2_CHAIN_MOVED)) != HAMMER2_CHAIN_MOVED) {
2529                 return;
2530         }
2531
2532         /*
2533          * We are locking backwards so allow the lock to fail
2534          */
2535         if (lockmgr(&parent->lk, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
2536                 return;
2537         }
2538
2539         /*
2540          * We are updating brefs but we have to call chain_modify() w/
2541          * setsubmod = TRUE because our caller is not a recursive
2542          * flush.
2543          */
2544         hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_MAYBE);
2545         hammer2_chain_modify(hmp, parent, 0);
2546
2547         switch(parent->bref.type) {
2548         case HAMMER2_BREF_TYPE_INODE:
2549                 base = &parent->data->ipdata.u.blockset.
2550                         blockref[0];
2551                 count = HAMMER2_SET_COUNT;
2552                 break;
2553         case HAMMER2_BREF_TYPE_INDIRECT:
2554                 base = &parent->data->npdata.blockref[0];
2555                 count = parent->bytes /
2556                         sizeof(hammer2_blockref_t);
2557                 break;
2558         case HAMMER2_BREF_TYPE_VOLUME:
2559                 base = &hmp->voldata.sroot_blockset.blockref[0];
2560                 count = HAMMER2_SET_COUNT;
2561                 break;
2562         default:
2563                 base = NULL;
2564                 panic("hammer2_chain_flush: "
2565                       "unrecognized blockref type: %d",
2566                       parent->bref.type);
2567         }
2568
2569         /*
2570          * Update the blockref in the parent
2571          */
2572         KKASSERT(chain->index >= 0 &&
2573                  chain->index < count);
2574         base[chain->index] = chain->bref;
2575         if (chain->flags & HAMMER2_CHAIN_MOVED) {
2576                 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MOVED);
2577                 hammer2_chain_drop(hmp, chain);
2578         }
2579
2580         lockmgr(&parent->lk, LK_RELEASE);       /* release manual lockmgr op */
2581         hammer2_chain_unlock(hmp, parent);
2582 }