2 * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * This subsystem handles direct and indirect block searches, recursions,
37 * creation, and deletion. Chains of blockrefs are tracked and modifications
38 * are flag for propagation... eventually all the way back to the volume
42 #include <sys/cdefs.h>
43 #include <sys/cdefs.h>
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/types.h>
52 SPLAY_GENERATE(hammer2_chain_splay, hammer2_chain, snode, hammer2_chain_cmp);
54 static int hammer2_indirect_optimize; /* XXX SYSCTL */
56 static hammer2_chain_t *hammer2_chain_create_indirect(
57 hammer2_mount_t *hmp, hammer2_chain_t *parent,
58 hammer2_key_t key, int keybits);
61 * Compare function for chain splay tree
64 hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2)
66 return(chain2->index - chain1->index);
70 * Allocate a new disconnected chain element representing the specified
71 * bref. The chain element is locked exclusively and refs is set to 1.
73 * This essentially allocates a system memory structure representing one
74 * of the media structure types, including inodes.
77 hammer2_chain_alloc(hammer2_mount_t *hmp, hammer2_blockref_t *bref)
79 hammer2_chain_t *chain;
81 hammer2_indblock_t *np;
83 u_int bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
86 * Construct the appropriate system structure.
89 case HAMMER2_BREF_TYPE_INODE:
90 ip = kmalloc(sizeof(*ip), hmp->minode, M_WAITOK | M_ZERO);
93 lockinit(&chain->lk, "inode", 0, LK_CANRECURSE);
96 case HAMMER2_BREF_TYPE_INDIRECT:
97 if (bytes == HAMMER2_PBUFSIZE) {
98 np = kmalloc(sizeof(*np),
103 np = kmalloc(offsetof(struct hammer2_indblock,
111 lockinit(&chain->lk, "iblk", 0, LK_CANRECURSE);
113 case HAMMER2_BREF_TYPE_DATA:
114 dp = kmalloc(sizeof(*dp), hmp->mchain, M_WAITOK | M_ZERO);
117 lockinit(&chain->lk, "dblk", 0, LK_CANRECURSE);
119 case HAMMER2_BREF_TYPE_VOLUME:
121 panic("hammer2_chain_alloc volume type illegal for op");
124 panic("hammer2_chain_alloc: unrecognized blockref type: %d",
128 chain->index = -1; /* not yet assigned */
130 chain->bytes = bytes;
131 lockmgr(&chain->lk, LK_EXCLUSIVE);
137 * Free a disconnected chain element
140 hammer2_chain_free(hammer2_mount_t *hmp, hammer2_chain_t *chain)
144 KKASSERT(chain->bp == NULL);
145 KKASSERT(chain->data == NULL);
146 KKASSERT(chain->bref.type != HAMMER2_BREF_TYPE_INODE ||
147 chain->u.ip->vp == NULL);
149 if ((mem = chain->u.mem) != NULL) {
151 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
152 kfree(mem, hmp->minode);
154 kfree(mem, hmp->mchain);
159 * Add a reference to a chain element (for shared access). The chain
160 * element must already have at least 1 ref controlled by the caller.
163 hammer2_chain_ref(hammer2_mount_t *hmp, hammer2_chain_t *chain)
165 KKASSERT(chain->refs > 0);
166 atomic_add_int(&chain->refs, 1);
170 * Drop the callers reference to the chain element. If the ref count
171 * reaches zero the chain element and its related structure (typically an
172 * inode or indirect block) will be freed and the parent will be
173 * recursively dropped.
175 * Modified elements hold an additional reference so it should not be
176 * possible for the count on a modified element to drop to 0.
178 * The chain element must NOT be locked by the caller.
180 * The parent might or might not be locked by the caller but if so it
181 * will also be referenced so we shouldn't recurse upward.
184 hammer2_chain_drop(hammer2_mount_t *hmp, hammer2_chain_t *chain)
186 hammer2_chain_t *parent;
194 KKASSERT(chain != &hmp->vchain);
195 parent = chain->parent;
197 lockmgr(&parent->lk, LK_EXCLUSIVE);
198 if (atomic_cmpset_int(&chain->refs, 1, 0)) {
200 * Succeeded, recurse and drop parent
202 if (!(chain->flags & HAMMER2_CHAIN_DELETED)) {
203 SPLAY_REMOVE(hammer2_chain_splay,
204 &parent->shead, chain);
205 atomic_set_int(&chain->flags,
206 HAMMER2_CHAIN_DELETED);
207 /* parent refs dropped via recursion */
209 chain->parent = NULL;
211 lockmgr(&parent->lk, LK_RELEASE);
212 hammer2_chain_free(hmp, chain);
214 /* recurse on parent */
217 lockmgr(&parent->lk, LK_RELEASE);
218 /* retry the same chain */
221 if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) {
223 * Succeeded, count did not reach zero so
224 * cut out of the loop.
228 /* retry the same chain */
234 * Lock a chain element, acquiring its data with I/O if necessary.
236 * Returns 0 on success or an error code if the data could not be acquired.
237 * The chain element is locked either way.
239 * chain->data will be pointed either at the embedded data (e.g. for
240 * inodes), in which case the buffer cache buffer is released, or will
241 * point into the bp->b_data buffer with the bp left intact while locked.
244 hammer2_chain_lock(hammer2_mount_t *hmp, hammer2_chain_t *chain)
246 hammer2_blockref_t *bref;
247 hammer2_off_t off_hi;
253 * Lock the element. Under certain conditions this might end up
254 * being a recursive lock.
256 KKASSERT(chain->refs > 0);
257 lockmgr(&chain->lk, LK_EXCLUSIVE);
260 * The volume header is a special case
262 if (chain->bref.type == HAMMER2_BREF_TYPE_VOLUME)
266 * bp must be NULL, so if the data pointer is valid here it points
267 * to embedded data and no I/O is necessary (whether modified or not).
269 KKASSERT(chain->bp == NULL);
274 * If data is NULL we must issue I/O. Any error returns the error
275 * code but leaves the chain locked.
277 * If the chain was modified a new bref will have already been
278 * allocated and its related bp is probably still sitting in the
283 off_hi = bref->data_off & HAMMER2_OFF_MASK_HI;
284 off_lo = (size_t)bref->data_off & HAMMER2_OFF_MASK_LO;
285 KKASSERT(off_hi != 0);
286 error = cluster_read(hmp->devvp,
287 hmp->voldata.volu_size, off_hi,
289 HAMMER2_PBUFSIZE, HAMMER2_PBUFSIZE*8,
293 * Even though this can be synthesized from bref->data_off we
294 * store it in the in-memory chain structure for convenience.
297 (1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))) {
298 panic("hammer2_chain_lock: chain->bytes mismatch");
302 kprintf("hammer2_chain_get: I/O error %016jx: %d\n",
303 (intmax_t)off_hi, error);
310 * Setup the data pointer, either pointing it to an embedded data
311 * structure and copying the data from the buffer, or pointint it
314 * The buffer is not retained when copying to an embedded data
315 * structure in order to avoid potential deadlocks or recursions
316 * on the same physical buffer.
318 switch (bref->type) {
319 case HAMMER2_BREF_TYPE_VOLUME:
321 * Copy data from bp to embedded buffer
323 KKASSERT(0); /* not yet - have mount use this soon */
324 KKASSERT(off_hi == 0);
325 bcopy((char *)chain->bp->b_data + off_lo,
326 &hmp->voldata, HAMMER2_PBUFSIZE);
327 chain->data = (void *)&hmp->voldata;
331 case HAMMER2_BREF_TYPE_INODE:
333 * Copy data from bp to embedded buffer.
335 bcopy((char *)chain->bp->b_data + off_lo,
336 &chain->u.ip->ip_data,
337 HAMMER2_INODE_BYTES);
338 chain->data = (void *)&chain->u.ip->ip_data;
342 case HAMMER2_BREF_TYPE_INDIRECT:
344 * Indirect node data may or may not be embedded depending
345 * on how much there is.
347 if (chain->u.np->is_embedded) {
348 bcopy((char *)chain->bp->b_data + off_lo,
351 chain->data = (void *)&chain->u.np->buf[0];
361 data = (char *)chain->bp->b_data + off_lo;
369 * Resize the chain's physical storage allocation. Chains can be resized
370 * smaller without reallocating the storage. Resizing larger will reallocate
374 hammer2_chain_resize(hammer2_mount_t *hmp, hammer2_chain_t *chain, int nradix)
376 hammer2_chain_t *parent;
384 * Only data blocks can be resized for now
386 KKASSERT(chain != &hmp->vchain);
387 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
388 chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT);
391 * Nothing to do if the element is already the proper size
393 obytes = chain->bytes;
394 nbytes = 1 << nradix;
395 if (obytes == nbytes)
399 * A deleted inode may still be active but unreachable via sync
400 * because it has been disconnected from the tree. Do not allow
401 * deleted inodes to be marked as being modified because this will
402 * bump the refs and never get resolved by the sync, leaving the
403 * inode structure allocated after umount.
405 if ((chain->flags & HAMMER2_CHAIN_DELETED) &&
406 chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
407 KKASSERT(chain->data != NULL);
412 * Set MODIFIED1 and add a chain ref to prevent destruction. Both
413 * modified flags share the same ref.
415 atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED1);
416 if ((chain->flags & HAMMER2_CHAIN_MODIFIED2) == 0)
417 hammer2_chain_ref(hmp, chain);
419 if (nbytes < obytes) {
421 * If we are making it smaller we don't have to reallocate
424 chain->bref.data_off &= ~ HAMMER2_OFF_MASK_RADIX;
425 chain->bref.data_off |= (nradix & HAMMER2_OFF_MASK_RADIX);
426 chain->bytes = nbytes;
431 if (chain != &hmp->vchain) {
432 chain->bref.data_off =
433 hammer2_freemap_alloc(hmp, chain->bref.type, nbytes);
434 chain->bytes = nbytes;
436 /* XXX failed allocation */
438 switch(chain->bref.type) {
439 case HAMMER2_BREF_TYPE_VOLUME: /* embedded */
440 case HAMMER2_BREF_TYPE_INODE: /* embedded */
442 * data points to embedded structure, no copy needed
446 case HAMMER2_BREF_TYPE_INDIRECT:
447 panic("hammer2_chain_resize: "
448 "cannot resize indirect block");
451 case HAMMER2_BREF_TYPE_DATA:
453 * data (if not NULL) points into original bp or
454 * to embedded data. Copy-on-write to new block.
456 KKASSERT(chain != &hmp->vchain); /* safety */
457 if (nbytes == HAMMER2_PBUFSIZE) {
458 nbp = getblk(hmp->devvp,
459 chain->bref.data_off &
461 HAMMER2_PBUFSIZE, 0, 0);
465 error = bread(hmp->devvp,
466 chain->bref.data_off &
468 HAMMER2_PBUFSIZE, &nbp);
469 KKASSERT(error == 0);/* XXX handle error */
473 * The new block may be smaller or larger than the
474 * old block, only copy what fits.
476 ndata = nbp->b_data + (chain->bref.data_off &
477 HAMMER2_OFF_MASK_LO);
480 bcopy(chain->data, ndata, nbytes);
482 bcopy(chain->data, ndata, obytes);
483 KKASSERT(chain->bp != NULL);
490 panic("hammer2_chain_modify: unknown bref type");
497 * Recursively mark the parent chain elements so flushes can find
500 * NOTE: The flush code will modify a SUBMODIFIED-flagged chain
501 * during the flush recursion after clearing the parent's
502 * SUBMODIFIED bit. We don't want to re-set the parent's
503 * SUBMODIFIED bit in this case!
505 if ((chain->flags & HAMMER2_CHAIN_SUBMODIFIED) == 0) {
506 parent = chain->parent;
508 (parent->flags & HAMMER2_CHAIN_SUBMODIFIED) == 0) {
509 atomic_set_int(&parent->flags,
510 HAMMER2_CHAIN_SUBMODIFIED);
511 parent = parent->parent;
517 * Convert a locked chain that was retrieved read-only to read-write.
519 * If not already marked modified a new physical block will be allocated
520 * and assigned to the bref.
522 * allocated->modified (without calling hammer2_chain_lock()) results
523 * in chain->data typically being NULL. In this situation chain->data
524 * is assigned and the target area is zero'd out.
526 * If the data is pointing into a bp it will be relocated to a new bp.
527 * If the data is embedded we leave it alone for now.
530 hammer2_chain_modify(hammer2_mount_t *hmp, hammer2_chain_t *chain)
532 hammer2_chain_t *parent;
538 * Setting the DIRTYBP flag will cause the buffer to be dirtied or
539 * written-out on unlock. This bit is independent of the MODIFIED1
540 * bit because the chain may still need meta-data adjustments done
541 * by virtue of MODIFIED1 for its parent, and the buffer can be
542 * flushed out (possibly multiple times) by the OS before that.
544 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DIRTYBP);
547 * If the chain is already marked MODIFIED1 we can just return.
549 if (chain->flags & HAMMER2_CHAIN_MODIFIED1) {
550 KKASSERT(chain->data != NULL);
555 * A deleted inode may still be active but unreachable via sync
556 * because it has been disconnected from the tree. Do not allow
557 * deleted inodes to be marked as being modified because this will
558 * bump the refs and never get resolved by the sync, leaving the
559 * inode structure allocated after umount.
561 if ((chain->flags & HAMMER2_CHAIN_DELETED) &&
562 chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
563 KKASSERT(chain->data != NULL);
568 * Set MODIFIED1 and add a chain ref to prevent destruction. Both
569 * modified flags share the same ref.
571 atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED1);
572 if ((chain->flags & HAMMER2_CHAIN_MODIFIED2) == 0)
573 hammer2_chain_ref(hmp, chain);
576 * We must allocate the copy-on-write block.
578 * If the data is embedded no other action is required.
580 * If the data is not embedded we acquire and clear the
581 * new block. If chain->data is not NULL we then do the
582 * copy-on-write. chain->data will then be repointed to the new
583 * buffer and the old buffer will be released.
585 * For newly created elements with no prior allocation we go
586 * through the copy-on-write steps except without the copying part.
588 if (chain != &hmp->vchain) {
589 if (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)
590 kprintf("Replace %d\n", chain->bytes);
591 chain->bref.data_off =
592 hammer2_freemap_alloc(hmp, chain->bref.type, chain->bytes);
593 /* XXX failed allocation */
596 switch(chain->bref.type) {
597 case HAMMER2_BREF_TYPE_VOLUME: /* embedded */
598 case HAMMER2_BREF_TYPE_INODE: /* embedded */
600 * Inode and Volume data already points to the embedded
601 * structure, no copy is needed
605 case HAMMER2_BREF_TYPE_INDIRECT:
607 * If the indirect data is embedded we just leave it
608 * in its embedded space, otherwise fall-through to
609 * the bp-handling code.
611 * If this is a newly allocated block chain->data will
612 * be NULL, so make sure it is properly assigned. In
613 * this case the embedded space has already been zero'd
616 if (chain->u.np->is_embedded) {
617 chain->data = (void *)&chain->u.np->buf[0];
622 case HAMMER2_BREF_TYPE_DATA:
624 * data (if not NULL) points into original bp or to embedded
625 * data, copy-on-write to the new block.
627 * data (if NULL) indicates that no prior copy exists, the
628 * storage must be zero'd.
630 KKASSERT(chain != &hmp->vchain); /* safety */
631 if (chain->bytes == HAMMER2_PBUFSIZE) {
632 nbp = getblk(hmp->devvp,
633 chain->bref.data_off & HAMMER2_OFF_MASK_HI,
634 HAMMER2_PBUFSIZE, 0, 0);
636 * XXX want to set B_CACHE but not bother to
637 * zero because it will be zero'd below?
642 error = bread(hmp->devvp,
643 chain->bref.data_off & HAMMER2_OFF_MASK_HI,
644 HAMMER2_PBUFSIZE, &nbp);
645 KKASSERT(error == 0);/* XXX handle error */
649 * Copy or zero-fill on write depending on whether
650 * chain->data exists or not.
652 ndata = nbp->b_data + (chain->bref.data_off &
653 HAMMER2_OFF_MASK_LO);
655 bcopy(chain->data, ndata, chain->bytes);
656 KKASSERT(chain->bp != NULL);
659 bzero(ndata, chain->bytes);
665 panic("hammer2_chain_modify: unknown bref type");
671 * Recursively mark the parent chain elements so flushes can find
674 * NOTE: The flush code will modify a SUBMODIFIED-flagged chain
675 * during the flush recursion after clearing the parent's
676 * SUBMODIFIED bit. We don't want to re-set the parent's
677 * SUBMODIFIED bit in this case!
679 if ((chain->flags & HAMMER2_CHAIN_SUBMODIFIED) == 0) {
680 parent = chain->parent;
682 (parent->flags & HAMMER2_CHAIN_SUBMODIFIED) == 0) {
683 atomic_set_int(&parent->flags,
684 HAMMER2_CHAIN_SUBMODIFIED);
685 parent = parent->parent;
691 * Unlock a chain element without dropping its reference count.
692 * (see hammer2_chain_put() to do both).
694 * Non-embedded data references (chain->bp != NULL) are returned to the
695 * system and the data field is cleared in that case. If modified the
696 * dirty buffer is still returned to the system, can be flushed to disk by
697 * the system at any time, and will be reconstituted/re-read as needed.
700 hammer2_chain_unlock(hammer2_mount_t *hmp, hammer2_chain_t *chain)
704 if (chain->flags & HAMMER2_CHAIN_DIRTYBP) {
705 if (chain->flags & HAMMER2_CHAIN_IOFLUSH) {
706 atomic_clear_int(&chain->flags,
707 HAMMER2_CHAIN_IOFLUSH);
708 chain->bp->b_flags |= B_RELBUF;
714 if (chain->flags & HAMMER2_CHAIN_IOFLUSH) {
715 atomic_clear_int(&chain->flags,
716 HAMMER2_CHAIN_IOFLUSH);
717 chain->bp->b_flags |= B_RELBUF;
720 /* bp might still be dirty */
726 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DIRTYBP);
727 lockmgr(&chain->lk, LK_RELEASE);
731 * Locate an in-memory chain. The parent must be locked. The in-memory
732 * chain is returned or NULL if no in-memory chain is present.
734 * NOTE: A chain on-media might exist for this index when NULL is returned.
737 hammer2_chain_find(hammer2_mount_t *hmp, hammer2_chain_t *parent, int index)
739 hammer2_chain_t dummy;
740 hammer2_chain_t *chain;
743 chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead, &dummy);
748 * Return a locked chain structure with all associated data acquired.
750 * Caller must lock the parent on call, the returned child will be locked.
753 hammer2_chain_get(hammer2_mount_t *hmp, hammer2_chain_t *parent,
754 int index, int flags)
756 hammer2_blockref_t *bref;
757 hammer2_chain_t *chain;
758 hammer2_chain_t dummy;
761 * First see if we have a (possibly modified) chain element cached
762 * for this (parent, index). Acquire the data if necessary.
764 * If chain->data is non-NULL the chain should already be marked
768 chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead, &dummy);
770 hammer2_chain_ref(hmp, chain);
771 if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0)
772 hammer2_chain_lock(hmp, chain);
777 * Otherwise lookup the bref and issue I/O (switch on the parent)
779 switch(parent->bref.type) {
780 case HAMMER2_BREF_TYPE_INODE:
781 KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
782 bref = &parent->data->ipdata.u.blockset.blockref[index];
784 case HAMMER2_BREF_TYPE_INDIRECT:
785 KKASSERT(index >= 0 &&
786 index < parent->bytes / sizeof(hammer2_blockref_t));
787 bref = &parent->data->npdata.blockref[index];
789 case HAMMER2_BREF_TYPE_VOLUME:
790 KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
791 bref = &hmp->voldata.sroot_blockset.blockref[index];
795 panic("hammer2_chain_get: unrecognized blockref type: %d",
800 * Allocate a chain structure representing the existing media
801 * entry. Thus the chain is *not* INITIAL and certainly not
804 chain = hammer2_chain_alloc(hmp, bref);
807 * Link the chain into its parent. Caller is expected to hold an
808 * exclusive lock on the parent.
810 chain->parent = parent;
811 chain->index = index;
812 if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, chain))
813 panic("hammer2_chain_link: collision");
814 KKASSERT(parent->refs > 1);
815 atomic_add_int(&parent->refs, 1); /* for splay entry */
818 * Additional linkage for inodes. Reuse the parent pointer to
819 * find the parent directory.
821 if (bref->type == HAMMER2_BREF_TYPE_INODE) {
822 while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT)
823 parent = parent->parent;
824 if (parent->bref.type == HAMMER2_BREF_TYPE_INODE)
825 chain->u.ip->pip = parent->u.ip;
829 * Our new chain structure has already been referenced and locked
830 * but the lock code handles the I/O so call it to resolve the data.
831 * Then release one of our two exclusive locks.
833 * If NOLOCK is set the release will release the one-and-only lock.
835 if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0)
836 hammer2_chain_lock(hmp, chain);
837 lockmgr(&chain->lk, LK_RELEASE);
843 * Unlock and dereference a chain after use. It is possible for this to
844 * recurse up the chain.
847 hammer2_chain_put(hammer2_mount_t *hmp, hammer2_chain_t *chain)
849 hammer2_chain_unlock(hmp, chain);
850 hammer2_chain_drop(hmp, chain);
854 * Locate any key between key_beg and key_end inclusive. (*parentp)
855 * typically points to an inode but can also point to a related indirect
856 * block and this function will recurse upwards and find the inode again.
858 * WARNING! THIS DOES NOT RETURN KEYS IN LOGICAL KEY ORDER! ANY KEY
859 * WITHIN THE RANGE CAN BE RETURNED. HOWEVER, AN ITERATION
860 * WHICH PICKS UP WHERE WE LEFT OFF WILL CONTINUE THE SCAN.
862 * (*parentp) must be exclusively locked and referenced and can be an inode
863 * or an existing indirect block within the inode.
865 * On return (*parentp) will be modified to point at the deepest parent chain
866 * element encountered during the search, as a helper for an insertion or
867 * deletion. The new (*parentp) will be locked and referenced and the old
868 * will be unlocked and dereferenced (no change if they are both the same).
870 * The matching chain will be returned exclusively locked and referenced.
872 * NULL is returned if no match was found, but (*parentp) will still
873 * potentially be adjusted.
875 * This function will also recurse up the chain if the key is not within the
876 * current parent's range. (*parentp) can never be set to NULL. An iteration
877 * can simply allow (*parentp) to float inside the loop.
880 hammer2_chain_lookup(hammer2_mount_t *hmp, hammer2_chain_t **parentp,
881 hammer2_key_t key_beg, hammer2_key_t key_end,
884 hammer2_chain_t *parent;
885 hammer2_chain_t *chain;
886 hammer2_chain_t *tmp;
887 hammer2_blockref_t *base;
888 hammer2_blockref_t *bref;
889 hammer2_key_t scan_beg;
890 hammer2_key_t scan_end;
895 * Recurse (*parentp) upward if necessary until the parent completely
896 * encloses the key range or we hit the inode.
899 while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
900 scan_beg = parent->bref.key;
901 scan_end = scan_beg +
902 ((hammer2_key_t)1 << parent->bref.keybits) - 1;
903 if (key_beg >= scan_beg && key_end <= scan_end)
905 hammer2_chain_unlock(hmp, parent);
906 parent = parent->parent;
907 hammer2_chain_ref(hmp, parent); /* ref new parent */
908 hammer2_chain_lock(hmp, parent); /* lock new parent */
909 hammer2_chain_drop(hmp, *parentp); /* drop old parent */
910 *parentp = parent; /* new parent */
915 * Locate the blockref array. Currently we do a fully associative
916 * search through the array.
918 switch(parent->bref.type) {
919 case HAMMER2_BREF_TYPE_INODE:
921 * Special shortcut for embedded data returns the inode
922 * itself. Callers must detect this condition and access
923 * the embedded data (the strategy code does this for us).
925 * This is only applicable to regular files and softlinks.
927 if (parent->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA) {
928 hammer2_chain_ref(hmp, parent);
929 if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0)
930 hammer2_chain_lock(hmp, parent);
933 base = &parent->data->ipdata.u.blockset.blockref[0];
934 count = HAMMER2_SET_COUNT;
936 case HAMMER2_BREF_TYPE_INDIRECT:
937 if (parent->data == NULL)
938 panic("parent->data is NULL");
939 base = &parent->data->npdata.blockref[0];
940 count = parent->bytes / sizeof(hammer2_blockref_t);
942 case HAMMER2_BREF_TYPE_VOLUME:
943 base = &hmp->voldata.sroot_blockset.blockref[0];
944 count = HAMMER2_SET_COUNT;
947 panic("hammer2_chain_lookup: unrecognized blockref type: %d",
949 base = NULL; /* safety */
950 count = 0; /* safety */
954 * If the element and key overlap we use the element.
957 for (i = 0; i < count; ++i) {
958 tmp = hammer2_chain_find(hmp, parent, i);
959 bref = (tmp) ? &tmp->bref : &base[i];
962 scan_beg = bref->key;
963 scan_end = scan_beg + ((hammer2_key_t)1 << bref->keybits) - 1;
964 if (key_beg <= scan_end && key_end >= scan_beg)
968 if (key_beg == key_end)
970 return (hammer2_chain_next(hmp, parentp, NULL,
971 key_beg, key_end, flags));
975 * Acquire the new chain element. If the chain element is an
976 * indirect block we must search recursively.
978 chain = hammer2_chain_get(hmp, parent, i, flags);
983 * If the chain element is an indirect block it becomes the new
984 * parent and we loop on it. We must fixup the chain we loop on
985 * if the caller passed flags to us that aren't sufficient for our
988 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
989 hammer2_chain_put(hmp, parent);
990 *parentp = parent = chain;
991 if (flags & HAMMER2_LOOKUP_NOLOCK)
992 hammer2_chain_lock(hmp, chain);
997 * All done, return chain
1003 * After having issued a lookup we can iterate all matching keys.
1005 * If chain is non-NULL we continue the iteration from just after it's index.
1007 * If chain is NULL we assume the parent was exhausted and continue the
1008 * iteration at the next parent.
1011 hammer2_chain_next(hammer2_mount_t *hmp, hammer2_chain_t **parentp,
1012 hammer2_chain_t *chain,
1013 hammer2_key_t key_beg, hammer2_key_t key_end,
1016 hammer2_chain_t *parent;
1017 hammer2_chain_t *tmp;
1018 hammer2_blockref_t *base;
1019 hammer2_blockref_t *bref;
1020 hammer2_key_t scan_beg;
1021 hammer2_key_t scan_end;
1029 * Calculate the next index and recalculate the parent if necessary.
1033 * Continue iteration within current parent. If not NULL
1034 * the passed-in chain may or may not be locked, based on
1035 * the LOOKUP_NOLOCK flag (passed in as returned from lookup
1038 i = chain->index + 1;
1039 if (flags & HAMMER2_LOOKUP_NOLOCK)
1040 hammer2_chain_drop(hmp, chain);
1042 hammer2_chain_put(hmp, chain);
1045 * Any scan where the lookup returned degenerate data embedded
1046 * in the inode has an invalid index and must terminate.
1048 if (chain == parent)
1051 } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT) {
1053 * We reached the end of the iteration.
1058 * Continue iteration with next parent unless the current
1059 * parent covers the range.
1061 hammer2_chain_t *nparent;
1063 if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT)
1066 scan_beg = parent->bref.key;
1067 scan_end = scan_beg +
1068 ((hammer2_key_t)1 << parent->bref.keybits) - 1;
1069 if (key_beg >= scan_beg && key_end <= scan_end)
1072 i = parent->index + 1;
1073 nparent = parent->parent;
1074 hammer2_chain_ref(hmp, nparent); /* ref new parent */
1075 hammer2_chain_unlock(hmp, parent);
1076 hammer2_chain_lock(hmp, nparent); /* lock new parent */
1077 hammer2_chain_drop(hmp, parent); /* drop old parent */
1078 *parentp = parent = nparent;
1083 * Locate the blockref array. Currently we do a fully associative
1084 * search through the array.
1086 switch(parent->bref.type) {
1087 case HAMMER2_BREF_TYPE_INODE:
1088 base = &parent->data->ipdata.u.blockset.blockref[0];
1089 count = HAMMER2_SET_COUNT;
1091 case HAMMER2_BREF_TYPE_INDIRECT:
1092 base = &parent->data->npdata.blockref[0];
1093 count = parent->bytes / sizeof(hammer2_blockref_t);
1095 case HAMMER2_BREF_TYPE_VOLUME:
1096 base = &hmp->voldata.sroot_blockset.blockref[0];
1097 count = HAMMER2_SET_COUNT;
1100 panic("hammer2_chain_next: unrecognized blockref type: %d",
1102 base = NULL; /* safety */
1103 count = 0; /* safety */
1106 KKASSERT(i <= count);
1109 * Look for the key. If we are unable to find a match and an exact
1110 * match was requested we return NULL. If a range was requested we
1111 * run hammer2_chain_next() to iterate.
1115 tmp = hammer2_chain_find(hmp, parent, i);
1116 bref = (tmp) ? &tmp->bref : &base[i];
1117 if (bref->type == 0) {
1121 scan_beg = bref->key;
1122 scan_end = scan_beg + ((hammer2_key_t)1 << bref->keybits) - 1;
1123 if (key_beg <= scan_end && key_end >= scan_beg)
1129 * If we couldn't find a match recurse up a parent to continue the
1136 * Acquire the new chain element. If the chain element is an
1137 * indirect block we must search recursively.
1139 chain = hammer2_chain_get(hmp, parent, i, flags);
1144 * If the chain element is an indirect block it becomes the new
1145 * parent and we loop on it. We may have to lock the chain when
1146 * cycling it in as the new parent as it will not be locked if the
1147 * caller passed NOLOCK.
1149 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
1150 hammer2_chain_put(hmp, parent);
1151 *parentp = parent = chain;
1152 if (flags & HAMMER2_LOOKUP_NOLOCK)
1153 hammer2_chain_lock(hmp, chain);
1159 * All done, return chain
1165 * Create and return a new hammer2 system memory structure of the specified
1166 * key, type and size and insert it RELATIVE TO (PARENT).
1168 * (parent) is typically either an inode or an indirect block, acquired
1169 * acquired as a side effect of issuing a prior failed lookup. parent
1170 * must be locked and held. Do not pass the inode chain to this function
1171 * unless that is the chain returned by the failed lookup.
1173 * Non-indirect types will automatically allocate indirect blocks as required
1174 * if the new item does not fit in the current (parent).
1176 * Indirect types will move a portion of the existing blockref array in
1177 * (parent) into the new indirect type and then use one of the free slots
1178 * to emplace the new indirect type.
1180 * A new locked, referenced chain element is returned of the specified type.
1181 * This element will also be marked as modified and contain a data area
1182 * ready for initialization.
1185 hammer2_chain_create(hammer2_mount_t *hmp, hammer2_chain_t *parent,
1186 hammer2_chain_t *chain,
1187 hammer2_key_t key, int keybits, int type, size_t bytes)
1189 hammer2_blockref_t dummy;
1190 hammer2_blockref_t *base;
1191 hammer2_blockref_t *bref;
1192 hammer2_chain_t dummy_chain;
1193 int unlock_parent = 0;
1198 if (chain == NULL) {
1200 * First allocate media space and construct the dummy bref,
1201 * then allocate the in-memory chain structure.
1203 bzero(&dummy, sizeof(dummy));
1206 dummy.keybits = keybits;
1207 dummy.data_off = hammer2_bytes_to_radix(bytes);
1208 chain = hammer2_chain_alloc(hmp, &dummy);
1212 * We set the WAS_MODIFIED flag here so the chain gets
1213 * marked as modified below.
1215 chain->flags |= HAMMER2_CHAIN_INITIAL |
1216 HAMMER2_CHAIN_WAS_MODIFIED;
1219 * Recalculate bytes to reflect the actual media block
1222 bytes = (hammer2_off_t)1 <<
1223 (int)(chain->bref.data_off & HAMMER2_OFF_MASK_RADIX);
1224 chain->bytes = bytes;
1227 case HAMMER2_BREF_TYPE_VOLUME:
1228 panic("hammer2_chain_create: called with volume type");
1230 case HAMMER2_BREF_TYPE_INODE:
1231 KKASSERT(bytes == HAMMER2_INODE_BYTES);
1232 chain->data = (void *)&chain->u.ip->ip_data;
1234 case HAMMER2_BREF_TYPE_INDIRECT:
1236 * May or may not be embedded (chain->data may or
1239 if (chain->u.np->is_embedded) {
1240 chain->data = (void *)&chain->u.np->buf[0];
1242 KKASSERT(chain->data == NULL);
1246 /* leave chain->data NULL */
1247 KKASSERT(chain->data == NULL);
1252 * Potentially update the chain's key/keybits, but it will
1253 * only be marked modified if WAS_MODIFIED is set (if it
1254 * was modified at the time of its removal during a rename).
1256 chain->bref.key = key;
1257 chain->bref.keybits = keybits;
1262 * Locate a free blockref in the parent's array
1264 switch(parent->bref.type) {
1265 case HAMMER2_BREF_TYPE_INODE:
1266 KKASSERT(parent->data != NULL);
1267 base = &parent->data->ipdata.u.blockset.blockref[0];
1268 count = HAMMER2_SET_COUNT;
1270 case HAMMER2_BREF_TYPE_INDIRECT:
1271 KKASSERT(parent->data != NULL);
1272 base = &parent->data->npdata.blockref[0];
1273 count = parent->bytes / sizeof(hammer2_blockref_t);
1275 case HAMMER2_BREF_TYPE_VOLUME:
1276 KKASSERT(parent->data != NULL);
1277 base = &hmp->voldata.sroot_blockset.blockref[0];
1278 count = HAMMER2_SET_COUNT;
1281 panic("hammer2_chain_create: unrecognized blockref type: %d",
1288 * Scan for an unallocated bref, also skipping any slots occupied
1289 * by in-memory chain elements that may not yet have been updated
1290 * in the parent's bref array.
1292 bzero(&dummy_chain, sizeof(dummy_chain));
1294 for (i = 0; i < count; ++i) {
1296 dummy_chain.index = i;
1297 if (bref->type == 0 &&
1298 SPLAY_FIND(hammer2_chain_splay,
1299 &parent->shead, &dummy_chain) == NULL) {
1305 * If no free blockref count be found we must create an indirect
1306 * block and move a number of blockrefs into it. With the parent
1307 * locked we can safely lock each child in order to move it without
1308 * causing a deadlock.
1310 * This may return the new indirect block or the old parent depending
1311 * on where the key falls.
1314 hammer2_chain_t *nparent;
1316 nparent = hammer2_chain_create_indirect(hmp, parent,
1318 if (nparent == NULL) {
1320 hammer2_chain_free(hmp, chain);
1324 if (parent != nparent) {
1326 hammer2_chain_put(hmp, parent);
1334 * Link the chain into its parent.
1336 if (chain->parent != NULL)
1337 panic("hammer2: hammer2_chain_create: chain already connected");
1338 KKASSERT(chain->parent == NULL);
1339 chain->parent = parent;
1341 if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, chain))
1342 panic("hammer2_chain_link: collision");
1343 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DELETED);
1344 KKASSERT(parent->refs > 1);
1345 atomic_add_int(&parent->refs, 1);
1348 * Additional linkage for inodes. Reuse the parent pointer to
1349 * find the parent directory.
1351 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
1352 hammer2_chain_t *scan = parent;
1353 while (scan->bref.type == HAMMER2_BREF_TYPE_INDIRECT)
1354 scan = scan->parent;
1355 if (scan->bref.type == HAMMER2_BREF_TYPE_INODE)
1356 chain->u.ip->pip = scan->u.ip;
1360 * Mark the newly created or previously disconnected chain element
1361 * as modified and fully resolve the chain->data pointer. The
1362 * WAS_MODIFIED bit will be set in both cases.
1364 * Chain elements with embedded data will not issue I/O at this time.
1365 * A new block will be allocated for the buffer but not instantiated.
1367 * Chain elements which do not use embedded data will allocate
1368 * the new block AND instantiate its buffer cache buffer, pointing
1369 * the data at the bp.
1371 if (chain->flags & HAMMER2_CHAIN_WAS_MODIFIED) {
1372 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_WAS_MODIFIED);
1373 hammer2_chain_modify(hmp, chain);
1378 hammer2_chain_put(hmp, parent);
1383 * Create an indirect block that covers one or more of the elements in the
1384 * current parent. Either returns the existing parent with no locking or
1385 * ref changes or returns the new indirect block locked and referenced,
1386 * depending on what the specified key falls into.
1388 * The key/keybits for the indirect mode only needs to follow three rules:
1390 * (1) That all elements underneath it fit within its key space and
1392 * (2) That all elements outside it are outside its key space.
1394 * (3) When creating the new indirect block any elements in the current
1395 * parent that fit within the new indirect block's keyspace must be
1396 * moved into the new indirect block.
1398 * (4) The keyspace chosen for the inserted indirect block CAN cover a wider
1399 * keyspace the the current parent, but lookup/iteration rules will
1400 * ensure (and must ensure) that rule (2) for all parents leading up
1401 * to the nearest inode or the root volume header is adhered to. This
1402 * is accomplished by always recursing through matching keyspaces in
1403 * the hammer2_chain_lookup() and hammer2_chain_next() API.
1405 * The current implementation calculates the current worst-case keyspace by
1406 * iterating the current parent and then divides it into two halves, choosing
1407 * whichever half has the most elements (not necessarily the half containing
1408 * the requested key).
1410 * We can also opt to use the half with the least number of elements. This
1411 * causes lower-numbered keys (aka logical file offsets) to recurse through
1412 * fewer indirect blocks and higher-numbered keys to recurse through more.
1413 * This also has the risk of not moving enough elements to the new indirect
1414 * block and being forced to create several indirect blocks before the element
1419 hammer2_chain_create_indirect(hammer2_mount_t *hmp, hammer2_chain_t *parent,
1420 hammer2_key_t create_key, int create_bits)
1422 hammer2_blockref_t *base;
1423 hammer2_blockref_t *bref;
1424 hammer2_chain_t *chain;
1425 hammer2_chain_t *ichain;
1426 hammer2_chain_t dummy;
1427 hammer2_key_t key = create_key;
1428 int keybits = create_bits;
1436 * Mark the parent modified so our base[] pointer remains valid
1437 * while we move entries.
1439 hammer2_chain_modify(hmp, parent);
1442 * Locate a free blockref in the parent's array
1444 switch(parent->bref.type) {
1445 case HAMMER2_BREF_TYPE_INODE:
1446 base = &parent->data->ipdata.u.blockset.blockref[0];
1447 count = HAMMER2_SET_COUNT;
1449 case HAMMER2_BREF_TYPE_INDIRECT:
1450 base = &parent->data->npdata.blockref[0];
1451 count = parent->bytes / sizeof(hammer2_blockref_t);
1453 case HAMMER2_BREF_TYPE_VOLUME:
1454 base = &hmp->voldata.sroot_blockset.blockref[0];
1455 count = HAMMER2_SET_COUNT;
1458 panic("hammer2_chain_create_indirect: "
1459 "unrecognized blockref type: %d",
1466 * Scan for an unallocated bref, also skipping any slots occupied
1467 * by in-memory chain elements that may not yet have been updated
1468 * in the parent's bref array.
1470 bzero(&dummy, sizeof(dummy));
1471 for (i = 0; i < count; ++i) {
1475 if (bref->type == 0) {
1477 chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead,
1481 bref = &chain->bref;
1485 * Expand our calculated key range (key, keybits) to fit
1486 * the scanned key. nkeybits represents the full range
1487 * that we will later cut in half (two halves @ nkeybits - 1).
1490 if (nkeybits < bref->keybits)
1491 nkeybits = bref->keybits;
1492 while ((~(((hammer2_key_t)1 << nkeybits) - 1) &
1493 (key ^ bref->key)) != 0) {
1498 * If the new key range is larger we have to determine
1499 * which side of the new key range the existing keys fall
1500 * under by checking the high bit, then collapsing the
1501 * locount into the hicount or vise-versa.
1503 if (keybits != nkeybits) {
1504 if (((hammer2_key_t)1 << (nkeybits - 1)) & key) {
1515 * The newly scanned key will be in the lower half or the
1516 * higher half of the (new) key range.
1518 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key)
1525 * Adjust keybits to represent half of the full range calculated
1531 * Select whichever half contains the most elements. Theoretically
1532 * we can select either side as long as it contains at least one
1533 * element (in order to ensure that a free slot is present to hold
1534 * the indirect block).
1536 key &= ~(((hammer2_key_t)1 << keybits) - 1);
1537 if (hammer2_indirect_optimize) {
1539 * Insert node for least number of keys, this will arrange
1540 * the first few blocks of a large file or the first few
1541 * inodes in a directory with fewer indirect blocks when
1544 if (hicount < locount && hicount != 0)
1545 key |= (hammer2_key_t)1 << keybits;
1547 key &= ~(hammer2_key_t)1 << keybits;
1550 * Insert node for most number of keys, best for heavily
1553 if (hicount > locount)
1554 key |= (hammer2_key_t)1 << keybits;
1556 key &= ~(hammer2_key_t)1 << keybits;
1560 * How big should our new indirect block be? It has to be at least
1561 * as large as its parent.
1563 if (parent->bref.type == HAMMER2_BREF_TYPE_INODE)
1564 nbytes = HAMMER2_IND_BYTES_MIN;
1566 nbytes = HAMMER2_IND_BYTES_MAX;
1567 if (nbytes < count * sizeof(hammer2_blockref_t))
1568 nbytes = count * sizeof(hammer2_blockref_t);
1571 * Ok, create our new indirect block
1573 dummy.bref.type = HAMMER2_BREF_TYPE_INDIRECT;
1574 dummy.bref.key = key;
1575 dummy.bref.keybits = keybits;
1576 dummy.bref.data_off = hammer2_bytes_to_radix(nbytes);
1577 ichain = hammer2_chain_alloc(hmp, &dummy.bref);
1578 ichain->flags |= HAMMER2_CHAIN_INITIAL;
1581 * Iterate the original parent and move the matching brefs into
1582 * the new indirect block.
1584 for (i = 0; i < count; ++i) {
1586 * For keying purposes access the bref from the media or
1587 * from our in-memory cache. In cases where the in-memory
1588 * cache overrides the media the keyrefs will be the same
1589 * anyway so we can avoid checking the cache when the media
1593 if (bref->type == 0) {
1595 chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead,
1597 if (chain == NULL) {
1599 * Select index indirect block is placed in
1601 if (ichain->index < 0)
1605 bref = &chain->bref;
1609 * Skip keys not in the chosen half (low or high), only bit
1610 * (keybits - 1) needs to be compared but for safety we
1611 * will compare all msb bits plus that bit again.
1613 if ((~(((hammer2_key_t)1 << keybits) - 1) &
1614 (key ^ bref->key)) != 0) {
1619 * This element is being moved, its slot is available
1620 * for our indirect block.
1622 if (ichain->index < 0)
1626 * Load the new indirect block by acquiring or allocating
1627 * the related chain entries, then simply move it to the
1628 * new parent (ichain).
1630 * Flagging the new chain entry MOVED will cause a flush
1631 * to synchronize its block into the new indirect block.
1632 * The chain is unlocked after being moved but needs to
1633 * retain a reference for the MOVED state
1635 * We must still set SUBMODIFIED in the parent but we do
1636 * that after the loop.
1638 * XXX we really need a lock here but we don't need the
1639 * data. NODATA feature needed.
1641 chain = hammer2_chain_get(hmp, parent, i,
1642 HAMMER2_LOOKUP_NOLOCK);
1643 SPLAY_REMOVE(hammer2_chain_splay, &parent->shead, chain);
1644 if (SPLAY_INSERT(hammer2_chain_splay, &ichain->shead, chain))
1645 panic("hammer2_chain_create_indirect: collision");
1646 chain->parent = ichain;
1647 bzero(&base[i], sizeof(base[i]));
1648 atomic_add_int(&parent->refs, -1);
1649 atomic_add_int(&ichain->refs, 1);
1650 if (chain->flags & HAMMER2_CHAIN_MOVED) {
1651 /* We don't need the ref from the chain_get */
1652 hammer2_chain_drop(hmp, chain);
1654 /* MOVED bit inherits the ref from the chain_get */
1655 atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED);
1657 KKASSERT(parent->refs > 0);
1662 * Insert the new indirect block into the parent now that we've
1663 * cleared out some entries in the parent. We calculated a good
1664 * insertion index in the loop above (ichain->index).
1666 KKASSERT(ichain->index >= 0);
1667 if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, ichain))
1668 panic("hammer2_chain_create_indirect: ichain insertion");
1669 ichain->parent = parent;
1670 atomic_add_int(&parent->refs, 1);
1673 * Mark the new indirect block modified after insertion, which
1674 * will propagate up through parent all the way to the root and
1675 * also allocate the physical block in ichain for our caller,
1676 * and assign ichain->data to a pre-zero'd space (because there
1677 * is not prior data to copy into it).
1679 * We have to set SUBMODIFIED in ichain's flags manually so the
1680 * flusher knows it has to recurse through it to get to all of
1683 hammer2_chain_modify(hmp, ichain);
1684 atomic_set_int(&ichain->flags, HAMMER2_CHAIN_SUBMODIFIED);
1687 * Figure out what to return.
1689 if (create_bits >= keybits) {
1691 * Key being created is way outside the key range,
1692 * return the original parent.
1694 hammer2_chain_put(hmp, ichain);
1695 } else if (~(((hammer2_key_t)1 << keybits) - 1) &
1696 (create_key ^ key)) {
1698 * Key being created is outside the key range,
1699 * return the original parent.
1701 hammer2_chain_put(hmp, ichain);
1704 * Otherwise its in the range, return the new parent.
1713 * Physically delete the specified chain element. Note that inodes with
1714 * open descriptors should not be deleted (as with other filesystems) until
1715 * the last open descriptor is closed.
1717 * This routine will remove the chain element from its parent and potentially
1718 * also recurse upward and delete indirect blocks which become empty as a
1721 * The caller must pass a pointer to the chain's parent, also locked and
1722 * referenced. (*parentp) will be modified in a manner similar to a lookup
1723 * or iteration when indirect blocks are also deleted as a side effect.
1726 hammer2_chain_delete(hammer2_mount_t *hmp, hammer2_chain_t *parent,
1727 hammer2_chain_t *chain)
1729 hammer2_blockref_t *base;
1732 if (chain->parent != parent)
1733 panic("hammer2_chain_delete: parent mismatch");
1736 * Mark the parent modified so our base[] pointer remains valid
1737 * while we move entries.
1739 * Calculate the blockref reference in the parent
1741 hammer2_chain_modify(hmp, parent);
1743 switch(parent->bref.type) {
1744 case HAMMER2_BREF_TYPE_INODE:
1745 base = &parent->data->ipdata.u.blockset.blockref[0];
1746 count = HAMMER2_SET_COUNT;
1748 case HAMMER2_BREF_TYPE_INDIRECT:
1749 base = &parent->data->npdata.blockref[0];
1750 count = parent->bytes / sizeof(hammer2_blockref_t);
1752 case HAMMER2_BREF_TYPE_VOLUME:
1753 base = &hmp->voldata.sroot_blockset.blockref[0];
1754 count = HAMMER2_SET_COUNT;
1757 panic("hammer2_chain_delete: unrecognized blockref type: %d",
1764 * Disconnect the bref in the parent, remove the chain, and
1765 * disconnect in-memory fields from the parent.
1767 KKASSERT(chain->index >= 0 && chain->index < count);
1768 base += chain->index;
1769 bzero(base, sizeof(*base));
1771 SPLAY_REMOVE(hammer2_chain_splay, &parent->shead, chain);
1772 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
1773 atomic_add_int(&parent->refs, -1); /* for splay entry */
1776 chain->parent = NULL;
1777 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
1778 chain->u.ip->pip = NULL;
1781 * Nobody references the underlying object any more so we can
1782 * clear any pending modification(s) on it. This can theoretically
1783 * recurse downward but even just clearing the bit on this item
1784 * will effectively recurse if someone is doing a rm -rf and greatly
1785 * reduce the I/O required.
1787 * The MODIFIED1 bit is cleared but we have to remember the old state
1788 * in case this deletion is related to a rename. The ref on the
1789 * chain is shared by both modified flags.
1791 if (chain->flags & HAMMER2_CHAIN_MODIFIED1) {
1792 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED1);
1793 atomic_set_int(&chain->flags, HAMMER2_CHAIN_WAS_MODIFIED);
1794 if ((chain->flags & HAMMER2_CHAIN_MODIFIED2) == 0)
1795 hammer2_chain_drop(hmp, chain);
1800 * Recursively flush the specified chain. The chain is locked and
1801 * referenced by the caller and will remain so on return.
1803 * This cannot be called with the volume header's vchain (yet).
1805 * PASS1 - clear the MODIFIED1 bit (and set the MODIFIED2 bit XXX)
1809 hammer2_chain_flush_pass1(hammer2_mount_t *hmp, hammer2_chain_t *chain)
1812 * Flush any children of this chain entry.
1814 if (chain->flags & HAMMER2_CHAIN_SUBMODIFIED) {
1815 hammer2_blockref_t *base;
1816 hammer2_chain_t *scan;
1817 hammer2_chain_t *next;
1819 int submodified = 0;
1822 * Modifications to the children will propagate up, forcing
1823 * us to become modified and copy-on-write too.
1825 * Clear SUBMODIFIED now, races during the flush will re-set
1828 hammer2_chain_modify(hmp, chain);
1829 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_SUBMODIFIED);
1832 * The blockref in the parent's array must be repointed at
1833 * the new block allocated by the child after its flush.
1835 * Calculate the base of the array.
1837 switch(chain->bref.type) {
1838 case HAMMER2_BREF_TYPE_INODE:
1839 KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
1840 base = &chain->data->ipdata.u.blockset.blockref[0];
1841 count = HAMMER2_SET_COUNT;
1843 case HAMMER2_BREF_TYPE_INDIRECT:
1844 base = &chain->data->npdata.blockref[0];
1845 count = chain->bytes / sizeof(hammer2_blockref_t);
1847 case HAMMER2_BREF_TYPE_VOLUME:
1848 KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
1849 base = &hmp->voldata.sroot_blockset.blockref[0];
1850 count = HAMMER2_SET_COUNT;
1854 panic("hammer2_chain_get: unrecognized blockref type: %d",
1859 * Flush the children and update the blockrefs in the parent.
1860 * Be careful of ripouts during the loop.
1862 next = SPLAY_MIN(hammer2_chain_splay, &chain->shead);
1863 while ((scan = next) != NULL) {
1864 next = SPLAY_NEXT(hammer2_chain_splay, &chain->shead,
1866 if (scan->flags & (HAMMER2_CHAIN_SUBMODIFIED |
1867 HAMMER2_CHAIN_MODIFIED1 |
1868 HAMMER2_CHAIN_MOVED)) {
1869 hammer2_chain_ref(hmp, scan);
1870 hammer2_chain_lock(hmp, scan);
1871 hammer2_chain_flush_pass1(hmp, scan);
1872 if (scan->flags & (HAMMER2_CHAIN_SUBMODIFIED |
1873 HAMMER2_CHAIN_MODIFIED1)) {
1876 KKASSERT(scan->index < count);
1877 base[scan->index] = scan->bref;
1878 if (scan->flags & HAMMER2_CHAIN_MOVED) {
1879 atomic_clear_int(&scan->flags,
1880 HAMMER2_CHAIN_MOVED);
1881 hammer2_chain_drop(hmp, scan);
1884 hammer2_chain_put(hmp, scan);
1888 atomic_set_int(&chain->flags,
1889 HAMMER2_CHAIN_SUBMODIFIED);
1894 * Flush this chain entry only if it is marked modified.
1896 if ((chain->flags & HAMMER2_CHAIN_MODIFIED1) == 0)
1900 * Clear MODIFIED1 and set HAMMER2_CHAIN_MOVED. The MODIFIED{1,2}
1901 * bits own a single parent ref and the MOVED bit owns its own
1904 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED1);
1905 if (chain->flags & HAMMER2_CHAIN_MOVED) {
1906 if ((chain->flags & HAMMER2_CHAIN_MODIFIED2) == 0)
1907 hammer2_chain_drop(hmp, chain);
1909 atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED);
1910 if (chain->flags & HAMMER2_CHAIN_MODIFIED2)
1911 hammer2_chain_ref(hmp, chain);
1915 * If this is part of a recursive flush we can go ahead and write
1916 * out the buffer cache buffer and pass a new bref back up the chain.
1918 * This will never be a volume header.
1920 if (chain != &hmp->vchain) {
1921 hammer2_blockref_t *bref;
1922 hammer2_off_t off_hi;
1928 KKASSERT(chain->data != NULL);
1929 bref = &chain->bref;
1931 off_hi = bref->data_off & HAMMER2_OFF_MASK_HI;
1932 off_lo = (size_t)bref->data_off & HAMMER2_OFF_MASK_LO;
1933 bytes = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
1934 KKASSERT(off_hi != 0); /* not the root volume header */
1936 if (chain->bp == NULL) {
1938 * The data is embedded, we have to acquire the
1939 * buffer cache buffer and copy the data into it.
1942 error = bread(hmp->devvp, off_hi,
1943 HAMMER2_PBUFSIZE, &bp);
1944 KKASSERT(error == 0); /* XXX */
1947 * Copy the data to the buffer, mark the buffer
1948 * dirty, and convert the chain to unmodified.
1950 bcopy(chain->data, (char *)bp->b_data + off_lo, bytes);
1954 chain->bref.check.iscsi32.value =
1955 hammer2_icrc32(chain->data, bytes);
1959 hammer2_blockref_t *bref;
1961 bref = &chain->bref;
1963 switch(bref->type) {
1964 case HAMMER2_BREF_TYPE_VOLUME:
1965 KKASSERT(chain->data != NULL);
1966 KKASSERT(chain->bp == NULL);
1968 hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]=
1970 (char *)&hmp->voldata +
1971 HAMMER2_VOLUME_ICRC1_OFF,
1972 HAMMER2_VOLUME_ICRC1_SIZE);
1973 hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]=
1975 (char *)&hmp->voldata +
1976 HAMMER2_VOLUME_ICRC0_OFF,
1977 HAMMER2_VOLUME_ICRC0_SIZE);
1978 hmp->voldata.icrc_volheader =
1980 (char *)&hmp->voldata +
1981 HAMMER2_VOLUME_ICRCVH_OFF,
1982 HAMMER2_VOLUME_ICRCVH_SIZE);
1990 * PASS2 - not yet implemented (should be called only with the root chain?)
1993 hammer2_chain_flush_pass2(hammer2_mount_t *hmp, hammer2_chain_t *chain)
1999 hammer2_chain_flush(hammer2_mount_t *hmp, hammer2_chain_t *chain)
2001 hammer2_chain_flush_pass1(hmp, chain);