2 * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * This subsystem handles direct and indirect block searches, recursions,
37 * creation, and deletion. Chains of blockrefs are tracked and modifications
38 * are flag for propagation... eventually all the way back to the volume
42 #include <sys/cdefs.h>
43 #include <sys/cdefs.h>
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/types.h>
52 SPLAY_GENERATE(hammer2_chain_splay, hammer2_chain, snode, hammer2_chain_cmp);
54 static int hammer2_indirect_optimize; /* XXX SYSCTL */
56 static hammer2_chain_t *hammer2_chain_create_indirect(
57 hammer2_mount_t *hmp, hammer2_chain_t *parent,
58 hammer2_key_t key, int keybits);
61 * Compare function for chain splay tree
64 hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2)
66 return(chain2->index - chain1->index);
70 * Allocate a new disconnected chain element representing the specified
71 * bref. The chain element is locked exclusively and refs is set to 1.
73 * This essentially allocates a system memory structure representing one
74 * of the media structure types, including inodes.
77 hammer2_chain_alloc(hammer2_mount_t *hmp, hammer2_blockref_t *bref)
79 hammer2_chain_t *chain;
81 hammer2_indblock_t *np;
83 u_int bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
86 * Construct the appropriate system structure.
89 case HAMMER2_BREF_TYPE_INODE:
90 ip = kmalloc(sizeof(*ip), hmp->minode, M_WAITOK | M_ZERO);
93 lockinit(&chain->lk, "inode", 0, LK_CANRECURSE);
96 case HAMMER2_BREF_TYPE_INDIRECT:
97 if (bytes == HAMMER2_PBUFSIZE) {
98 np = kmalloc(sizeof(*np),
103 np = kmalloc(offsetof(struct hammer2_indblock,
111 lockinit(&chain->lk, "iblk", 0, LK_CANRECURSE);
113 case HAMMER2_BREF_TYPE_DATA:
114 dp = kmalloc(sizeof(*dp), hmp->mchain, M_WAITOK | M_ZERO);
117 lockinit(&chain->lk, "dblk", 0, LK_CANRECURSE);
119 case HAMMER2_BREF_TYPE_VOLUME:
121 panic("hammer2_chain_alloc volume type illegal for op");
124 panic("hammer2_chain_alloc: unrecognized blockref type: %d",
128 chain->index = -1; /* not yet assigned */
130 chain->bytes = bytes;
131 lockmgr(&chain->lk, LK_EXCLUSIVE);
137 * Free a disconnected chain element
140 hammer2_chain_free(hammer2_mount_t *hmp, hammer2_chain_t *chain)
144 KKASSERT(chain->bp == NULL);
145 KKASSERT(chain->data == NULL);
146 KKASSERT(chain->bref.type != HAMMER2_BREF_TYPE_INODE ||
147 chain->u.ip->vp == NULL);
149 if ((mem = chain->u.mem) != NULL) {
151 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
152 kfree(mem, hmp->minode);
154 kfree(mem, hmp->mchain);
159 * Add a reference to a chain element (for shared access). The chain
160 * element must already have at least 1 ref controlled by the caller.
163 hammer2_chain_ref(hammer2_mount_t *hmp, hammer2_chain_t *chain)
165 KKASSERT(chain->refs > 0);
166 atomic_add_int(&chain->refs, 1);
170 * Drop the callers reference to the chain element. If the ref count
171 * reaches zero the chain element and its related structure (typically an
172 * inode or indirect block) will be freed and the parent will be
173 * recursively dropped.
175 * Modified elements hold an additional reference so it should not be
176 * possible for the count on a modified element to drop to 0.
178 * The chain element must NOT be locked by the caller.
180 * The parent might or might not be locked by the caller but if so it
181 * will also be referenced so we shouldn't recurse upward.
184 hammer2_chain_drop(hammer2_mount_t *hmp, hammer2_chain_t *chain)
186 hammer2_chain_t *parent;
194 KKASSERT(chain != &hmp->vchain);
195 parent = chain->parent;
197 lockmgr(&parent->lk, LK_EXCLUSIVE);
198 if (atomic_cmpset_int(&chain->refs, 1, 0)) {
200 * Succeeded, recurse and drop parent
202 if (!(chain->flags & HAMMER2_CHAIN_DELETED)) {
203 SPLAY_REMOVE(hammer2_chain_splay,
204 &parent->shead, chain);
205 atomic_set_int(&chain->flags,
206 HAMMER2_CHAIN_DELETED);
207 /* parent refs dropped via recursion */
209 chain->parent = NULL;
211 lockmgr(&parent->lk, LK_RELEASE);
212 hammer2_chain_free(hmp, chain);
214 /* recurse on parent */
217 lockmgr(&parent->lk, LK_RELEASE);
218 /* retry the same chain */
221 if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) {
223 * Succeeded, count did not reach zero so
224 * cut out of the loop.
228 /* retry the same chain */
234 * Lock a chain element, acquiring its data with I/O if necessary.
236 * Returns 0 on success or an error code if the data could not be acquired.
237 * The chain element is locked either way.
239 * chain->data will be pointed either at the embedded data (e.g. for
240 * inodes), in which case the buffer cache buffer is released, or will
241 * point into the bp->b_data buffer with the bp left intact while locked.
244 hammer2_chain_lock(hammer2_mount_t *hmp, hammer2_chain_t *chain)
246 hammer2_blockref_t *bref;
247 hammer2_off_t off_hi;
253 * Lock the element. Under certain conditions this might end up
254 * being a recursive lock.
256 KKASSERT(chain->refs > 0);
257 lockmgr(&chain->lk, LK_EXCLUSIVE);
260 * The volume header is a special case
262 if (chain->bref.type == HAMMER2_BREF_TYPE_VOLUME)
266 * bp must be NULL, so if the data pointer is valid here it points
267 * to embedded data and no I/O is necessary (whether modified or not).
269 KKASSERT(chain->bp == NULL);
274 * If data is NULL we must issue I/O. Any error returns the error
275 * code but leaves the chain locked.
277 * If the chain was modified a new bref will have already been
278 * allocated and its related bp is probably still sitting in the
283 off_hi = bref->data_off & HAMMER2_OFF_MASK_HI;
284 off_lo = (size_t)bref->data_off & HAMMER2_OFF_MASK_LO;
285 KKASSERT(off_hi != 0);
286 error = cluster_read(hmp->devvp,
287 hmp->voldata.volu_size, off_hi,
289 HAMMER2_PBUFSIZE, HAMMER2_PBUFSIZE*8,
293 * Even though this can be synthesized from bref->data_off we
294 * store it in the in-memory chain structure for convenience.
297 (1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))) {
298 panic("hammer2_chain_lock: chain->bytes mismatch");
302 kprintf("hammer2_chain_get: I/O error %016jx: %d\n",
303 (intmax_t)off_hi, error);
310 * Setup the data pointer, either pointing it to an embedded data
311 * structure and copying the data from the buffer, or pointint it
314 * The buffer is not retained when copying to an embedded data
315 * structure in order to avoid potential deadlocks or recursions
316 * on the same physical buffer.
318 switch (bref->type) {
319 case HAMMER2_BREF_TYPE_VOLUME:
321 * Copy data from bp to embedded buffer
323 KKASSERT(0); /* not yet - have mount use this soon */
324 KKASSERT(off_hi == 0);
325 bcopy((char *)chain->bp->b_data + off_lo,
326 &hmp->voldata, HAMMER2_PBUFSIZE);
327 chain->data = (void *)&hmp->voldata;
331 case HAMMER2_BREF_TYPE_INODE:
333 * Copy data from bp to embedded buffer.
335 bcopy((char *)chain->bp->b_data + off_lo,
336 &chain->u.ip->ip_data,
337 HAMMER2_INODE_BYTES);
338 chain->data = (void *)&chain->u.ip->ip_data;
342 case HAMMER2_BREF_TYPE_INDIRECT:
344 * Indirect node data may or may not be embedded depending
345 * on how much there is.
347 if (chain->u.np->is_embedded) {
348 bcopy((char *)chain->bp->b_data + off_lo,
351 chain->data = (void *)&chain->u.np->buf[0];
361 data = (char *)chain->bp->b_data + off_lo;
369 * Resize the chain's physical storage allocation. Chains can be resized
370 * smaller without reallocating the storage. Resizing larger will reallocate
374 hammer2_chain_resize(hammer2_mount_t *hmp, hammer2_chain_t *chain, int nradix)
376 hammer2_chain_t *parent;
384 * Only data blocks can be resized for now
386 KKASSERT(chain != &hmp->vchain);
387 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
388 chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT);
391 * Nothing to do if the element is already the proper size
393 obytes = chain->bytes;
394 nbytes = 1 << nradix;
395 if (obytes == nbytes)
399 * A deleted inode may still be active but unreachable via sync
400 * because it has been disconnected from the tree. Do not allow
401 * deleted inodes to be marked as being modified because this will
402 * bump the refs and never get resolved by the sync, leaving the
403 * inode structure allocated after umount.
405 if ((chain->flags & HAMMER2_CHAIN_DELETED) &&
406 chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
407 KKASSERT(chain->data != NULL);
412 * Set MODIFIED1 and add a chain ref to prevent destruction. Both
413 * modified flags share the same ref.
415 atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED1);
416 if ((chain->flags & HAMMER2_CHAIN_MODIFIED2) == 0)
417 hammer2_chain_ref(hmp, chain);
419 if (nbytes < obytes) {
421 * If we are making it smaller we don't have to reallocate
424 chain->bref.data_off &= ~ HAMMER2_OFF_MASK_RADIX;
425 chain->bref.data_off |= (nradix & HAMMER2_OFF_MASK_RADIX);
426 chain->bytes = nbytes;
431 if (chain != &hmp->vchain) {
432 chain->bref.data_off =
433 hammer2_freemap_alloc(hmp, chain->bref.type, nbytes);
434 chain->bytes = nbytes;
436 /* XXX failed allocation */
438 switch(chain->bref.type) {
439 case HAMMER2_BREF_TYPE_VOLUME: /* embedded */
440 case HAMMER2_BREF_TYPE_INODE: /* embedded */
442 * data points to embedded structure, no copy needed
446 case HAMMER2_BREF_TYPE_INDIRECT:
447 panic("hammer2_chain_resize: "
448 "cannot resize indirect block");
451 case HAMMER2_BREF_TYPE_DATA:
453 * data (if not NULL) points into original bp or
454 * to embedded data. Copy-on-write to new block.
456 KKASSERT(chain != &hmp->vchain); /* safety */
457 if (nbytes == HAMMER2_PBUFSIZE) {
458 nbp = getblk(hmp->devvp,
459 chain->bref.data_off &
461 HAMMER2_PBUFSIZE, 0, 0);
465 error = bread(hmp->devvp,
466 chain->bref.data_off &
468 HAMMER2_PBUFSIZE, &nbp);
469 KKASSERT(error == 0);/* XXX handle error */
473 * The new block may be smaller or larger than the
474 * old block, only copy what fits.
476 ndata = nbp->b_data + (chain->bref.data_off &
477 HAMMER2_OFF_MASK_LO);
480 bcopy(chain->data, ndata, nbytes);
482 bcopy(chain->data, ndata, obytes);
483 KKASSERT(chain->bp != NULL);
490 panic("hammer2_chain_modify: unknown bref type");
497 * Recursively mark the parent chain elements so flushes can find
500 * NOTE: The flush code will modify a SUBMODIFIED-flagged chain
501 * during the flush recursion after clearing the parent's
502 * SUBMODIFIED bit. We don't want to re-set the parent's
503 * SUBMODIFIED bit in this case!
505 if ((chain->flags & HAMMER2_CHAIN_SUBMODIFIED) == 0) {
506 parent = chain->parent;
508 (parent->flags & HAMMER2_CHAIN_SUBMODIFIED) == 0) {
509 atomic_set_int(&parent->flags,
510 HAMMER2_CHAIN_SUBMODIFIED);
511 parent = parent->parent;
517 * Convert a locked chain that was retrieved read-only to read-write.
519 * If not already marked modified a new physical block will be allocated
520 * and assigned to the bref.
522 * allocated->modified (without calling hammer2_chain_lock()) results
523 * in chain->data typically being NULL. In this situation chain->data
524 * is assigned and the target area is zero'd out.
526 * If the data is pointing into a bp it will be relocated to a new bp.
527 * If the data is embedded we leave it alone for now.
530 hammer2_chain_modify(hammer2_mount_t *hmp, hammer2_chain_t *chain)
532 hammer2_chain_t *parent;
538 * If the chain is already marked MODIFIED1 we can just return.
540 if (chain->flags & HAMMER2_CHAIN_MODIFIED1) {
541 KKASSERT(chain->data != NULL);
546 * A deleted inode may still be active but unreachable via sync
547 * because it has been disconnected from the tree. Do not allow
548 * deleted inodes to be marked as being modified because this will
549 * bump the refs and never get resolved by the sync, leaving the
550 * inode structure allocated after umount.
552 if ((chain->flags & HAMMER2_CHAIN_DELETED) &&
553 chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
554 KKASSERT(chain->data != NULL);
559 * Set MODIFIED1 and add a chain ref to prevent destruction. Both
560 * modified flags share the same ref.
562 atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED1);
563 if ((chain->flags & HAMMER2_CHAIN_MODIFIED2) == 0)
564 hammer2_chain_ref(hmp, chain);
567 * We must allocate the copy-on-write block.
569 * If the data is embedded no other action is required.
571 * If the data is not embedded we acquire and clear the
572 * new block. If chain->data is not NULL we then do the
573 * copy-on-write. chain->data will then be repointed to the new
574 * buffer and the old buffer will be released.
576 * For newly created elements with no prior allocation we go
577 * through the copy-on-write steps except without the copying part.
579 if (chain != &hmp->vchain) {
580 if (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)
581 kprintf("Replace %d\n", chain->bytes);
582 chain->bref.data_off =
583 hammer2_freemap_alloc(hmp, chain->bref.type, chain->bytes);
584 /* XXX failed allocation */
587 switch(chain->bref.type) {
588 case HAMMER2_BREF_TYPE_VOLUME: /* embedded */
589 case HAMMER2_BREF_TYPE_INODE: /* embedded */
591 * Inode and Volume data already points to the embedded
592 * structure, no copy is needed
596 case HAMMER2_BREF_TYPE_INDIRECT:
598 * If the indirect data is embedded we just leave it
599 * in its embedded space, otherwise fall-through to
600 * the bp-handling code.
602 * If this is a newly allocated block chain->data will
603 * be NULL, so make sure it is properly assigned. In
604 * this case the embedded space has already been zero'd
607 if (chain->u.np->is_embedded) {
608 chain->data = (void *)&chain->u.np->buf[0];
613 case HAMMER2_BREF_TYPE_DATA:
615 * data (if not NULL) points into original bp or to embedded
616 * data, copy-on-write to the new block.
618 * data (if NULL) indicates that no prior copy exists, the
619 * storage must be zero'd.
621 KKASSERT(chain != &hmp->vchain); /* safety */
622 if (chain->bytes == HAMMER2_PBUFSIZE) {
623 nbp = getblk(hmp->devvp,
624 chain->bref.data_off & HAMMER2_OFF_MASK_HI,
625 HAMMER2_PBUFSIZE, 0, 0);
627 * XXX want to set B_CACHE but not bother to
628 * zero because it will be zero'd below?
633 error = bread(hmp->devvp,
634 chain->bref.data_off & HAMMER2_OFF_MASK_HI,
635 HAMMER2_PBUFSIZE, &nbp);
636 KKASSERT(error == 0);/* XXX handle error */
640 * Copy or zero-fill on write depending on whether
641 * chain->data exists or not.
643 ndata = nbp->b_data + (chain->bref.data_off &
644 HAMMER2_OFF_MASK_LO);
646 bcopy(chain->data, ndata, chain->bytes);
647 KKASSERT(chain->bp != NULL);
650 bzero(ndata, chain->bytes);
656 panic("hammer2_chain_modify: unknown bref type");
662 * Recursively mark the parent chain elements so flushes can find
665 * NOTE: The flush code will modify a SUBMODIFIED-flagged chain
666 * during the flush recursion after clearing the parent's
667 * SUBMODIFIED bit. We don't want to re-set the parent's
668 * SUBMODIFIED bit in this case!
670 if ((chain->flags & HAMMER2_CHAIN_SUBMODIFIED) == 0) {
671 parent = chain->parent;
673 (parent->flags & HAMMER2_CHAIN_SUBMODIFIED) == 0) {
674 atomic_set_int(&parent->flags,
675 HAMMER2_CHAIN_SUBMODIFIED);
676 parent = parent->parent;
682 * Unlock a chain element without dropping its reference count.
683 * (see hammer2_chain_put() to do both).
685 * Non-embedded data references (chain->bp != NULL) are returned to the
686 * system and the data field is cleared in that case. If modified the
687 * dirty buffer is still returned to the system, can be flushed to disk by
688 * the system at any time, and will be reconstituted/re-read as needed.
691 hammer2_chain_unlock(hammer2_mount_t *hmp, hammer2_chain_t *chain)
695 if (chain->flags & HAMMER2_CHAIN_MODIFIED1) {
696 if (chain->flags & HAMMER2_CHAIN_IOFLUSH) {
697 atomic_clear_int(&chain->flags,
698 HAMMER2_CHAIN_IOFLUSH);
704 /* bp might still be dirty */
709 lockmgr(&chain->lk, LK_RELEASE);
713 * Locate an in-memory chain. The parent must be locked. The in-memory
714 * chain is returned or NULL if no in-memory chain is present.
716 * NOTE: A chain on-media might exist for this index when NULL is returned.
719 hammer2_chain_find(hammer2_mount_t *hmp, hammer2_chain_t *parent, int index)
721 hammer2_chain_t dummy;
722 hammer2_chain_t *chain;
725 chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead, &dummy);
730 * Return a locked chain structure with all associated data acquired.
732 * Caller must lock the parent on call, the returned child will be locked.
735 hammer2_chain_get(hammer2_mount_t *hmp, hammer2_chain_t *parent,
736 int index, int flags)
738 hammer2_blockref_t *bref;
739 hammer2_chain_t *chain;
740 hammer2_chain_t dummy;
743 * First see if we have a (possibly modified) chain element cached
744 * for this (parent, index). Acquire the data if necessary.
746 * If chain->data is non-NULL the chain should already be marked
750 chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead, &dummy);
752 hammer2_chain_ref(hmp, chain);
753 if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0)
754 hammer2_chain_lock(hmp, chain);
759 * Otherwise lookup the bref and issue I/O (switch on the parent)
761 switch(parent->bref.type) {
762 case HAMMER2_BREF_TYPE_INODE:
763 KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
764 bref = &parent->data->ipdata.u.blockset.blockref[index];
766 case HAMMER2_BREF_TYPE_INDIRECT:
767 KKASSERT(index >= 0 &&
768 index < parent->bytes / sizeof(hammer2_blockref_t));
769 bref = &parent->data->npdata.blockref[index];
771 case HAMMER2_BREF_TYPE_VOLUME:
772 KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
773 bref = &hmp->voldata.sroot_blockset.blockref[index];
777 panic("hammer2_chain_get: unrecognized blockref type: %d",
782 * Allocate a chain structure representing the existing media
783 * entry. Thus the chain is *not* INITIAL and certainly not
786 chain = hammer2_chain_alloc(hmp, bref);
789 * Link the chain into its parent. Caller is expected to hold an
790 * exclusive lock on the parent.
792 chain->parent = parent;
793 chain->index = index;
794 if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, chain))
795 panic("hammer2_chain_link: collision");
796 KKASSERT(parent->refs > 1);
797 atomic_add_int(&parent->refs, 1); /* for splay entry */
800 * Additional linkage for inodes. Reuse the parent pointer to
801 * find the parent directory.
803 if (bref->type == HAMMER2_BREF_TYPE_INODE) {
804 while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT)
805 parent = parent->parent;
806 if (parent->bref.type == HAMMER2_BREF_TYPE_INODE)
807 chain->u.ip->pip = parent->u.ip;
811 * Our new chain structure has already been referenced and locked
812 * but the lock code handles the I/O so call it to resolve the data.
813 * Then release one of our two exclusive locks.
815 * If NOLOCK is set the release will release the one-and-only lock.
817 if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0)
818 hammer2_chain_lock(hmp, chain);
819 lockmgr(&chain->lk, LK_RELEASE);
825 * Unlock and dereference a chain after use. It is possible for this to
826 * recurse up the chain.
829 hammer2_chain_put(hammer2_mount_t *hmp, hammer2_chain_t *chain)
831 hammer2_chain_unlock(hmp, chain);
832 hammer2_chain_drop(hmp, chain);
836 * Locate any key between key_beg and key_end inclusive. (*parentp)
837 * typically points to an inode but can also point to a related indirect
838 * block and this function will recurse upwards and find the inode again.
840 * WARNING! THIS DOES NOT RETURN KEYS IN LOGICAL KEY ORDER! ANY KEY
841 * WITHIN THE RANGE CAN BE RETURNED. HOWEVER, AN ITERATION
842 * WHICH PICKS UP WHERE WE LEFT OFF WILL CONTINUE THE SCAN.
844 * (*parentp) must be exclusively locked and referenced and can be an inode
845 * or an existing indirect block within the inode.
847 * On return (*parentp) will be modified to point at the deepest parent chain
848 * element encountered during the search, as a helper for an insertion or
849 * deletion. The new (*parentp) will be locked and referenced and the old
850 * will be unlocked and dereferenced (no change if they are both the same).
852 * The matching chain will be returned exclusively locked and referenced.
854 * NULL is returned if no match was found, but (*parentp) will still
855 * potentially be adjusted.
857 * This function will also recurse up the chain if the key is not within the
858 * current parent's range. (*parentp) can never be set to NULL. An iteration
859 * can simply allow (*parentp) to float inside the loop.
862 hammer2_chain_lookup(hammer2_mount_t *hmp, hammer2_chain_t **parentp,
863 hammer2_key_t key_beg, hammer2_key_t key_end,
866 hammer2_chain_t *parent;
867 hammer2_chain_t *chain;
868 hammer2_chain_t *tmp;
869 hammer2_blockref_t *base;
870 hammer2_blockref_t *bref;
871 hammer2_key_t scan_beg;
872 hammer2_key_t scan_end;
877 * Recurse (*parentp) upward if necessary until the parent completely
878 * encloses the key range or we hit the inode.
881 while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
882 scan_beg = parent->bref.key;
883 scan_end = scan_beg +
884 ((hammer2_key_t)1 << parent->bref.keybits) - 1;
885 if (key_beg >= scan_beg && key_end <= scan_end)
887 hammer2_chain_unlock(hmp, parent);
888 parent = parent->parent;
889 hammer2_chain_ref(hmp, parent); /* ref new parent */
890 hammer2_chain_lock(hmp, parent); /* lock new parent */
891 hammer2_chain_drop(hmp, *parentp); /* drop old parent */
892 *parentp = parent; /* new parent */
897 * Locate the blockref array. Currently we do a fully associative
898 * search through the array.
900 switch(parent->bref.type) {
901 case HAMMER2_BREF_TYPE_INODE:
903 * Special shortcut for embedded data returns the inode
904 * itself. Callers must detect this condition and access
905 * the embedded data (the strategy code does this for us).
907 * This is only applicable to regular files and softlinks.
909 if (parent->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA) {
910 hammer2_chain_ref(hmp, parent);
911 if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0)
912 hammer2_chain_lock(hmp, parent);
915 base = &parent->data->ipdata.u.blockset.blockref[0];
916 count = HAMMER2_SET_COUNT;
918 case HAMMER2_BREF_TYPE_INDIRECT:
919 if (parent->data == NULL)
920 panic("parent->data is NULL");
921 base = &parent->data->npdata.blockref[0];
922 count = parent->bytes / sizeof(hammer2_blockref_t);
924 case HAMMER2_BREF_TYPE_VOLUME:
925 base = &hmp->voldata.sroot_blockset.blockref[0];
926 count = HAMMER2_SET_COUNT;
929 panic("hammer2_chain_lookup: unrecognized blockref type: %d",
931 base = NULL; /* safety */
932 count = 0; /* safety */
936 * If the element and key overlap we use the element.
939 for (i = 0; i < count; ++i) {
940 tmp = hammer2_chain_find(hmp, parent, i);
941 bref = (tmp) ? &tmp->bref : &base[i];
944 scan_beg = bref->key;
945 scan_end = scan_beg + ((hammer2_key_t)1 << bref->keybits) - 1;
946 if (key_beg <= scan_end && key_end >= scan_beg)
950 if (key_beg == key_end)
952 return (hammer2_chain_next(hmp, parentp, NULL,
953 key_beg, key_end, flags));
957 * Acquire the new chain element. If the chain element is an
958 * indirect block we must search recursively.
960 chain = hammer2_chain_get(hmp, parent, i, flags);
965 * If the chain element is an indirect block it becomes the new
966 * parent and we loop on it. We must fixup the chain we loop on
967 * if the caller passed flags to us that aren't sufficient for our
970 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
971 hammer2_chain_put(hmp, parent);
972 *parentp = parent = chain;
973 if (flags & HAMMER2_LOOKUP_NOLOCK)
974 hammer2_chain_lock(hmp, chain);
979 * All done, return chain
985 * After having issued a lookup we can iterate all matching keys.
987 * If chain is non-NULL we continue the iteration from just after it's index.
989 * If chain is NULL we assume the parent was exhausted and continue the
990 * iteration at the next parent.
993 hammer2_chain_next(hammer2_mount_t *hmp, hammer2_chain_t **parentp,
994 hammer2_chain_t *chain,
995 hammer2_key_t key_beg, hammer2_key_t key_end,
998 hammer2_chain_t *parent;
999 hammer2_chain_t *tmp;
1000 hammer2_blockref_t *base;
1001 hammer2_blockref_t *bref;
1002 hammer2_key_t scan_beg;
1003 hammer2_key_t scan_end;
1011 * Calculate the next index and recalculate the parent if necessary.
1015 * Continue iteration within current parent. If not NULL
1016 * the passed-in chain may or may not be locked, based on
1017 * the LOOKUP_NOLOCK flag (passed in as returned from lookup
1020 i = chain->index + 1;
1021 if (flags & HAMMER2_LOOKUP_NOLOCK)
1022 hammer2_chain_drop(hmp, chain);
1024 hammer2_chain_put(hmp, chain);
1027 * Any scan where the lookup returned degenerate data embedded
1028 * in the inode has an invalid index and must terminate.
1030 if (chain == parent)
1033 } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT) {
1035 * We reached the end of the iteration.
1040 * Continue iteration with next parent unless the current
1041 * parent covers the range.
1043 hammer2_chain_t *nparent;
1045 if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT)
1048 scan_beg = parent->bref.key;
1049 scan_end = scan_beg +
1050 ((hammer2_key_t)1 << parent->bref.keybits) - 1;
1051 if (key_beg >= scan_beg && key_end <= scan_end)
1054 i = parent->index + 1;
1055 nparent = parent->parent;
1056 hammer2_chain_ref(hmp, nparent); /* ref new parent */
1057 hammer2_chain_unlock(hmp, parent);
1058 hammer2_chain_lock(hmp, nparent); /* lock new parent */
1059 hammer2_chain_drop(hmp, parent); /* drop old parent */
1060 *parentp = parent = nparent;
1065 * Locate the blockref array. Currently we do a fully associative
1066 * search through the array.
1068 switch(parent->bref.type) {
1069 case HAMMER2_BREF_TYPE_INODE:
1070 base = &parent->data->ipdata.u.blockset.blockref[0];
1071 count = HAMMER2_SET_COUNT;
1073 case HAMMER2_BREF_TYPE_INDIRECT:
1074 base = &parent->data->npdata.blockref[0];
1075 count = parent->bytes / sizeof(hammer2_blockref_t);
1077 case HAMMER2_BREF_TYPE_VOLUME:
1078 base = &hmp->voldata.sroot_blockset.blockref[0];
1079 count = HAMMER2_SET_COUNT;
1082 panic("hammer2_chain_next: unrecognized blockref type: %d",
1084 base = NULL; /* safety */
1085 count = 0; /* safety */
1088 KKASSERT(i <= count);
1091 * Look for the key. If we are unable to find a match and an exact
1092 * match was requested we return NULL. If a range was requested we
1093 * run hammer2_chain_next() to iterate.
1097 tmp = hammer2_chain_find(hmp, parent, i);
1098 bref = (tmp) ? &tmp->bref : &base[i];
1099 if (bref->type == 0) {
1103 scan_beg = bref->key;
1104 scan_end = scan_beg + ((hammer2_key_t)1 << bref->keybits) - 1;
1105 if (key_beg <= scan_end && key_end >= scan_beg)
1111 * If we couldn't find a match recurse up a parent to continue the
1118 * Acquire the new chain element. If the chain element is an
1119 * indirect block we must search recursively.
1121 chain = hammer2_chain_get(hmp, parent, i, flags);
1126 * If the chain element is an indirect block it becomes the new
1127 * parent and we loop on it. We may have to lock the chain when
1128 * cycling it in as the new parent as it will not be locked if the
1129 * caller passed NOLOCK.
1131 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
1132 hammer2_chain_put(hmp, parent);
1133 *parentp = parent = chain;
1134 if (flags & HAMMER2_LOOKUP_NOLOCK)
1135 hammer2_chain_lock(hmp, chain);
1141 * All done, return chain
1147 * Create and return a new hammer2 system memory structure of the specified
1148 * key, type and size and insert it RELATIVE TO (PARENT).
1150 * (parent) is typically either an inode or an indirect block, acquired
1151 * acquired as a side effect of issuing a prior failed lookup. parent
1152 * must be locked and held. Do not pass the inode chain to this function
1153 * unless that is the chain returned by the failed lookup.
1155 * Non-indirect types will automatically allocate indirect blocks as required
1156 * if the new item does not fit in the current (parent).
1158 * Indirect types will move a portion of the existing blockref array in
1159 * (parent) into the new indirect type and then use one of the free slots
1160 * to emplace the new indirect type.
1162 * A new locked, referenced chain element is returned of the specified type.
1163 * This element will also be marked as modified and contain a data area
1164 * ready for initialization.
1167 hammer2_chain_create(hammer2_mount_t *hmp, hammer2_chain_t *parent,
1168 hammer2_chain_t *chain,
1169 hammer2_key_t key, int keybits, int type, size_t bytes)
1171 hammer2_blockref_t dummy;
1172 hammer2_blockref_t *base;
1173 hammer2_blockref_t *bref;
1174 hammer2_chain_t dummy_chain;
1175 int unlock_parent = 0;
1180 if (chain == NULL) {
1182 * First allocate media space and construct the dummy bref,
1183 * then allocate the in-memory chain structure.
1185 bzero(&dummy, sizeof(dummy));
1188 dummy.keybits = keybits;
1189 dummy.data_off = hammer2_bytes_to_radix(bytes);
1190 chain = hammer2_chain_alloc(hmp, &dummy);
1194 * We set the WAS_MODIFIED flag here so the chain gets
1195 * marked as modified below.
1197 chain->flags |= HAMMER2_CHAIN_INITIAL |
1198 HAMMER2_CHAIN_WAS_MODIFIED;
1201 * Recalculate bytes to reflect the actual media block
1204 bytes = (hammer2_off_t)1 <<
1205 (int)(chain->bref.data_off & HAMMER2_OFF_MASK_RADIX);
1206 chain->bytes = bytes;
1209 case HAMMER2_BREF_TYPE_VOLUME:
1210 panic("hammer2_chain_create: called with volume type");
1212 case HAMMER2_BREF_TYPE_INODE:
1213 KKASSERT(bytes == HAMMER2_INODE_BYTES);
1214 chain->data = (void *)&chain->u.ip->ip_data;
1216 case HAMMER2_BREF_TYPE_INDIRECT:
1218 * May or may not be embedded (chain->data may or
1221 if (chain->u.np->is_embedded) {
1222 chain->data = (void *)&chain->u.np->buf[0];
1224 KKASSERT(chain->data == NULL);
1228 /* leave chain->data NULL */
1229 KKASSERT(chain->data == NULL);
1234 * Potentially update the chain's key/keybits, but it will
1235 * only be marked modified if WAS_MODIFIED is set (if it
1236 * was modified at the time of its removal during a rename).
1238 chain->bref.key = key;
1239 chain->bref.keybits = keybits;
1244 * Locate a free blockref in the parent's array
1246 switch(parent->bref.type) {
1247 case HAMMER2_BREF_TYPE_INODE:
1248 KKASSERT(parent->data != NULL);
1249 base = &parent->data->ipdata.u.blockset.blockref[0];
1250 count = HAMMER2_SET_COUNT;
1252 case HAMMER2_BREF_TYPE_INDIRECT:
1253 KKASSERT(parent->data != NULL);
1254 base = &parent->data->npdata.blockref[0];
1255 count = parent->bytes / sizeof(hammer2_blockref_t);
1257 case HAMMER2_BREF_TYPE_VOLUME:
1258 KKASSERT(parent->data != NULL);
1259 base = &hmp->voldata.sroot_blockset.blockref[0];
1260 count = HAMMER2_SET_COUNT;
1263 panic("hammer2_chain_create: unrecognized blockref type: %d",
1270 * Scan for an unallocated bref, also skipping any slots occupied
1271 * by in-memory chain elements that may not yet have been updated
1272 * in the parent's bref array.
1274 bzero(&dummy_chain, sizeof(dummy_chain));
1276 for (i = 0; i < count; ++i) {
1278 dummy_chain.index = i;
1279 if (bref->type == 0 &&
1280 SPLAY_FIND(hammer2_chain_splay,
1281 &parent->shead, &dummy_chain) == NULL) {
1287 * If no free blockref count be found we must create an indirect
1288 * block and move a number of blockrefs into it. With the parent
1289 * locked we can safely lock each child in order to move it without
1290 * causing a deadlock.
1292 * This may return the new indirect block or the old parent depending
1293 * on where the key falls.
1296 hammer2_chain_t *nparent;
1298 nparent = hammer2_chain_create_indirect(hmp, parent,
1300 if (nparent == NULL) {
1302 hammer2_chain_free(hmp, chain);
1306 if (parent != nparent) {
1308 hammer2_chain_put(hmp, parent);
1316 * Link the chain into its parent.
1318 if (chain->parent != NULL)
1319 panic("hammer2: hammer2_chain_create: chain already connected");
1320 KKASSERT(chain->parent == NULL);
1321 chain->parent = parent;
1323 if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, chain))
1324 panic("hammer2_chain_link: collision");
1325 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DELETED);
1326 KKASSERT(parent->refs > 1);
1327 atomic_add_int(&parent->refs, 1);
1330 * Additional linkage for inodes. Reuse the parent pointer to
1331 * find the parent directory.
1333 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
1334 hammer2_chain_t *scan = parent;
1335 while (scan->bref.type == HAMMER2_BREF_TYPE_INDIRECT)
1336 scan = scan->parent;
1337 if (scan->bref.type == HAMMER2_BREF_TYPE_INODE)
1338 chain->u.ip->pip = scan->u.ip;
1342 * Mark the newly created or previously disconnected chain element
1343 * as modified and fully resolve the chain->data pointer. The
1344 * WAS_MODIFIED bit will be set in both cases.
1346 * Chain elements with embedded data will not issue I/O at this time.
1347 * A new block will be allocated for the buffer but not instantiated.
1349 * Chain elements which do not use embedded data will allocate
1350 * the new block AND instantiate its buffer cache buffer, pointing
1351 * the data at the bp.
1353 if (chain->flags & HAMMER2_CHAIN_WAS_MODIFIED) {
1354 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_WAS_MODIFIED);
1355 hammer2_chain_modify(hmp, chain);
1360 hammer2_chain_put(hmp, parent);
1365 * Create an indirect block that covers one or more of the elements in the
1366 * current parent. Either returns the existing parent with no locking or
1367 * ref changes or returns the new indirect block locked and referenced,
1368 * depending on what the specified key falls into.
1370 * The key/keybits for the indirect mode only needs to follow three rules:
1372 * (1) That all elements underneath it fit within its key space and
1374 * (2) That all elements outside it are outside its key space.
1376 * (3) When creating the new indirect block any elements in the current
1377 * parent that fit within the new indirect block's keyspace must be
1378 * moved into the new indirect block.
1380 * (4) The keyspace chosen for the inserted indirect block CAN cover a wider
1381 * keyspace the the current parent, but lookup/iteration rules will
1382 * ensure (and must ensure) that rule (2) for all parents leading up
1383 * to the nearest inode or the root volume header is adhered to. This
1384 * is accomplished by always recursing through matching keyspaces in
1385 * the hammer2_chain_lookup() and hammer2_chain_next() API.
1387 * The current implementation calculates the current worst-case keyspace by
1388 * iterating the current parent and then divides it into two halves, choosing
1389 * whichever half has the most elements (not necessarily the half containing
1390 * the requested key).
1392 * We can also opt to use the half with the least number of elements. This
1393 * causes lower-numbered keys (aka logical file offsets) to recurse through
1394 * fewer indirect blocks and higher-numbered keys to recurse through more.
1395 * This also has the risk of not moving enough elements to the new indirect
1396 * block and being forced to create several indirect blocks before the element
1401 hammer2_chain_create_indirect(hammer2_mount_t *hmp, hammer2_chain_t *parent,
1402 hammer2_key_t create_key, int create_bits)
1404 hammer2_blockref_t *base;
1405 hammer2_blockref_t *bref;
1406 hammer2_chain_t *chain;
1407 hammer2_chain_t *ichain;
1408 hammer2_chain_t dummy;
1409 hammer2_key_t key = create_key;
1410 int keybits = create_bits;
1418 * Mark the parent modified so our base[] pointer remains valid
1419 * while we move entries.
1421 hammer2_chain_modify(hmp, parent);
1424 * Locate a free blockref in the parent's array
1426 switch(parent->bref.type) {
1427 case HAMMER2_BREF_TYPE_INODE:
1428 base = &parent->data->ipdata.u.blockset.blockref[0];
1429 count = HAMMER2_SET_COUNT;
1431 case HAMMER2_BREF_TYPE_INDIRECT:
1432 base = &parent->data->npdata.blockref[0];
1433 count = parent->bytes / sizeof(hammer2_blockref_t);
1435 case HAMMER2_BREF_TYPE_VOLUME:
1436 base = &hmp->voldata.sroot_blockset.blockref[0];
1437 count = HAMMER2_SET_COUNT;
1440 panic("hammer2_chain_create_indirect: "
1441 "unrecognized blockref type: %d",
1448 * Scan for an unallocated bref, also skipping any slots occupied
1449 * by in-memory chain elements that may not yet have been updated
1450 * in the parent's bref array.
1452 bzero(&dummy, sizeof(dummy));
1453 for (i = 0; i < count; ++i) {
1457 if (bref->type == 0) {
1459 chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead,
1463 bref = &chain->bref;
1467 * Expand our calculated key range (key, keybits) to fit
1468 * the scanned key. nkeybits represents the full range
1469 * that we will later cut in half (two halves @ nkeybits - 1).
1472 if (nkeybits < bref->keybits)
1473 nkeybits = bref->keybits;
1474 while ((~(((hammer2_key_t)1 << nkeybits) - 1) &
1475 (key ^ bref->key)) != 0) {
1480 * If the new key range is larger we have to determine
1481 * which side of the new key range the existing keys fall
1482 * under by checking the high bit, then collapsing the
1483 * locount into the hicount or vise-versa.
1485 if (keybits != nkeybits) {
1486 if (((hammer2_key_t)1 << (nkeybits - 1)) & key) {
1497 * The newly scanned key will be in the lower half or the
1498 * higher half of the (new) key range.
1500 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key)
1507 * Adjust keybits to represent half of the full range calculated
1513 * Select whichever half contains the most elements. Theoretically
1514 * we can select either side as long as it contains at least one
1515 * element (in order to ensure that a free slot is present to hold
1516 * the indirect block).
1518 key &= ~(((hammer2_key_t)1 << keybits) - 1);
1519 if (hammer2_indirect_optimize) {
1521 * Insert node for least number of keys, this will arrange
1522 * the first few blocks of a large file or the first few
1523 * inodes in a directory with fewer indirect blocks when
1526 if (hicount < locount && hicount != 0)
1527 key |= (hammer2_key_t)1 << keybits;
1529 key &= ~(hammer2_key_t)1 << keybits;
1532 * Insert node for most number of keys, best for heavily
1535 if (hicount > locount)
1536 key |= (hammer2_key_t)1 << keybits;
1538 key &= ~(hammer2_key_t)1 << keybits;
1542 * How big should our new indirect block be? It has to be at least
1543 * as large as its parent.
1545 if (parent->bref.type == HAMMER2_BREF_TYPE_INODE)
1546 nbytes = HAMMER2_IND_BYTES_MIN;
1548 nbytes = HAMMER2_IND_BYTES_MAX;
1549 if (nbytes < count * sizeof(hammer2_blockref_t))
1550 nbytes = count * sizeof(hammer2_blockref_t);
1553 * Ok, create our new indirect block
1555 dummy.bref.type = HAMMER2_BREF_TYPE_INDIRECT;
1556 dummy.bref.key = key;
1557 dummy.bref.keybits = keybits;
1558 dummy.bref.data_off = hammer2_bytes_to_radix(nbytes);
1559 ichain = hammer2_chain_alloc(hmp, &dummy.bref);
1560 ichain->flags |= HAMMER2_CHAIN_INITIAL;
1563 * Iterate the original parent and move the matching brefs into
1564 * the new indirect block.
1566 for (i = 0; i < count; ++i) {
1568 * For keying purposes access the bref from the media or
1569 * from our in-memory cache. In cases where the in-memory
1570 * cache overrides the media the keyrefs will be the same
1571 * anyway so we can avoid checking the cache when the media
1575 if (bref->type == 0) {
1577 chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead,
1579 if (chain == NULL) {
1581 * Select index indirect block is placed in
1583 if (ichain->index < 0)
1587 bref = &chain->bref;
1591 * Skip keys not in the chosen half (low or high), only bit
1592 * (keybits - 1) needs to be compared but for safety we
1593 * will compare all msb bits plus that bit again.
1595 if ((~(((hammer2_key_t)1 << keybits) - 1) &
1596 (key ^ bref->key)) != 0) {
1601 * This element is being moved, its slot is available
1602 * for our indirect block.
1604 if (ichain->index < 0)
1608 * Load the new indirect block by acquiring or allocating
1609 * the related chain entries, then simply move it to the
1610 * new parent (ichain).
1612 * Flagging the new chain entry MOVED will cause a flush
1613 * to synchronize its block into the new indirect block.
1614 * The chain is unlocked after being moved but needs to
1615 * retain a reference for the MOVED state
1617 * We must still set SUBMODIFIED in the parent but we do
1618 * that after the loop.
1620 * XXX we really need a lock here but we don't need the
1621 * data. NODATA feature needed.
1623 chain = hammer2_chain_get(hmp, parent, i,
1624 HAMMER2_LOOKUP_NOLOCK);
1625 SPLAY_REMOVE(hammer2_chain_splay, &parent->shead, chain);
1626 if (SPLAY_INSERT(hammer2_chain_splay, &ichain->shead, chain))
1627 panic("hammer2_chain_create_indirect: collision");
1628 chain->parent = ichain;
1629 bzero(&base[i], sizeof(base[i]));
1630 atomic_add_int(&parent->refs, -1);
1631 atomic_add_int(&ichain->refs, 1);
1632 if (chain->flags & HAMMER2_CHAIN_MOVED) {
1633 /* We don't need the ref from the chain_get */
1634 hammer2_chain_drop(hmp, chain);
1636 /* MOVED bit inherits the ref from the chain_get */
1637 atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED);
1639 KKASSERT(parent->refs > 0);
1644 * Insert the new indirect block into the parent now that we've
1645 * cleared out some entries in the parent. We calculated a good
1646 * insertion index in the loop above (ichain->index).
1648 KKASSERT(ichain->index >= 0);
1649 if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, ichain))
1650 panic("hammer2_chain_create_indirect: ichain insertion");
1651 ichain->parent = parent;
1652 atomic_add_int(&parent->refs, 1);
1655 * Mark the new indirect block modified after insertion, which
1656 * will propagate up through parent all the way to the root and
1657 * also allocate the physical block in ichain for our caller,
1658 * and assign ichain->data to a pre-zero'd space (because there
1659 * is not prior data to copy into it).
1661 * We have to set SUBMODIFIED in ichain's flags manually so the
1662 * flusher knows it has to recurse through it to get to all of
1665 hammer2_chain_modify(hmp, ichain);
1666 atomic_set_int(&ichain->flags, HAMMER2_CHAIN_SUBMODIFIED);
1669 * Figure out what to return.
1671 if (create_bits >= keybits) {
1673 * Key being created is way outside the key range,
1674 * return the original parent.
1676 hammer2_chain_put(hmp, ichain);
1677 } else if (~(((hammer2_key_t)1 << keybits) - 1) &
1678 (create_key ^ key)) {
1680 * Key being created is outside the key range,
1681 * return the original parent.
1683 hammer2_chain_put(hmp, ichain);
1686 * Otherwise its in the range, return the new parent.
1695 * Physically delete the specified chain element. Note that inodes with
1696 * open descriptors should not be deleted (as with other filesystems) until
1697 * the last open descriptor is closed.
1699 * This routine will remove the chain element from its parent and potentially
1700 * also recurse upward and delete indirect blocks which become empty as a
1703 * The caller must pass a pointer to the chain's parent, also locked and
1704 * referenced. (*parentp) will be modified in a manner similar to a lookup
1705 * or iteration when indirect blocks are also deleted as a side effect.
1708 hammer2_chain_delete(hammer2_mount_t *hmp, hammer2_chain_t *parent,
1709 hammer2_chain_t *chain)
1711 hammer2_blockref_t *base;
1714 if (chain->parent != parent)
1715 panic("hammer2_chain_delete: parent mismatch");
1718 * Mark the parent modified so our base[] pointer remains valid
1719 * while we move entries.
1721 * Calculate the blockref reference in the parent
1723 hammer2_chain_modify(hmp, parent);
1725 switch(parent->bref.type) {
1726 case HAMMER2_BREF_TYPE_INODE:
1727 base = &parent->data->ipdata.u.blockset.blockref[0];
1728 count = HAMMER2_SET_COUNT;
1730 case HAMMER2_BREF_TYPE_INDIRECT:
1731 base = &parent->data->npdata.blockref[0];
1732 count = parent->bytes / sizeof(hammer2_blockref_t);
1734 case HAMMER2_BREF_TYPE_VOLUME:
1735 base = &hmp->voldata.sroot_blockset.blockref[0];
1736 count = HAMMER2_SET_COUNT;
1739 panic("hammer2_chain_delete: unrecognized blockref type: %d",
1746 * Disconnect the bref in the parent, remove the chain, and
1747 * disconnect in-memory fields from the parent.
1749 KKASSERT(chain->index >= 0 && chain->index < count);
1750 base += chain->index;
1751 bzero(base, sizeof(*base));
1753 SPLAY_REMOVE(hammer2_chain_splay, &parent->shead, chain);
1754 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
1755 atomic_add_int(&parent->refs, -1); /* for splay entry */
1758 chain->parent = NULL;
1759 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
1760 chain->u.ip->pip = NULL;
1763 * Nobody references the underlying object any more so we can
1764 * clear any pending modification(s) on it. This can theoretically
1765 * recurse downward but even just clearing the bit on this item
1766 * will effectively recurse if someone is doing a rm -rf and greatly
1767 * reduce the I/O required.
1769 * The MODIFIED1 bit is cleared but we have to remember the old state
1770 * in case this deletion is related to a rename. The ref on the
1771 * chain is shared by both modified flags.
1773 if (chain->flags & HAMMER2_CHAIN_MODIFIED1) {
1774 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED1);
1775 atomic_set_int(&chain->flags, HAMMER2_CHAIN_WAS_MODIFIED);
1776 if ((chain->flags & HAMMER2_CHAIN_MODIFIED2) == 0)
1777 hammer2_chain_drop(hmp, chain);
1782 * Recursively flush the specified chain. The chain is locked and
1783 * referenced by the caller and will remain so on return.
1785 * This cannot be called with the volume header's vchain (yet).
1787 * PASS1 - clear the MODIFIED1 bit (and set the MODIFIED2 bit XXX)
1791 hammer2_chain_flush_pass1(hammer2_mount_t *hmp, hammer2_chain_t *chain)
1794 * Flush any children of this chain entry.
1796 if (chain->flags & HAMMER2_CHAIN_SUBMODIFIED) {
1797 hammer2_blockref_t *base;
1798 hammer2_chain_t *scan;
1799 hammer2_chain_t *next;
1801 int submodified = 0;
1804 * Modifications to the children will propagate up, forcing
1805 * us to become modified and copy-on-write too.
1807 * Clear SUBMODIFIED now, races during the flush will re-set
1810 hammer2_chain_modify(hmp, chain);
1811 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_SUBMODIFIED);
1814 * The blockref in the parent's array must be repointed at
1815 * the new block allocated by the child after its flush.
1817 * Calculate the base of the array.
1819 switch(chain->bref.type) {
1820 case HAMMER2_BREF_TYPE_INODE:
1821 KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
1822 base = &chain->data->ipdata.u.blockset.blockref[0];
1823 count = HAMMER2_SET_COUNT;
1825 case HAMMER2_BREF_TYPE_INDIRECT:
1826 base = &chain->data->npdata.blockref[0];
1827 count = chain->bytes / sizeof(hammer2_blockref_t);
1829 case HAMMER2_BREF_TYPE_VOLUME:
1830 KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
1831 base = &hmp->voldata.sroot_blockset.blockref[0];
1832 count = HAMMER2_SET_COUNT;
1836 panic("hammer2_chain_get: unrecognized blockref type: %d",
1841 * Flush the children and update the blockrefs in the parent.
1842 * Be careful of ripouts during the loop.
1844 next = SPLAY_MIN(hammer2_chain_splay, &chain->shead);
1845 while ((scan = next) != NULL) {
1846 next = SPLAY_NEXT(hammer2_chain_splay, &chain->shead,
1848 if (scan->flags & (HAMMER2_CHAIN_SUBMODIFIED |
1849 HAMMER2_CHAIN_MODIFIED1 |
1850 HAMMER2_CHAIN_MOVED)) {
1851 hammer2_chain_ref(hmp, scan);
1852 hammer2_chain_lock(hmp, scan);
1853 hammer2_chain_flush_pass1(hmp, scan);
1854 if (scan->flags & (HAMMER2_CHAIN_SUBMODIFIED |
1855 HAMMER2_CHAIN_MODIFIED1)) {
1858 KKASSERT(scan->index < count);
1859 base[scan->index] = scan->bref;
1860 if (scan->flags & HAMMER2_CHAIN_MOVED) {
1861 atomic_clear_int(&scan->flags,
1862 HAMMER2_CHAIN_MOVED);
1863 hammer2_chain_drop(hmp, scan);
1866 hammer2_chain_put(hmp, scan);
1870 atomic_set_int(&chain->flags,
1871 HAMMER2_CHAIN_SUBMODIFIED);
1876 * Flush this chain entry only if it is marked modified.
1878 if ((chain->flags & HAMMER2_CHAIN_MODIFIED1) == 0)
1882 * Clear MODIFIED1 and set HAMMER2_CHAIN_MOVED. The MODIFIED{1,2}
1883 * bits own a single parent ref and the MOVED bit owns its own
1886 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED1);
1887 if (chain->flags & HAMMER2_CHAIN_MOVED) {
1888 if ((chain->flags & HAMMER2_CHAIN_MODIFIED2) == 0)
1889 hammer2_chain_drop(hmp, chain);
1891 atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED);
1892 if (chain->flags & HAMMER2_CHAIN_MODIFIED2)
1893 hammer2_chain_ref(hmp, chain);
1897 * If this is part of a recursive flush we can go ahead and write
1898 * out the buffer cache buffer and pass a new bref back up the chain.
1900 * This will never be a volume header.
1902 if (chain != &hmp->vchain) {
1903 hammer2_blockref_t *bref;
1904 hammer2_off_t off_hi;
1910 KKASSERT(chain->data != NULL);
1911 bref = &chain->bref;
1913 off_hi = bref->data_off & HAMMER2_OFF_MASK_HI;
1914 off_lo = (size_t)bref->data_off & HAMMER2_OFF_MASK_LO;
1915 bytes = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
1916 KKASSERT(off_hi != 0); /* not the root volume header */
1920 * The data is mapped directly to the bp. Dirty the
1921 * bp so it gets flushed out by the kernel later on.
1926 * The data is embedded, we have to acquire the
1927 * buffer cache buffer and copy the data into it.
1930 error = bread(hmp->devvp, off_hi,
1931 HAMMER2_PBUFSIZE, &bp);
1932 KKASSERT(error == 0); /* XXX */
1935 * Copy the data to the buffer, mark the buffer
1936 * dirty, and convert the chain to unmodified.
1938 bcopy(chain->data, (char *)bp->b_data + off_lo, bytes);
1942 chain->bref.check.iscsi32.value =
1943 hammer2_icrc32(chain->data, bytes);
1947 hammer2_blockref_t *bref;
1949 bref = &chain->bref;
1951 switch(bref->type) {
1952 case HAMMER2_BREF_TYPE_VOLUME:
1953 KKASSERT(chain->data != NULL);
1954 KKASSERT(chain->bp == NULL);
1956 hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]=
1958 (char *)&hmp->voldata +
1959 HAMMER2_VOLUME_ICRC1_OFF,
1960 HAMMER2_VOLUME_ICRC1_SIZE);
1961 hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]=
1963 (char *)&hmp->voldata +
1964 HAMMER2_VOLUME_ICRC0_OFF,
1965 HAMMER2_VOLUME_ICRC0_SIZE);
1966 hmp->voldata.icrc_volheader =
1968 (char *)&hmp->voldata +
1969 HAMMER2_VOLUME_ICRCVH_OFF,
1970 HAMMER2_VOLUME_ICRCVH_SIZE);
1978 * PASS2 - not yet implemented (should be called only with the root chain?)
1981 hammer2_chain_flush_pass2(hammer2_mount_t *hmp, hammer2_chain_t *chain)
1987 hammer2_chain_flush(hammer2_mount_t *hmp, hammer2_chain_t *chain)
1989 hammer2_chain_flush_pass1(hmp, chain);