2 * Copyright (c) 2011-2013 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 #include <sys/cdefs.h>
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/types.h>
45 * Adding a ref to an inode is only legal if the inode already has at least
/*
 * hammer2_inode_ref: add one reference to ip by atomically incrementing
 * ip->refs.
 */
49 hammer2_inode_ref(hammer2_inode_t *ip)
51 atomic_add_int(&ip->refs, 1);
55 * Drop an inode reference, freeing the inode when the last reference goes
/*
 * hammer2_inode_drop: release one reference on ip using compare-and-set
 * on ip->refs.  Winning the 1->0 transition disposes of the inode (chain
 * reference dropped, structure passed to kfree()).
 */
59 hammer2_inode_drop(hammer2_inode_t *ip)
63 hammer2_chain_t *chain;
/* Last reference: continue with disposal only if the 1->0 swap succeeds. */
70 if (atomic_cmpset_int(&ip->refs, 1, 0)) {
/* NOTE(review): count == 0 presumably means no CCMS lock is held; confirm. */
71 KKASSERT(ip->topo_cst.count == 0);
/* Drop a chain reference; chain is assigned on a line not shown here. */
80 hammer2_chain_drop(chain);
83 * We have to drop pip (if non-NULL) to
84 * dispose of our implied reference from
85 * ip->pip. We can simply loop on it.
/* Free the inode structure against the hmp->minode malloc type. */
87 kfree(ip, hmp->minode);
/*
 * Otherwise decrement by one with compare-and-set.
 * NOTE(review): refs is presumably a snapshot of ip->refs taken on a line
 * not shown here; confirm.
 */
94 if (atomic_cmpset_int(&ip->refs, refs, refs - 1))
101 * Get the vnode associated with the given inode, allocating the vnode if
102 * necessary. The vnode will be returned exclusively locked.
104 * The caller must lock the inode (shared or exclusive).
106 * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
/*
 * hammer2_igetv: obtain the vnode for ip.  An existing vnode is reacquired
 * with vget(); otherwise a new one is allocated with getnewvnode() and set
 * up according to the inode object type.  The getnewvnode() status is
 * stored in *errorp.
 */
110 hammer2_igetv(hammer2_inode_t *ip, int *errorp)
112 hammer2_inode_data_t *ipdata;
113 hammer2_pfsmount_t *pmp;
/* pmp is required: pmp->mp and pmp->iroot are both used further down. */
118 KKASSERT(pmp != NULL);
/* Inode meta-data (type, size) is read from the data of the inode chain. */
120 ipdata = &ip->chain->data->ipdata;
124 * Attempt to reuse an existing vnode assignment. It is
125 * possible to race a reclaim so the vget() may fail. The
126 * inode must be unlocked during the vget() to avoid a
127 * deadlock against a reclaim.
132 * Inode must be unlocked during the vget() to avoid
133 * possible deadlocks, but leave the ip ref intact.
135 * vnode is held to prevent destruction during the
136 * vget(). The vget() can still fail if we lost
137 * a reclaim race on the vnode.
139 vhold_interlocked(vp);
/* ostate records the prior lock state for hammer2_inode_lock_restore(). */
140 ostate = hammer2_inode_lock_temp_release(ip);
/* Non-zero return from vget() is the failure case. */
141 if (vget(vp, LK_EXCLUSIVE)) {
143 hammer2_inode_lock_restore(ip, ostate);
/* vget() succeeded: put the inode lock back the way it was. */
146 hammer2_inode_lock_restore(ip, ostate);
148 /* vp still locked and ref from vget */
150 kprintf("hammer2: igetv race %p/%p\n",
160 * No vnode exists, allocate a new vnode. Beware of
161 * allocation races. This function will return an
162 * exclusively locked and referenced vnode.
/* The allocation status is written directly to the caller via *errorp. */
164 *errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0);
166 kprintf("hammer2: igetv getnewvnode failed %d\n",
173 * Lock the inode and check for an allocation race.
175 ostate = hammer2_inode_lock_upgrade(ip);
/* ip->vp already set here means another path attached a vnode first. */
176 if (ip->vp != NULL) {
179 hammer2_inode_lock_restore(ip, ostate);
/* Per-object-type vnode initialisation. */
183 switch (ipdata->type) {
184 case HAMMER2_OBJTYPE_DIRECTORY:
187 case HAMMER2_OBJTYPE_REGFILE:
/* VM I/O is set up from the inode size; last arg is size & LBUFMASK. */
189 vinitvmio(vp, ipdata->size,
191 (int)ipdata->size & HAMMER2_LBUFMASK);
193 case HAMMER2_OBJTYPE_SOFTLINK:
195 * XXX for now we are using the generic file_read
196 * and file_write code so we need a buffer cache
200 vinitvmio(vp, ipdata->size,
202 (int)ipdata->size & HAMMER2_LBUFMASK);
/* NOTE(review): presumably the default arm; unhandled types are fatal. */
206 panic("hammer2: unhandled objtype %d", ipdata->type);
/* The vnode of the PFS root inode is flagged VROOT. */
210 if (ip == pmp->iroot)
211 vsetflags(vp, VROOT);
/* The vnode association holds its own reference on the inode. */
215 hammer2_inode_ref(ip); /* vp association */
216 hammer2_inode_lock_restore(ip, ostate);
221 * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0.
/* Optional trace, enabled by bit 0x0002 of hammer2_debug. */
223 if (hammer2_debug & 0x0002) {
224 kprintf("igetv vp %p refs %d aux %d\n",
225 vp, vp->v_sysref.refcnt, vp->v_auxrefs);
231 * The passed-in chain must be locked and the returned inode will also be
232 * locked. A ref is added to both the chain and the inode.
234 * The hammer2_inode structure regulates the interface between the high level
235 * kernel VNOPS API and the filesystem backend (the chains).
237 * NOTE! This routine allocates the hammer2_inode structure
238 * unconditionally, and thus there might be several which
239 * are associated with the same chain. Particularly for hardlinks
240 * but this can also happen temporarily for normal files and
243 * WARNING! This routine sucks up the chain's lock (makes it part of the
244 * inode lock from the point of view of the inode lock API),
245 * so callers need to be careful.
247 * WARNING! The mount code is allowed to pass dip == NULL for iroot and
248 * is allowed to pass pmp == NULL and dip == NULL for sroot.
/*
 * hammer2_inode_get: allocate a zero-filled hammer2_inode for chain, take
 * a reference on the chain and on the parent inode dip, and acquire the
 * new inode topo_cst lock exclusively.
 */
251 hammer2_inode_get(hammer2_mount_t *hmp, hammer2_pfsmount_t *pmp,
252 hammer2_inode_t *dip, hammer2_chain_t *chain)
254 hammer2_inode_t *nip;
/* Only chains whose bref type is INODE may be wrapped. */
256 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
/* Zero-filled allocation (M_ZERO) against the hmp->minode malloc type. */
258 nip = kmalloc(sizeof(*nip), hmp->minode, M_WAITOK | M_ZERO);
261 hammer2_chain_ref(chain); /* nip->chain */
262 nip->pip = dip; /* can be NULL */
/*
 * NOTE(review): dip can be NULL per the line above, so this ref is
 * presumably guarded by a check on a line not shown here; confirm.
 */
264 hammer2_inode_ref(dip); /* ref dip for nip->pip */
270 * ref and lock on nip gives it state compatible to after a
271 * hammer2_inode_lock_ex() call.
/* The CCMS state is initialised against nip->chain, then locked exclusive. */
274 ccms_cst_init(&nip->topo_cst, &nip->chain);
275 ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE);
276 /* combination of thread lock and chain lock == inode lock */
282 * Put away an inode, unlocking it and disconnecting it from its chain.
284 * The inode must be exclusively locked on call and non-recursed, with
285 * at least 2 refs (one belonging to the exclusive lock, and one additional
286 * ref belonging to the caller).
288 * Upon return the inode typically has one ref remaining which the caller
/*
 * hammer2_inode_put: release the exclusive lock on ip, unlock and drop
 * ip->chain when it is set, and drop the reference on ip->pip when it is
 * set.
 */
292 hammer2_inode_put(hammer2_inode_t *ip)
294 hammer2_inode_t *pip;
295 hammer2_chain_t *chain;
298 * Disconnect and unlock chain
/* Entry conditions: at least two refs and exactly one exclusive lock. */
300 KKASSERT(ip->refs >= 2);
301 KKASSERT(ip->topo_cst.count == -1); /* one excl lock allowed */
302 if ((chain = ip->chain) != NULL) {
/* Inode lock first, then the chain lock, then the chain reference. */
304 hammer2_inode_unlock_ex(ip);
305 hammer2_chain_unlock(chain); /* because ip->chain now NULL */
306 hammer2_chain_drop(chain); /* from *_get() */
/* Release the reference held on the parent inode through ip->pip. */
312 if ((pip = ip->pip) != NULL) {
314 hammer2_inode_drop(pip);
319 * Create a new inode in the specified directory using the vattr to
320 * figure out the type of inode.
322 * If no error occurs the new inode with its chain locked is returned in
323 * *nipp, otherwise an error is returned and *nipp is set to NULL.
325 * If vap and/or cred are NULL the related fields are not set and the
326 * inode type defaults to a directory. This is used when creating PFSs
327 * under the super-root, so the inode number is set to 1 in this case.
329 * dip is not locked on entry.
/*
 * hammer2_inode_create: create a new inode chain under directory dip,
 * keyed by the directory hash of (name, name_len), wrap it in a
 * hammer2_inode via hammer2_inode_get(), and fill in its meta-data from
 * vap and from values sampled out of the parent directory.
 */
332 hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip,
333 struct vattr *vap, struct ucred *cred,
334 const uint8_t *name, size_t name_len,
337 hammer2_inode_data_t *dipdata;
338 hammer2_inode_data_t *nipdata;
339 hammer2_mount_t *hmp;
340 hammer2_chain_t *chain;
341 hammer2_chain_t *parent;
342 hammer2_inode_t *nip;
/* The directory key starts out as the hash of the name. */
351 lhc = hammer2_dirhash(name, name_len);
355 * Locate the inode or indirect block to create the new
356 * entry in. At the same time check for key collisions
357 * and iterate until we don't get one.
360 hammer2_inode_lock_ex(dip);
/*
 * Sample the parent uid/gid/mode while dip is locked; dip_uid and dip_gid
 * are consumed further down, after dip has been unlocked.
 */
361 dipdata = &dip->chain->data->ipdata;
362 dip_uid = dipdata->uid;
363 dip_gid = dipdata->gid;
364 dip_mode = dipdata->mode;
366 parent = hammer2_chain_lookup_init(dip->chain, 0);
/* Probe for an entry that already occupies key lhc. */
369 chain = hammer2_chain_lookup(&parent, lhc, lhc, 0);
/*
 * Two checks on lhc made during collision handling; what they lead to is
 * on lines not shown here.  The colliding chain is unlocked afterwards.
 */
372 if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0)
374 if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
376 hammer2_chain_unlock(chain);
/* Create an inode-typed chain of HAMMER2_INODE_BYTES under parent. */
381 error = hammer2_chain_create(trans, parent, &chain,
383 HAMMER2_BREF_TYPE_INODE,
384 HAMMER2_INODE_BYTES);
388 * Cleanup and handle retries.
/*
 * EAGAIN: hold a ref on parent across lookup_done and the dip unlock so
 * that hammer2_chain_wait() can be called on it, then drop that ref.
 */
390 if (error == EAGAIN) {
391 hammer2_chain_ref(parent);
392 hammer2_chain_lookup_done(parent);
393 hammer2_inode_unlock_ex(dip);
394 hammer2_chain_wait(parent);
395 hammer2_chain_drop(parent);
/* Release the lookup parent and the directory lock. */
398 hammer2_chain_lookup_done(parent);
399 hammer2_inode_unlock_ex(dip);
/* NOTE(review): presumably on the error path; no chain may exist then. */
402 KKASSERT(chain == NULL);
408 * Set up the new inode.
410 * NOTE: *_get() integrates chain's lock into the inode lock.
412 * NOTE: Only one new inode can currently be created per
413 * transaction. If the need arises we can adjust
414 * hammer2_trans_init() to allow more.
/* dip becomes the parent (pip) of the new in-memory inode. */
416 nip = hammer2_inode_get(dip->hmp, dip->pmp, dip, chain);
417 nipdata = &chain->data->ipdata;
/*
 * Type comes from vap->va_type and the inode number from the transaction
 * sync_tid, limited to one creation per transaction; the other visible
 * assignment defaults the type to a directory.
 */
420 KKASSERT(trans->inodes_created == 0);
421 nipdata->type = hammer2_get_obj_type(vap->va_type);
422 nipdata->inum = trans->sync_tid;
423 ++trans->inodes_created;
425 nipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
/* mtime starts out equal to the freshly sampled ctime. */
428 nipdata->version = HAMMER2_INODE_VERSION_ONE;
429 hammer2_update_time(&nipdata->ctime);
430 nipdata->mtime = nipdata->ctime;
432 nipdata->mode = vap->va_mode;
/* Candidate uid derived from the parent directory uid. */
436 xuid = hammer2_to_unix_xid(&dip_uid);
437 xuid = vop_helper_create_uid(dip->pmp->mp,
/* uid source order: UUID from vap, else numeric va_uid, else xuid. */
445 if (vap->va_vaflags & VA_UID_UUID_VALID)
446 nipdata->uid = vap->va_uid_uuid;
447 else if (vap->va_uid != (uid_t)VNOVAL)
448 hammer2_guid_to_uuid(&nipdata->uid, vap->va_uid);
450 hammer2_guid_to_uuid(&nipdata->uid, xuid);
/* gid source order: UUID from vap, else numeric va_gid, else parent gid. */
452 if (vap->va_vaflags & VA_GID_UUID_VALID)
453 nipdata->gid = vap->va_gid_uuid;
454 else if (vap->va_gid != (gid_t)VNOVAL)
455 hammer2_guid_to_uuid(&nipdata->gid, vap->va_gid);
457 nipdata->gid = dip_gid;
461 * Regular files and softlinks allow a small amount of data to be
462 * directly embedded in the inode. This flag will be cleared if
463 * the size is extended past the embedded limit.
465 if (nipdata->type == HAMMER2_OBJTYPE_REGFILE ||
466 nipdata->type == HAMMER2_OBJTYPE_SOFTLINK) {
467 nipdata->op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
/* Store the name, its final directory key and its length in the inode. */
470 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
471 bcopy(name, nipdata->filename, name_len);
472 nipdata->name_key = lhc;
473 nipdata->name_len = name_len;
479 * Create a duplicate of (ochain) in the specified target directory (dip).
480 * ochain must represent an inode. The new chain is returned locked and
/*
 * hammer2_inode_duplicate: create a new inode chain in dip keyed by the
 * inode number of ochain and copy the inode data of ochain into it.  The
 * status of the chain creation is stored in *errorp.
 */
484 hammer2_inode_duplicate(hammer2_trans_t *trans, hammer2_chain_t *ochain,
485 hammer2_inode_t *dip, int *errorp)
487 hammer2_inode_data_t *nipdata;
488 hammer2_mount_t *hmp;
489 hammer2_chain_t *parent;
490 hammer2_chain_t *chain;
/* The key is the inode number, which must not carry the VISIBLE bit. */
495 lhc = ochain->data->ipdata.inum;
496 KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0);
499 * Locate the inode or indirect block to create the new
502 * There should be no key collisions with invisible inode keys.
505 parent = hammer2_chain_lookup_init(dip->chain, 0);
506 chain = hammer2_chain_lookup(&parent, lhc, lhc, 0);
/* A chain found at lhc is unlocked here; the guard is on a hidden line. */
508 hammer2_chain_unlock(chain);
514 * Create entry in common parent directory.
517 *errorp = hammer2_chain_create(trans, parent, &chain,
519 HAMMER2_BREF_TYPE_INODE,/* n/a */
520 HAMMER2_INODE_BYTES); /* n/a */
524 * Cleanup and handle retries.
/*
 * EAGAIN: hold a ref on parent across lookup_done so that
 * hammer2_chain_wait() can be called on it, then drop that ref.
 */
526 if (*errorp == EAGAIN) {
527 hammer2_chain_ref(parent);
528 hammer2_chain_lookup_done(parent);
529 hammer2_chain_wait(parent);
530 hammer2_chain_drop(parent);
534 hammer2_chain_lookup_done(parent);
537 * Handle the error case
540 KKASSERT(chain == NULL);
545 * XXX This is currently a horrible hack. Well, if we wanted to
546 * duplicate a file, i.e. as in a snapshot, we definitely
547 * would have to flush it first.
549 * For hardlink target generation we can theoretically move any
550 * active chain structures without flushing, but that gets really
551 * iffy for code which follows chain->parent and ip->pip links.
553 * XXX only works with files. Duplicating a directory hierarchy
554 * requires a flush but doesn't deal with races post-flush.
555 * Well, it would work I guess, but you might catch some files
558 * We cannot leave ochain with any in-memory chains because (for a
559 * hardlink), ochain will become a OBJTYPE_HARDLINK which is just a
560 * pointer to the real hardlink's inum and can't have any sub-chains.
561 * XXX might be 0-ref chains left.
/* Flush the source chain before its inode data is copied. */
563 hammer2_chain_flush(trans, ochain);
564 /*KKASSERT(RB_EMPTY(&ochain.rbhead));*/
/* Whole-structure copy; the name fields are overwritten just below. */
566 hammer2_chain_modify(trans, chain, 0);
567 nipdata = &chain->data->ipdata;
568 *nipdata = ochain->data->ipdata;
571 * Directory entries are inodes but this is a hidden hardlink
572 * target. The name isn't used but to ease debugging give it
573 * a name after its inode number.
/* name_len is taken from the formatted string; name_key is the inum key. */
575 ksnprintf(nipdata->filename, sizeof(nipdata->filename),
576 "0x%016jx", (intmax_t)nipdata->inum);
577 nipdata->name_len = strlen(nipdata->filename);
578 nipdata->name_key = lhc;
584 * Connect the target inode to the media topology at (dip, name, len).
585 * This function creates a directory entry and replaces (*chainp).
587 * The caller usually holds the related inode exclusive locked through this
588 * call and is also responsible for replacing ip->chain after we return.
590 * If (*chainp) was marked DELETED then it represents a terminus inode
591 * with no other nlinks, we can simply duplicate the chain (in-memory
592 * chain structures cannot be moved within the in-memory topology, only
593 * duplicated, but the duplicate uses the same bref).
595 * if (*chainp) is not marked DELETED then it represents a hardlink
596 * terminus which still has a non-zero nlink count. Instead of duplicating
597 * it (which would be like a snapshot), we need to create a
598 * OBJTYPE_HARDLINK directory entry which references (*chainp)'s inode
599 * number and bump (*chainp)'s nlinks. In this situation we return
600 * the terminus as *chainp.
602 * (*chainp) is adjusted if necessary and returned locked. If different,
603 * the original (*chainp) is unlocked. Note that the (*chainp) that is
604 * returned is always the hardlink terminus (the actual inode), which
605 * might reside in some parent directory. It will not be the
606 * OBJTYPE_HARDLINK pointer.
608 * WARNING! The caller is likely holding ip/ip->chain locked exclusively.
609 * Replacing ip->chain here would create confusion so we leave
610 * it to the caller to do that.
612 * (The caller is expected to hold the related inode exclusively)
/*
 * hammer2_inode_connect: create a directory entry (name, name_len) in dip
 * for the inode in *chainp.  Three outcomes are visible below: an
 * OBJTYPE_HARDLINK pointer entry, a fake-hardlink snapshot copy, or the
 * reconnection of a duplicate of a DELETED chain under the new name.
 */
615 hammer2_inode_connect(hammer2_trans_t *trans, hammer2_inode_t *dip,
616 hammer2_chain_t **chainp,
617 const uint8_t *name, size_t name_len)
619 hammer2_inode_data_t *ipdata;
620 hammer2_mount_t *hmp;
621 hammer2_chain_t *nchain;
622 hammer2_chain_t *parent;
623 hammer2_chain_t *ochain;
633 * Since ochain is either disconnected from the topology or represents
634 * a hardlink terminus which is always a parent of or equal to dip,
635 * we should be able to safely lock dip->chain for our setup.
637 parent = hammer2_chain_lookup_init(dip->chain, 0);
/* hlink is non-zero when ochain is NOT flagged DELETED. */
639 lhc = hammer2_dirhash(name, name_len);
640 hlink = ((ochain->flags & HAMMER2_CHAIN_DELETED) == 0);
641 kprintf("reconnect hlink=%d name=%*.*s\n",
642 hlink, (int)name_len, (int)name_len, name);
645 * In fake mode flush oip so we can just snapshot it downbelow.
646 * A flush is not otherwise needed as the new chain inherits
647 * all active children of the old chain (they will share the same
/* hammer2_hardlink_enable < 0 is the fake (snapshot) hardlink mode. */
650 if (hlink && hammer2_hardlink_enable < 0)
651 hammer2_chain_flush(trans, ochain);
654 * Locate the inode or indirect block to create the new
655 * entry in. At the same time check for key collisions
656 * and iterate until we don't get one.
660 nchain = hammer2_chain_lookup(&parent, lhc, lhc, 0);
663 if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
665 hammer2_chain_unlock(nchain);
671 * Passing a non-NULL chain to hammer2_chain_create() reconnects the
672 * existing chain instead of creating a new one. The chain's bref
673 * will be properly updated.
678 * Hardlink pointer needed, create totally fresh
/* nchain is NULL going in, so chain_create() makes a brand new chain. */
681 KKASSERT(nchain == NULL);
682 error = hammer2_chain_create(trans, parent, &nchain,
684 HAMMER2_BREF_TYPE_INODE,
685 HAMMER2_INODE_BYTES);
688 * Original inode reconnected, duplicate as a
689 * new directory entry, leave unconnected and
690 * then call chain_create() to connect it.
/* Duplicate with a NULL parent and index -1, then connect under parent. */
694 hammer2_chain_duplicate(trans, NULL, -1, &nchain);
695 error = hammer2_chain_create(trans, parent, &nchain,
697 HAMMER2_BREF_TYPE_INODE,
698 HAMMER2_INODE_BYTES);
/* EAGAIN is asserted never to occur on this path. */
705 KKASSERT(error != EAGAIN);
706 hammer2_chain_lookup_done(parent);
710 * nchain should be NULL on error, leave ochain (== *chainp) alone.
713 KKASSERT(nchain == NULL);
718 * Directory entries are inodes so if the name has changed we have
719 * to update the inode.
721 * When creating an OBJTYPE_HARDLINK entry remember to unlock the
722 * chain, the caller will access the hardlink via the actual hardlink
723 * target file and not the hardlink pointer entry, so we must still
726 if (hlink && hammer2_hardlink_enable >= 0) {
728 * Create the HARDLINK pointer. oip represents the hardlink
729 * target in this situation.
731 * We will return ochain (the hardlink target).
/*
 * The pointer entry records the name, the type of the target in
 * target_type, and the inode number of the target.
 */
733 hammer2_chain_modify(trans, nchain, 0);
734 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
735 ipdata = &nchain->data->ipdata;
736 bcopy(name, ipdata->filename, name_len);
737 ipdata->name_key = lhc;
738 ipdata->name_len = name_len;
739 ipdata->target_type = ochain->data->ipdata.type;
740 ipdata->type = HAMMER2_OBJTYPE_HARDLINK;
741 ipdata->inum = ochain->data->ipdata.inum;
743 kprintf("created hardlink %*.*s\n",
744 (int)name_len, (int)name_len, name);
/* The pointer entry itself is unlocked; the target is what is returned. */
745 hammer2_chain_unlock(nchain);
748 } else if (hlink && hammer2_hardlink_enable < 0) {
750 * Create a snapshot (hardlink fake mode for debugging).
751 * (ochain already flushed above so we can just copy the
754 * Since this is a snapshot we return nchain in the fake
/* Copy the entire inode data, then overwrite only the name fields. */
757 hammer2_chain_modify(trans, nchain, 0);
758 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
759 ipdata = &nchain->data->ipdata;
760 *ipdata = ochain->data->ipdata;
761 bcopy(name, ipdata->filename, name_len);
762 ipdata->name_key = lhc;
763 ipdata->name_len = name_len;
764 kprintf("created fake hardlink %*.*s\n",
765 (int)name_len, (int)name_len, name);
768 * We are reconnecting a previously DELETED node in a new
769 * location. nchain is a duplication of the deleted node.
771 * We must fixup the name stored in oip.
773 hammer2_chain_modify(trans, nchain, 0);
774 ipdata = &nchain->data->ipdata;
/* The name fields are rewritten only when length or bytes differ. */
776 if (ipdata->name_len != name_len ||
777 bcmp(ipdata->filename, name, name_len) != 0) {
778 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
779 bcopy(name, ipdata->filename, name_len);
780 ipdata->name_key = lhc;
781 ipdata->name_len = name_len;
787 * We are replacing ochain with nchain, unlock ochain. In the
788 * case where ochain is left unchanged the code above sets
789 * nchain to ochain and ochain to NULL, resulting in a NOP here.
/* NOTE(review): presumably guarded by an ochain NULL check not shown. */
792 hammer2_chain_unlock(ochain);
799 * Caller must hold exactly ONE exclusive lock on the inode. *nchainp
800 * must be exclusive locked (its own exclusive lock even if it is the
801 * same as ip->chain).
803 * This function replaces ip->chain. The exclusive lock on the passed
804 * nchain is inherited by the inode and the caller becomes responsible
805 * for unlocking it when the caller unlocks the inode.
807 * ochain was locked by the caller indirectly via the inode lock. Since
808 * ip->chain is being repointed, we become responsible for cleaning up
811 * Return *nchainp = NULL as a safety.
/*
 * hammer2_inode_repoint: switch ip->chain over to the chain passed in
 * *nchainp, moving the ip->chain reference from the old chain to the new
 * one.
 */
814 hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_chain_t **nchainp)
816 hammer2_chain_t *nchain = *nchainp;
817 hammer2_chain_t *ochain;
820 * Repoint ip->chain if necessary.
822 * (Inode must be locked exclusively by parent)
/*
 * Different chain: reference the new chain on behalf of ip->chain, then
 * release the lock and the ip->chain reference on the old one.
 * NOTE(review): ochain is assigned on a line not shown here.
 */
825 if (ochain != nchain) {
826 hammer2_chain_ref(nchain); /* for ip->chain */
829 hammer2_chain_unlock(ochain);
830 hammer2_chain_drop(ochain); /* for ip->chain */
/*
 * NOTE(review): presumably the arm taken when ochain == nchain; the branch
 * structure around this unlock is not visible here, confirm.
 */
833 hammer2_chain_unlock(nchain);
839 * Unlink the file from the specified directory inode. The directory inode
840 * does not need to be locked. The caller should pass a non-NULL (ip)
841 * representing the object being removed only if the related vnode is
842 * potentially inactive (not referenced in the caller's active path),
843 * so we can vref/vrele it to trigger the VOP_INACTIVE path and properly
846 * isdir determines whether a directory/non-directory check should be made.
847 * No check is made if isdir is set to -1.
849 * NOTE! This function does not prevent the underlying file from still
850 * being used if it has other refs (such as from an inode, or if its
851 * chain is manually held). However, the caller is responsible for
852 * fixing up ip->chain if e.g. a rename occurs (see chain_duplicate()).
/*
 * hammer2_unlink_file: look up (name, name_len) in directory dip and
 * delete the matching entry.  A hardlink pointer is resolved to its
 * target with hammer2_hardlink_find(); the pointer is deleted and nlinks
 * on the target is decremented, deleting the target when it was 1.
 * isdir selects the directory / non-directory type check.
 */
855 hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
856 const uint8_t *name, size_t name_len, int isdir)
858 hammer2_inode_data_t *ipdata;
859 hammer2_mount_t *hmp;
860 hammer2_chain_t *parent;
861 hammer2_chain_t *ochain;
862 hammer2_chain_t *chain;
863 hammer2_chain_t *dparent;
864 hammer2_chain_t *dchain;
874 lhc = hammer2_dirhash(name, name_len);
877 * Search for the filename in the directory
879 hammer2_inode_lock_ex(dip);
881 parent = hammer2_chain_lookup_init(dip->chain, 0);
/* Scan the whole key range lhc .. lhc + HAMMER2_DIRHASH_LOMASK. */
882 chain = hammer2_chain_lookup(&parent,
883 lhc, lhc + HAMMER2_DIRHASH_LOMASK,
/* A match needs an inode bref with identical name length and bytes. */
886 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
887 name_len == chain->data->ipdata.name_len &&
888 bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
/* Otherwise advance to the next chain within the same key range. */
891 chain = hammer2_chain_next(&parent, chain,
892 lhc, lhc + HAMMER2_DIRHASH_LOMASK,
895 hammer2_inode_unlock_ex(dip); /* retain parent */
898 * Not found or wrong type (isdir < 0 disables the type check).
899 * If a hardlink pointer, type checks use the hardlink target.
/* For a hardlink pointer the effective type is its target_type. */
905 if ((type = chain->data->ipdata.type) == HAMMER2_OBJTYPE_HARDLINK)
906 type = chain->data->ipdata.target_type;
/*
 * isdir == 0 rejects directories and isdir == 1 rejects non-directories;
 * the error values assigned are on lines not shown here.
 */
908 if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) {
912 if (type != HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) {
918 * Hardlink must be resolved. We can't hold parent locked while we
919 * do this or we could deadlock.
921 * On success chain will be adjusted to point at the hardlink target
922 * and ochain will point to the hardlink pointer in the original
923 * directory. Otherwise chain remains pointing to the original.
/* parent is unlocked before the hardlink target is searched for. */
925 if (chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) {
926 KKASSERT(parent_ref == 0);
927 hammer2_chain_unlock(parent);
929 error = hammer2_hardlink_find(dip, &chain, &ochain);
933 * If this is a directory the directory must be empty. However, if
934 * isdir < 0 we are doing a rename and the directory does not have
937 * NOTE: We check the full key range here which covers both visible
938 * and invisible entries. Theoretically there should be no
939 * invisible (hardlink target) entries if there are no visible
/* Any chain found in the full key range means the directory has entries. */
942 if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir >= 0) {
943 dparent = hammer2_chain_lookup_init(chain, 0);
944 dchain = hammer2_chain_lookup(&dparent,
945 0, (hammer2_key_t)-1,
946 HAMMER2_LOOKUP_NODATA);
/* Both visible paths finish the lookup on dparent. */
948 hammer2_chain_unlock(dchain);
949 hammer2_chain_lookup_done(dparent);
953 hammer2_chain_lookup_done(dparent);
959 * Ok, we can now unlink the chain. We always decrement nlinks even
960 * if the entry can be deleted in case someone has the file open and
963 * The chain itself will no longer be in the on-media topology but
964 * can still be flushed to the media (e.g. if an open descriptor
965 * remains). When the last vnode/ip ref goes away the chain will
966 * be marked unmodified, avoiding any further (now unnecesary) I/O.
968 * A non-NULL ochain indicates a hardlink.
972 * Delete the original hardlink pointer.
974 * NOTE: parent from above is NULL when ochain != NULL
975 * so we can reuse it.
/*
 * Lock order: sample ochain->parent, unlock ochain, lock the parent, then
 * re-lock ochain and re-check that ochain->parent still matches.  On a
 * mismatch the parent is unlocked and dropped.
 * NOTE(review): presumably this sits in a retry loop; confirm.
 */
977 hammer2_chain_lock(ochain, HAMMER2_RESOLVE_ALWAYS);
980 parent = ochain->parent;
981 hammer2_chain_ref(parent);
982 hammer2_chain_unlock(ochain);
983 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
984 hammer2_chain_lock(ochain, HAMMER2_RESOLVE_ALWAYS);
985 if (ochain->parent == parent)
987 hammer2_chain_unlock(parent);
988 hammer2_chain_drop(parent);
/* Delete the pointer entry, then release both locks and the parent ref. */
991 hammer2_chain_delete(trans, parent, ochain);
992 hammer2_chain_unlock(ochain);
993 hammer2_chain_unlock(parent);
994 hammer2_chain_drop(parent);
998 * Then decrement nlinks on hardlink target, deleting
999 * the target when nlinks drops to 0.
/*
 * Last link: a temporary ref keeps chain alive while it is unlocked so
 * that dparent can be locked before chain is locked again.
 */
1001 if (chain->data->ipdata.nlinks == 1) {
1002 dparent = chain->parent;
1003 hammer2_chain_ref(chain);
1004 hammer2_chain_unlock(chain);
1005 hammer2_chain_lock(dparent, HAMMER2_RESOLVE_ALWAYS);
1006 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
1007 hammer2_chain_drop(chain);
1008 hammer2_chain_modify(trans, chain, 0);
1009 --chain->data->ipdata.nlinks;
1010 hammer2_chain_delete(trans, dparent, chain);
1011 hammer2_chain_unlock(dparent);
/* More links remain: only the link count is decremented. */
1013 hammer2_chain_modify(trans, chain, 0);
1014 --chain->data->ipdata.nlinks;
1018 * Otherwise this was not a hardlink and we can just
1019 * remove the entry and decrement nlinks.
1021 * NOTE: *_get() integrates chain's lock into the inode lock.
1023 ipdata = &chain->data->ipdata;
1024 hammer2_chain_modify(trans, chain, 0);
1026 hammer2_chain_delete(trans, parent, chain);
/*
 * Cleanup of chain, parent and ochain; the conditions guarding each call
 * are on lines not shown here.
 */
1032 hammer2_chain_unlock(chain);
1034 hammer2_chain_lookup_done(parent);
1036 hammer2_chain_drop(parent);
1039 hammer2_chain_drop(ochain);
1045 * Calculate the allocation size for the file fragment straddling EOF
/*
 * hammer2_inode_calc_alloc: derive an allocation radix from the part of
 * filesize selected by HAMMER2_PBUFMASK.  The return statement is on a
 * line not shown here.
 */
1048 hammer2_inode_calc_alloc(hammer2_key_t filesize)
/* frag is filesize truncated to int and masked with HAMMER2_PBUFMASK. */
1050 int frag = (int)filesize & HAMMER2_PBUFMASK;
/*
 * Stops at the smallest radix, starting from HAMMER2_MINALLOCRADIX, for
 * which (1 << radix) >= frag.
 */
1055 for (radix = HAMMER2_MINALLOCRADIX; frag > (1 << radix); ++radix)
1061 * Given an exclusively locked inode we consolidate its chain for hardlink
1062 * creation, adding (nlinks) to the file's link count and potentially
1063 * relocating the file to a directory common to ip->pip and tdip.
1065 * Returns a locked chain in (*chainp) (the chain's lock is in addition to
1066 * any lock it might already have due to the inode being locked). *chainp
1067 * is set unconditionally and its previous contents can be garbage.
1069 * The caller is responsible for replacing ip->chain, not us. For certain
1070 * operations such as renames the caller may do additional manipulation
1071 * of the chain before replacing ip->chain.
/*
 * hammer2_hardlink_consolidate: prepare the chain of ip for hardlink
 * creation.  Depending on the state it either adjusts nlinks in place or
 * duplicates the inode into the common parent directory of its current
 * directory and tdip, then converts or deletes the old chain.
 */
1074 hammer2_hardlink_consolidate(hammer2_trans_t *trans, hammer2_inode_t *ip,
1075 hammer2_chain_t **chainp,
1076 hammer2_inode_t *tdip, int nlinks)
1078 hammer2_inode_data_t *ipdata;
1079 hammer2_mount_t *hmp;
1080 hammer2_inode_t *fdip;
1081 hammer2_inode_t *cdip;
1082 hammer2_chain_t *chain;
1083 hammer2_chain_t *nchain;
1084 hammer2_chain_t *parent;
1088 * Extra lock on chain so it can be returned locked.
/* The lock is asserted to succeed. */
1093 error = hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
1094 KKASSERT(error == 0);
/* Early cases: nothing to add on a visible inode, fake mode, disabled. */
1096 if (nlinks == 0 && /* no hardlink needed */
1097 (chain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE)) {
1101 if (hammer2_hardlink_enable < 0) { /* fake hardlinks */
1106 if (hammer2_hardlink_enable == 0) { /* disallow hardlinks */
1107 hammer2_chain_unlock(chain);
1113 * cdip will be returned with a ref, but not locked.
/* NOTE(review): fdip is assigned on a line not shown here. */
1116 cdip = hammer2_inode_common_parent(fdip, tdip);
1119 * If no change in the hardlink's target directory is required and
1120 * this is already a hardlink target, all we need to do is adjust
1124 (chain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE) == 0) {
/* In-place case: only the link count of the existing target changes. */
1126 hammer2_chain_modify(trans, chain, 0);
1127 chain->data->ipdata.nlinks += nlinks;
1135 * We either have to move an existing hardlink target or we have
1136 * to create a fresh hardlink target.
1138 * Hardlink targets are hidden inodes in a parent directory common
1139 * to all directory entries referencing the hardlink.
/* The duplicate is created in cdip; its status comes back in error. */
1141 nchain = hammer2_inode_duplicate(trans, chain, cdip, &error);
1144 * Bump nlinks on duplicated hidden inode.
1146 hammer2_chain_modify(trans, nchain, 0);
1147 nchain->data->ipdata.nlinks += nlinks;
1150 * If the old chain is not a hardlink target then replace
1151 * it with a OBJTYPE_HARDLINK pointer.
1153 * If the old chain IS a hardlink target then delete it.
/*
 * Visible chain: convert it into an OBJTYPE_HARDLINK pointer.  The old
 * type is kept in target_type and the ownership, PFS, quota, count and
 * tid fields listed below are zeroed.
 */
1155 if (chain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE) {
1156 hammer2_chain_modify(trans, chain, 0);
1157 ipdata = &chain->data->ipdata;
1158 ipdata->target_type = ipdata->type;
1159 ipdata->type = HAMMER2_OBJTYPE_HARDLINK;
1167 bzero(&ipdata->uid, sizeof(ipdata->uid));
1168 bzero(&ipdata->gid, sizeof(ipdata->gid));
1169 ipdata->op_flags = HAMMER2_OPFLAG_DIRECTDATA;
1170 ipdata->cap_flags = 0;
1174 ipdata->iparent = 0; /* XXX */
1175 ipdata->pfs_type = 0;
1176 ipdata->pfs_inum = 0;
1177 bzero(&ipdata->pfs_clid, sizeof(ipdata->pfs_clid));
1178 bzero(&ipdata->pfs_fsid, sizeof(ipdata->pfs_fsid));
1179 ipdata->data_quota = 0;
1180 ipdata->data_count = 0;
1181 ipdata->inode_quota = 0;
1182 ipdata->inode_count = 0;
1183 ipdata->attr_tid = 0;
1184 ipdata->dirent_tid = 0;
1185 bzero(&ipdata->u, sizeof(ipdata->u));
1186 /* XXX transaction ids */
/* Invisible chain (already a hardlink target): delete the old copy. */
1188 kprintf("DELETE INVISIBLE\n");
/*
 * Lock order: take refs on parent and chain, unlock chain, lock parent,
 * re-lock chain, drop the temporary chain ref, then re-check that
 * chain->parent still matches.  On a mismatch the parent is unlocked and
 * dropped.  NOTE(review): presumably inside a retry loop; confirm.
 */
1190 parent = chain->parent;
1191 hammer2_chain_ref(parent);
1192 hammer2_chain_ref(chain);
1193 hammer2_chain_unlock(chain);
1194 hammer2_chain_lock(parent,
1195 HAMMER2_RESOLVE_ALWAYS);
1196 hammer2_chain_lock(chain,
1197 HAMMER2_RESOLVE_ALWAYS);
1198 hammer2_chain_drop(chain);
1199 if (chain->parent == parent)
1201 hammer2_chain_unlock(parent);
1202 hammer2_chain_drop(parent);
1204 hammer2_chain_delete(trans, parent, chain);
1205 hammer2_chain_unlock(parent);
1206 hammer2_chain_drop(parent);
1210 * Return the new chain.
1212 hammer2_chain_unlock(chain);
1218 hammer2_chain_unlock(chain);
1223 * Cleanup, chain/nchain already dealt with.
/* Release the reference that hammer2_inode_common_parent() returned. */
1226 hammer2_inode_drop(cdip);
1232 * If (*ochainp) is non-NULL it points to the forward OBJTYPE_HARDLINK
1233 * inode while (*chainp) points to the resolved (hidden hardlink
1234 * target) inode. In this situation when nlinks is 1 we wish to
1235 * deconsolidate the hardlink, moving it back to the directory that now
1236 * represents the only remaining link.
/*
 * hammer2_hardlink_deconsolidate: entry point for moving a hardlink target
 * back next to its only remaining link.  The only logic visible here is
 * the check for a NULL *ochainp.
 */
1239 hammer2_hardlink_deconsolidate(hammer2_trans_t *trans,
1240 hammer2_inode_t *dip,
1241 hammer2_chain_t **chainp,
1242 hammer2_chain_t **ochainp)
/* NULL *ochainp: no forward OBJTYPE_HARDLINK inode was supplied. */
1244 if (*ochainp == NULL)
1251 * The caller presents a locked *chainp pointing to a HAMMER2_BREF_TYPE_INODE
1252 * with an obj_type of HAMMER2_OBJTYPE_HARDLINK. This routine will gobble
1253 * the *chainp and return a new locked *chainp representing the file target
1254 * (the original *chainp will be unlocked).
1256 * When a match is found the chain representing the original HARDLINK
1257 * will be returned in *ochainp with a ref, but not locked.
1259 * When no match is found *chainp is set to NULL and EIO is returned.
1260 * (*ochainp) will still be set to the original chain with a ref but not
/*
 * hammer2_hardlink_find: resolve the hardlink pointer in *chainp to its
 * target by looking up the inode number of the pointer in successive
 * inodes reached through the pip links.
 */
1264 hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_chain_t **chainp,
1265 hammer2_chain_t **ochainp)
1267 hammer2_chain_t *chain = *chainp;
1268 hammer2_chain_t *parent;
1269 hammer2_inode_t *ip;
1270 hammer2_inode_t *pip;
/* NOTE(review): pip is presumably initialised from dip on a hidden line. */
1274 hammer2_inode_ref(pip); /* for loop */
1275 hammer2_chain_ref(chain); /* for (*ochainp) */
1280 * Locate the hardlink. pip is referenced and not locked,
/* The search key is the inode number stored in the pointer entry. */
1285 lhc = chain->data->ipdata.inum;
1286 hammer2_chain_unlock(chain);
1289 while ((ip = pip) != NULL) {
/* Lock ip, then release the loop reference that was held on it. */
1290 hammer2_inode_lock_ex(ip);
1291 parent = hammer2_chain_lookup_init(ip->chain, 0);
1292 hammer2_inode_drop(ip); /* loop */
1293 KKASSERT(parent->bref.type == HAMMER2_BREF_TYPE_INODE);
1294 chain = hammer2_chain_lookup(&parent, lhc, lhc, 0);
1295 hammer2_chain_lookup_done(parent);
/*
 * Step to the parent inode, taking a ref on it before ip is unlocked.
 * NOTE(review): presumably reached only when the lookup found nothing.
 */
1298 pip = ip->pip; /* safe, ip held locked */
1300 hammer2_inode_ref(pip); /* loop */
1301 hammer2_inode_unlock_ex(ip);
1305 * chain is locked, ip is locked. Unlock ip, return the locked
1306 * chain. *ipp is already set w/a ref count and not locked.
1308 * (parent is already unlocked).
1311 hammer2_inode_unlock_ex(ip);
/* The resolved chain must itself be an inode. */
1314 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
1315 /* already locked */
1323 * Find the directory common to both fdip and tdip, hold and return
/*
 * hammer2_inode_common_parent: find an inode reachable from both fdip and
 * tdip by following pip links.  A reference is added to the inode that is
 * found; failing to find one ends in a panic.
 */
1327 hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip)
1329 hammer2_inode_t *scan1;
1330 hammer2_inode_t *scan2;
1333 * We used to have a depth field but it complicated matters too
1334 * much for directory renames. So now its ugly. Check for
1335 * simple cases before giving up and doing it the expensive way.
1337 * XXX need a bottom-up topology stability lock
/* Simple case: same inode, or fdip is the direct parent of tdip. */
1339 if (fdip == tdip || fdip == tdip->pip) {
1340 hammer2_inode_ref(fdip);
/* Simple case: tdip is the direct parent of fdip. */
1343 if (fdip->pip == tdip) {
1344 hammer2_inode_ref(tdip);
/*
 * Expensive case: walk scan1 up from fdip while it stays in the same pmp,
 * comparing it against scan2 on each step.
 * NOTE(review): scan2 presumably starts at tdip and advances through pip
 * on lines not shown here; confirm.
 */
1351 for (scan1 = fdip; scan1->pmp == fdip->pmp; scan1 = scan1->pip) {
1353 while (scan2->pmp == tdip->pmp) {
1354 if (scan1 == scan2) {
1355 hammer2_inode_ref(scan1);
/* Reaching this point without a match is treated as fatal. */
1361 panic("hammer2_inode_common_parent: no common parent %p %p\n",