hammer2 - Refactor file unlink w/open descriptor
[dragonfly.git] / sys / vfs / hammer2 / hammer2_inode.c
1 /*
2  * Copyright (c) 2011-2013 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 #include <sys/cdefs.h>
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/types.h>
39 #include <sys/lock.h>
40 #include <sys/uuid.h>
41
42 #include "hammer2.h"
43
44 static void hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
45                                          hammer2_chain_t **chainp,
46                                          hammer2_tid_t inum);
47
48 RB_GENERATE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp,
49              hammer2_tid_t, inum);
50
51 int
52 hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2)
53 {
54         if (ip1->inum < ip2->inum)
55                 return(-1);
56         if (ip1->inum > ip2->inum)
57                 return(1);
58         return(0);
59 }
60
61 /*
62  * HAMMER2 inode locks
63  *
64  * HAMMER2 offers shared locks and exclusive locks on inodes.
65  *
66  * An inode's ip->chain pointer is resolved and stable while an inode is
67  * locked, and can be cleaned out at any time (become NULL) when an inode
68  * is not locked.
69  *
70  * This function handles duplication races and hardlink replacement races
71  * which can cause ip's cached chain to become stale.
72  *
73  * The underlying chain is also locked and returned.
74  *
75  * NOTE: We don't combine the inode/chain lock because putting away an
76  *       inode would otherwise confuse multiple lock holders of the inode.
77  */
78 hammer2_chain_t *
79 hammer2_inode_lock_ex(hammer2_inode_t *ip)
80 {
81         hammer2_chain_t *chain;
82         hammer2_chain_t *ochain;
83         hammer2_chain_core_t *core;
84         int error;
85
86         hammer2_inode_ref(ip);
87         ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);
88
89         chain = ip->chain;
90         core = chain->core;
91         for (;;) {
92                 if (chain->flags & HAMMER2_CHAIN_DUPLICATED) {
93                         spin_lock(&core->cst.spin);
94                         while (chain->flags & HAMMER2_CHAIN_DUPLICATED)
95                                 chain = TAILQ_NEXT(chain, core_entry);
96                         hammer2_chain_ref(chain);
97                         spin_unlock(&core->cst.spin);
98                         hammer2_inode_repoint(ip, NULL, chain);
99                         hammer2_chain_drop(chain);
100                 }
101                 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
102                 if ((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0)
103                         break;
104                 hammer2_chain_unlock(chain);
105         }
106         if (chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) {
107                 error = hammer2_hardlink_find(ip->pip, &chain, &ochain);
108                 hammer2_chain_drop(ochain);
109                 KKASSERT(error == 0);
110                 /* XXX error handling */
111         }
112         return (chain);
113 }
114
115 void
116 hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_chain_t *chain)
117 {
118         /*
119          * XXX this will catch parent directories too which we don't
120          *     really want.
121          */
122         if (chain)
123                 hammer2_chain_unlock(chain);
124         ccms_thread_unlock(&ip->topo_cst);
125         hammer2_inode_drop(ip);
126 }
127
128 /*
129  * NOTE: We don't combine the inode/chain lock because putting away an
130  *       inode would otherwise confuse multiple lock holders of the inode.
131  *
132  *       Shared locks are especially sensitive to having too many shared
133  *       lock counts (from the same thread) on certain paths which might
134  *       need to upgrade them.  Only one count of a shared lock can be
135  *       upgraded.
136  */
137 hammer2_chain_t *
138 hammer2_inode_lock_sh(hammer2_inode_t *ip)
139 {
140         hammer2_chain_t *chain;
141
142         hammer2_inode_ref(ip);
143         for (;;) {
144                 ccms_thread_lock(&ip->topo_cst, CCMS_STATE_SHARED);
145
146                 chain = ip->chain;
147                 KKASSERT(chain != NULL);        /* for now */
148                 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS |
149                                           HAMMER2_RESOLVE_SHARED);
150
151                 /*
152                  * Resolve duplication races, resolve hardlinks by giving
153                  * up and cycling an exclusive lock.
154                  */
155                 if ((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0 &&
156                     chain->data->ipdata.type != HAMMER2_OBJTYPE_HARDLINK) {
157                         break;
158                 }
159                 hammer2_chain_unlock(chain);
160                 ccms_thread_unlock(&ip->topo_cst);
161                 chain = hammer2_inode_lock_ex(ip);
162                 hammer2_inode_unlock_ex(ip, chain);
163         }
164         return (chain);
165 }
166
167 void
168 hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_chain_t *chain)
169 {
170         if (chain)
171                 hammer2_chain_unlock(chain);
172         ccms_thread_unlock(&ip->topo_cst);
173         hammer2_inode_drop(ip);
174 }
175
176 ccms_state_t
177 hammer2_inode_lock_temp_release(hammer2_inode_t *ip)
178 {
179         return(ccms_thread_lock_temp_release(&ip->topo_cst));
180 }
181
182 void
183 hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, ccms_state_t ostate)
184 {
185         ccms_thread_lock_temp_restore(&ip->topo_cst, ostate);
186 }
187
188 ccms_state_t
189 hammer2_inode_lock_upgrade(hammer2_inode_t *ip)
190 {
191         return(ccms_thread_lock_upgrade(&ip->topo_cst));
192 }
193
194 void
195 hammer2_inode_lock_downgrade(hammer2_inode_t *ip, ccms_state_t ostate)
196 {
197         ccms_thread_lock_downgrade(&ip->topo_cst, ostate);
198 }
199
200 /*
201  * Lookup an inode by inode number
202  */
203 hammer2_inode_t *
204 hammer2_inode_lookup(hammer2_pfsmount_t *pmp, hammer2_tid_t inum)
205 {
206         hammer2_inode_t *ip;
207
208         if (pmp) {
209                 spin_lock(&pmp->inum_spin);
210                 ip = RB_LOOKUP(hammer2_inode_tree, &pmp->inum_tree, inum);
211                 if (ip)
212                         hammer2_inode_ref(ip);
213                 spin_unlock(&pmp->inum_spin);
214         } else {
215                 ip = NULL;
216         }
217         return(ip);
218 }
219
220 /*
221  * Adding a ref to an inode is only legal if the inode already has at least
222  * one ref.
223  */
224 void
225 hammer2_inode_ref(hammer2_inode_t *ip)
226 {
227         atomic_add_int(&ip->refs, 1);
228 }
229
230 /*
231  * Drop an inode reference, freeing the inode when the last reference goes
232  * away.
233  */
234 void
235 hammer2_inode_drop(hammer2_inode_t *ip)
236 {
237         hammer2_pfsmount_t *pmp;
238         hammer2_inode_t *pip;
239         u_int refs;
240
241         while (ip) {
242                 refs = ip->refs;
243                 cpu_ccfence();
244                 if (refs == 1) {
245                         /*
246                          * Transition to zero, must interlock with
247                          * the inode inumber lookup tree (if applicable).
248                          *
249                          * NOTE: The super-root inode has no pmp.
250                          */
251                         pmp = ip->pmp;
252                         if (pmp)
253                                 spin_lock(&pmp->inum_spin);
254
255                         if (atomic_cmpset_int(&ip->refs, 1, 0)) {
256                                 KKASSERT(ip->topo_cst.count == 0);
257                                 if (ip->flags & HAMMER2_INODE_ONRBTREE) {
258                                         atomic_clear_int(&ip->flags,
259                                                      HAMMER2_INODE_ONRBTREE);
260                                         RB_REMOVE(hammer2_inode_tree,
261                                                   &pmp->inum_tree, ip);
262                                 }
263                                 if (pmp)
264                                         spin_unlock(&pmp->inum_spin);
265
266                                 pip = ip->pip;
267                                 ip->pip = NULL;
268                                 ip->pmp = NULL;
269
270                                 /*
271                                  * Cleaning out ip->chain isn't entirely
272                                  * trivial.
273                                  */
274                                 hammer2_inode_repoint(ip, NULL, NULL);
275
276                                 /*
277                                  * We have to drop pip (if non-NULL) to
278                                  * dispose of our implied reference from
279                                  * ip->pip.  We can simply loop on it.
280                                  */
281                                 if (pmp) {
282                                         KKASSERT((ip->flags &
283                                                   HAMMER2_INODE_SROOT) == 0);
284                                         kfree(ip, pmp->minode);
285                                         atomic_add_long(&pmp->inmem_inodes, -1);
286                                 } else {
287                                         KKASSERT(ip->flags &
288                                                  HAMMER2_INODE_SROOT);
289                                         kfree(ip, M_HAMMER2);
290                                 }
291                                 ip = pip;
292                                 /* continue with pip (can be NULL) */
293                         } else {
294                                 if (pmp)
295                                         spin_unlock(&ip->pmp->inum_spin);
296                         }
297                 } else {
298                         /*
299                          * Non zero transition
300                          */
301                         if (atomic_cmpset_int(&ip->refs, refs, refs - 1))
302                                 break;
303                 }
304         }
305 }
306
307 /*
308  * Get the vnode associated with the given inode, allocating the vnode if
309  * necessary.  The vnode will be returned exclusively locked.
310  *
311  * The caller must lock the inode (shared or exclusive).
312  *
313  * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
314  * races.
315  */
316 struct vnode *
317 hammer2_igetv(hammer2_inode_t *ip, int *errorp)
318 {
319         hammer2_inode_data_t *ipdata;
320         hammer2_pfsmount_t *pmp;
321         struct vnode *vp;
322         ccms_state_t ostate;
323
324         pmp = ip->pmp;
325         KKASSERT(pmp != NULL);
326         *errorp = 0;
327         ipdata = &ip->chain->data->ipdata;
328
329         for (;;) {
330                 /*
331                  * Attempt to reuse an existing vnode assignment.  It is
332                  * possible to race a reclaim so the vget() may fail.  The
333                  * inode must be unlocked during the vget() to avoid a
334                  * deadlock against a reclaim.
335                  */
336                 vp = ip->vp;
337                 if (vp) {
338                         /*
339                          * Inode must be unlocked during the vget() to avoid
340                          * possible deadlocks, but leave the ip ref intact.
341                          *
342                          * vnode is held to prevent destruction during the
343                          * vget().  The vget() can still fail if we lost
344                          * a reclaim race on the vnode.
345                          */
346                         vhold(vp);
347                         ostate = hammer2_inode_lock_temp_release(ip);
348                         if (vget(vp, LK_EXCLUSIVE)) {
349                                 vdrop(vp);
350                                 hammer2_inode_lock_temp_restore(ip, ostate);
351                                 continue;
352                         }
353                         hammer2_inode_lock_temp_restore(ip, ostate);
354                         vdrop(vp);
355                         /* vp still locked and ref from vget */
356                         if (ip->vp != vp) {
357                                 kprintf("hammer2: igetv race %p/%p\n",
358                                         ip->vp, vp);
359                                 vput(vp);
360                                 continue;
361                         }
362                         *errorp = 0;
363                         break;
364                 }
365
366                 /*
367                  * No vnode exists, allocate a new vnode.  Beware of
368                  * allocation races.  This function will return an
369                  * exclusively locked and referenced vnode.
370                  */
371                 *errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0);
372                 if (*errorp) {
373                         kprintf("hammer2: igetv getnewvnode failed %d\n",
374                                 *errorp);
375                         vp = NULL;
376                         break;
377                 }
378
379                 /*
380                  * Lock the inode and check for an allocation race.
381                  */
382                 ostate = hammer2_inode_lock_upgrade(ip);
383                 if (ip->vp != NULL) {
384                         vp->v_type = VBAD;
385                         vx_put(vp);
386                         hammer2_inode_lock_downgrade(ip, ostate);
387                         continue;
388                 }
389
390                 switch (ipdata->type) {
391                 case HAMMER2_OBJTYPE_DIRECTORY:
392                         vp->v_type = VDIR;
393                         break;
394                 case HAMMER2_OBJTYPE_REGFILE:
395                         vp->v_type = VREG;
396                         vinitvmio(vp, ipdata->size,
397                                   HAMMER2_LBUFSIZE,
398                                   (int)ipdata->size & HAMMER2_LBUFMASK);
399                         break;
400                 case HAMMER2_OBJTYPE_SOFTLINK:
401                         /*
402                          * XXX for now we are using the generic file_read
403                          * and file_write code so we need a buffer cache
404                          * association.
405                          */
406                         vp->v_type = VLNK;
407                         vinitvmio(vp, ipdata->size,
408                                   HAMMER2_LBUFSIZE,
409                                   (int)ipdata->size & HAMMER2_LBUFMASK);
410                         break;
411                 case HAMMER2_OBJTYPE_CDEV:
412                         vp->v_type = VCHR;
413                         /* fall through */
414                 case HAMMER2_OBJTYPE_BDEV:
415                         vp->v_ops = &pmp->mp->mnt_vn_spec_ops;
416                         if (ipdata->type != HAMMER2_OBJTYPE_CDEV)
417                                 vp->v_type = VBLK;
418                         addaliasu(vp, ipdata->rmajor, ipdata->rminor);
419                         break;
420                 case HAMMER2_OBJTYPE_FIFO:
421                         vp->v_type = VFIFO;
422                         vp->v_ops = &pmp->mp->mnt_vn_fifo_ops;
423                         break;
424                 default:
425                         panic("hammer2: unhandled objtype %d", ipdata->type);
426                         break;
427                 }
428
429                 if (ip == pmp->iroot)
430                         vsetflags(vp, VROOT);
431
432                 vp->v_data = ip;
433                 ip->vp = vp;
434                 hammer2_inode_ref(ip);          /* vp association */
435                 hammer2_inode_lock_downgrade(ip, ostate);
436                 break;
437         }
438
439         /*
440          * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0.
441          */
442         if (hammer2_debug & 0x0002) {
443                 kprintf("igetv vp %p refs 0x%08x aux 0x%08x\n",
444                         vp, vp->v_refcnt, vp->v_auxrefs);
445         }
446         return (vp);
447 }
448
449 /*
450  * The passed-in chain must be locked and the returned inode will also be
451  * locked.  This routine typically locates or allocates the inode, assigns
452  * ip->chain (adding a ref to chain if necessary), and returns the inode.
453  *
454  * The hammer2_inode structure regulates the interface between the high level
455  * kernel VNOPS API and the filesystem backend (the chains).
456  *
457  * WARNING!  This routine sucks up the chain's lock (makes it part of the
458  *           inode lock from the point of view of the inode lock API),
459  *           so callers need to be careful.
460  *
461  * WARNING!  The mount code is allowed to pass dip == NULL for iroot and
462  *           is allowed to pass pmp == NULL and dip == NULL for sroot.
463  */
464 hammer2_inode_t *
465 hammer2_inode_get(hammer2_pfsmount_t *pmp, hammer2_inode_t *dip,
466                   hammer2_chain_t *chain)
467 {
468         hammer2_inode_t *nip;
469
470         KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
471
472         /*
473          * Interlocked lookup/ref of the inode.  This code is only needed
474          * when looking up inodes with nlinks != 0 (TODO: optimize out
475          * otherwise and test for duplicates).
476          */
477 again:
478         for (;;) {
479                 nip = hammer2_inode_lookup(pmp, chain->data->ipdata.inum);
480                 if (nip == NULL)
481                         break;
482                 ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE);
483                 if ((nip->flags & HAMMER2_INODE_ONRBTREE) == 0) { /* race */
484                         ccms_thread_unlock(&nip->topo_cst);
485                         hammer2_inode_drop(nip);
486                         continue;
487                 }
488                 if (nip->chain != chain)
489                         hammer2_inode_repoint(nip, NULL, chain);
490
491                 /*
492                  * Consolidated nip/nip->chain is locked (chain locked
493                  * by caller).
494                  */
495                 return nip;
496         }
497
498         /*
499          * We couldn't find the inode number, create a new inode.
500          */
501         if (pmp) {
502                 nip = kmalloc(sizeof(*nip), pmp->minode, M_WAITOK | M_ZERO);
503                 atomic_add_long(&pmp->inmem_inodes, 1);
504                 hammer2_chain_memory_inc(pmp);
505                 hammer2_chain_memory_wakeup(pmp);
506         } else {
507                 nip = kmalloc(sizeof(*nip), M_HAMMER2, M_WAITOK | M_ZERO);
508                 nip->flags = HAMMER2_INODE_SROOT;
509         }
510         nip->inum = chain->data->ipdata.inum;
511         nip->size = chain->data->ipdata.size;
512         nip->mtime = chain->data->ipdata.mtime;
513         hammer2_inode_repoint(nip, NULL, chain);
514         nip->pip = dip;                         /* can be NULL */
515         if (dip)
516                 hammer2_inode_ref(dip); /* ref dip for nip->pip */
517
518         nip->pmp = pmp;
519
520         /*
521          * ref and lock on nip gives it state compatible to after a
522          * hammer2_inode_lock_ex() call.
523          */
524         nip->refs = 1;
525         ccms_cst_init(&nip->topo_cst, &nip->chain);
526         ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE);
527         /* combination of thread lock and chain lock == inode lock */
528
529         /*
530          * Attempt to add the inode.  If it fails we raced another inode
531          * get.  Undo all the work and try again.
532          */
533         if (pmp) {
534                 spin_lock(&pmp->inum_spin);
535                 if (RB_INSERT(hammer2_inode_tree, &pmp->inum_tree, nip)) {
536                         spin_unlock(&pmp->inum_spin);
537                         ccms_thread_unlock(&nip->topo_cst);
538                         hammer2_inode_drop(nip);
539                         goto again;
540                 }
541                 atomic_set_int(&nip->flags, HAMMER2_INODE_ONRBTREE);
542                 spin_unlock(&pmp->inum_spin);
543         }
544
545         return (nip);
546 }
547
548 /*
549  * Create a new inode in the specified directory using the vattr to
550  * figure out the type of inode.
551  *
552  * If no error occurs the new inode with its chain locked is returned in
553  * *nipp, otherwise an error is returned and *nipp is set to NULL.
554  *
555  * If vap and/or cred are NULL the related fields are not set and the
556  * inode type defaults to a directory.  This is used when creating PFSs
557  * under the super-root, so the inode number is set to 1 in this case.
558  *
559  * dip is not locked on entry.
560  */
561 hammer2_inode_t *
562 hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip,
563                      struct vattr *vap, struct ucred *cred,
564                      const uint8_t *name, size_t name_len,
565                      hammer2_chain_t **chainp, int *errorp)
566 {
567         hammer2_inode_data_t *dipdata;
568         hammer2_inode_data_t *nipdata;
569         hammer2_chain_t *chain;
570         hammer2_chain_t *parent;
571         hammer2_inode_t *nip;
572         hammer2_key_t key_dummy;
573         hammer2_key_t lhc;
574         int error;
575         uid_t xuid;
576         uuid_t dip_uid;
577         uuid_t dip_gid;
578         uint32_t dip_mode;
579         uint8_t dip_algo;
580         int cache_index = -1;
581
582         lhc = hammer2_dirhash(name, name_len);
583         *errorp = 0;
584
585         /*
586          * Locate the inode or indirect block to create the new
587          * entry in.  At the same time check for key collisions
588          * and iterate until we don't get one.
589          *
590          * NOTE: hidden inodes do not have iterators.
591          */
592 retry:
593         parent = hammer2_inode_lock_ex(dip);
594         dipdata = &dip->chain->data->ipdata;
595         dip_uid = dipdata->uid;
596         dip_gid = dipdata->gid;
597         dip_mode = dipdata->mode;
598         dip_algo = dipdata->comp_algo;
599
600         error = 0;
601         while (error == 0) {
602                 chain = hammer2_chain_lookup(&parent, &key_dummy,
603                                              lhc, lhc, &cache_index, 0);
604                 if (chain == NULL)
605                         break;
606                 if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0)
607                         error = ENOSPC;
608                 if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
609                         error = ENOSPC;
610                 hammer2_chain_unlock(chain);
611                 chain = NULL;
612                 ++lhc;
613         }
614
615         if (error == 0) {
616                 error = hammer2_chain_create(trans, &parent, &chain,
617                                              lhc, 0,
618                                              HAMMER2_BREF_TYPE_INODE,
619                                              HAMMER2_INODE_BYTES);
620         }
621
622         /*
623          * Cleanup and handle retries.
624          */
625         if (error == EAGAIN) {
626                 hammer2_chain_ref(parent);
627                 hammer2_inode_unlock_ex(dip, parent);
628                 hammer2_chain_wait(parent);
629                 hammer2_chain_drop(parent);
630                 goto retry;
631         }
632         hammer2_inode_unlock_ex(dip, parent);
633
634         if (error) {
635                 KKASSERT(chain == NULL);
636                 *errorp = error;
637                 return (NULL);
638         }
639
640         /*
641          * Set up the new inode.
642          *
643          * NOTE: *_get() integrates chain's lock into the inode lock.
644          *
645          * NOTE: Only one new inode can currently be created per
646          *       transaction.  If the need arises we can adjust
647          *       hammer2_trans_init() to allow more.
648          *
649          * NOTE: nipdata will have chain's blockset data.
650          */
651         chain->data->ipdata.inum = trans->inode_tid;
652         nip = hammer2_inode_get(dip->pmp, dip, chain);
653         nipdata = &chain->data->ipdata;
654
655         if (vap) {
656                 KKASSERT(trans->inodes_created == 0);
657                 nipdata->type = hammer2_get_obj_type(vap->va_type);
658                 nipdata->inum = trans->inode_tid;
659                 ++trans->inodes_created;
660
661                 switch (nipdata->type) {
662                 case HAMMER2_OBJTYPE_CDEV:
663                 case HAMMER2_OBJTYPE_BDEV:
664                         nipdata->rmajor = vap->va_rmajor;
665                         nipdata->rminor = vap->va_rminor;
666                         break;
667                 default:
668                         break;
669                 }
670         } else {
671                 nipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
672                 nipdata->inum = 1;
673         }
674         
675         /* Inherit parent's inode compression mode. */
676         nip->comp_heuristic = 0;
677         nipdata->comp_algo = dip_algo;
678         nipdata->version = HAMMER2_INODE_VERSION_ONE;
679         hammer2_update_time(&nipdata->ctime);
680         nipdata->mtime = nipdata->ctime;
681         if (vap)
682                 nipdata->mode = vap->va_mode;
683         nipdata->nlinks = 1;
684         if (vap) {
685                 if (dip && dip->pmp) {
686                         xuid = hammer2_to_unix_xid(&dip_uid);
687                         xuid = vop_helper_create_uid(dip->pmp->mp,
688                                                      dip_mode,
689                                                      xuid,
690                                                      cred,
691                                                      &vap->va_mode);
692                 } else {
693                         /* super-root has no dip and/or pmp */
694                         xuid = 0;
695                 }
696                 if (vap->va_vaflags & VA_UID_UUID_VALID)
697                         nipdata->uid = vap->va_uid_uuid;
698                 else if (vap->va_uid != (uid_t)VNOVAL)
699                         hammer2_guid_to_uuid(&nipdata->uid, vap->va_uid);
700                 else
701                         hammer2_guid_to_uuid(&nipdata->uid, xuid);
702
703                 if (vap->va_vaflags & VA_GID_UUID_VALID)
704                         nipdata->gid = vap->va_gid_uuid;
705                 else if (vap->va_gid != (gid_t)VNOVAL)
706                         hammer2_guid_to_uuid(&nipdata->gid, vap->va_gid);
707                 else if (dip)
708                         nipdata->gid = dip_gid;
709         }
710
711         /*
712          * Regular files and softlinks allow a small amount of data to be
713          * directly embedded in the inode.  This flag will be cleared if
714          * the size is extended past the embedded limit.
715          */
716         if (nipdata->type == HAMMER2_OBJTYPE_REGFILE ||
717             nipdata->type == HAMMER2_OBJTYPE_SOFTLINK) {
718                 nipdata->op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
719         }
720
721         KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
722         bcopy(name, nipdata->filename, name_len);
723         nipdata->name_key = lhc;
724         nipdata->name_len = name_len;
725         *chainp = chain;
726
727         return (nip);
728 }
729
730 /*
731  * chain may have been moved around by the create.
732  */
733 void
734 hammer2_chain_refactor(hammer2_chain_t **chainp)
735 {
736         hammer2_chain_t *chain = *chainp;
737         hammer2_chain_core_t *core;
738
739         core = chain->core;
740         while (chain->flags & HAMMER2_CHAIN_DUPLICATED) {
741                 spin_lock(&core->cst.spin);
742                 chain = TAILQ_NEXT(chain, core_entry);
743                 while (chain->flags & HAMMER2_CHAIN_DUPLICATED)
744                         chain = TAILQ_NEXT(chain, core_entry);
745                 hammer2_chain_ref(chain);
746                 spin_unlock(&core->cst.spin);
747                 KKASSERT(chain->core == core);
748
749                 hammer2_chain_unlock(*chainp);
750                 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS |
751                                           HAMMER2_RESOLVE_NOREF); /* eat ref */
752                 *chainp = chain;
753         }
754 }
755
756 /*
757  * Shift *chainp up to the specified directory, change the filename
758  * to "0xINODENUMBER", and adjust the key.  The chain becomes the
759  * invisible hardlink target.
760  *
761  * The original *chainp has already been marked deleted.
762  */
763 static
764 void
765 hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_chain_t **chainp,
766                         hammer2_inode_t *dip, int nlinks, int *errorp)
767 {
768         hammer2_inode_data_t *nipdata;
769         hammer2_chain_t *parent;
770         hammer2_chain_t *chain;
771         hammer2_chain_t *xchain;
772         hammer2_key_t key_dummy;
773         hammer2_key_t lhc;
774         hammer2_blockref_t bref;
775         int cache_index = -1;
776
777         chain = *chainp;
778         lhc = chain->data->ipdata.inum;
779         KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0);
780
781         /*
782          * Locate the inode or indirect block to create the new
783          * entry in.  lhc represents the inode number so there is
784          * no collision iteration.
785          *
786          * There should be no key collisions with invisible inode keys.
787          *
788          * WARNING! Must use inode_lock_ex() on dip to handle a stale
789          *          dip->chain cache.
790          */
791 retry:
792         *errorp = 0;
793         parent = hammer2_inode_lock_ex(dip);
794         /*parent = hammer2_chain_lookup_init(dip->chain, 0);*/
795         xchain = hammer2_chain_lookup(&parent, &key_dummy,
796                                       lhc, lhc, &cache_index, 0);
797         if (xchain) {
798                 kprintf("X3 chain %p parent %p dip %p dip->chain %p\n",
799                         xchain, parent, dip, dip->chain);
800                 hammer2_chain_unlock(xchain);
801                 xchain = NULL;
802                 *errorp = ENOSPC;
803 #if 0
804                 Debugger("X3");
805 #endif
806         }
807
808         /*
809          * Create entry in common parent directory using the seek position
810          * calculated above.
811          *
812          * We must refactor chain because it might have been shifted into
813          * an indirect chain by the create.
814          */
815         if (*errorp == 0) {
816                 KKASSERT(xchain == NULL);
817 #if 0
818                 *errorp = hammer2_chain_create(trans, &parent, &xchain,
819                                                lhc, 0,
820                                                HAMMER2_BREF_TYPE_INODE,/* n/a */
821                                                HAMMER2_INODE_BYTES);   /* n/a */
822 #endif
823                 /*XXX this somehow isn't working on chain XXX*/
824                 /*KKASSERT(xxx)*/
825         }
826
827         /*
828          * Cleanup and handle retries.
829          */
830         if (*errorp == EAGAIN) {
831                 hammer2_chain_ref(parent);
832                 /* hammer2_chain_lookup_done(parent); */
833                 hammer2_inode_unlock_ex(dip, parent);
834                 hammer2_chain_wait(parent);
835                 hammer2_chain_drop(parent);
836                 goto retry;
837         }
838
839         /*
840          * Handle the error case
841          */
842         if (*errorp) {
843                 panic("error2");
844                 KKASSERT(xchain == NULL);
845                 hammer2_inode_unlock_ex(dip, parent);
846                 /*hammer2_chain_lookup_done(parent);*/
847                 return;
848         }
849
850         /*
851          * Use xchain as a placeholder for (lhc).  Duplicate chain to the
852          * same target bref as xchain and then delete xchain.  The duplication
853          * occurs after xchain in flush order even though xchain is deleted
854          * after the duplication. XXX
855          *
856          * WARNING! Duplications (to a different parent) can cause indirect
857          *          blocks to be inserted, refactor xchain.
858          */
859         bref = chain->bref;
860         bref.key = lhc;                 /* invisible dir entry key */
861         bref.keybits = 0;
862 #if 0
863         hammer2_chain_delete(trans, xchain, 0);
864 #endif
865         hammer2_chain_duplicate(trans, &parent, &chain, &bref, 0, 2);
866 #if 0
867         hammer2_chain_refactor(&xchain);
868         /*hammer2_chain_delete(trans, xchain, 0);*/
869 #endif
870
871         hammer2_inode_unlock_ex(dip, parent);
872         /*hammer2_chain_lookup_done(parent);*/
873 #if 0
874         hammer2_chain_unlock(xchain);   /* no longer needed */
875 #endif
876
877         /*
878          * chain is now 'live' again.. adjust the filename.
879          *
880          * Directory entries are inodes but this is a hidden hardlink
881          * target.  The name isn't used but to ease debugging give it
882          * a name after its inode number.
883          */
884         hammer2_chain_modify(trans, &chain, 0);
885         nipdata = &chain->data->ipdata;
886         ksnprintf(nipdata->filename, sizeof(nipdata->filename),
887                   "0x%016jx", (intmax_t)nipdata->inum);
888         nipdata->name_len = strlen(nipdata->filename);
889         nipdata->name_key = lhc;
890         nipdata->nlinks += nlinks;
891
892         *chainp = chain;
893 }
894
895 /*
896  * Connect the target inode represented by (*chainp) to the media topology
897  * at (dip, name, len).
898  *
899  * If hlink is TRUE this function creates an OBJTYPE_HARDLINK directory
900  * entry instead of connecting (*chainp).
901  *
902  * If hlink is FALSE this function uses chain_duplicate() to make a copy
903  * if (*chainp) in the directory entry.  (*chainp) is likely to be deleted
904  * by the caller in this case (e.g. rename).
905  */
906 int
907 hammer2_inode_connect(hammer2_trans_t *trans, int hlink,
908                       hammer2_inode_t *dip, hammer2_chain_t **chainp,
909                       const uint8_t *name, size_t name_len,
910                       hammer2_key_t lhc)
911 {
912         hammer2_inode_data_t *ipdata;
913         hammer2_chain_t *nchain;
914         hammer2_chain_t *parent;
915         hammer2_chain_t *ochain;
916         hammer2_key_t key_dummy;
917         int cache_index = -1;
918         int error;
919
920         /*
921          * Since ochain is either disconnected from the topology or represents
922          * a hardlink terminus which is always a parent of or equal to dip,
923          * we should be able to safely lock dip->chain for our setup.
924          *
925          * WARNING! Must use inode_lock_ex() on dip to handle a stale
926          *          dip->chain cache.
927          */
928         ochain = *chainp;
929         parent = hammer2_inode_lock_ex(dip);
930         /*parent = hammer2_chain_lookup_init(dip->chain, 0);*/
931
932         /*
933          * If name is non-NULL we calculate lhc, else we use the passed-in
934          * lhc.
935          */
936         if (name) {
937                 lhc = hammer2_dirhash(name, name_len);
938
939                 /*
940                  * Locate the inode or indirect block to create the new
941                  * entry in.  At the same time check for key collisions
942                  * and iterate until we don't get one.
943                  */
944                 error = 0;
945                 while (error == 0) {
946                         nchain = hammer2_chain_lookup(&parent, &key_dummy,
947                                                       lhc, lhc,
948                                                       &cache_index, 0);
949                         if (nchain == NULL)
950                                 break;
951                         if ((lhc & HAMMER2_DIRHASH_LOMASK) ==
952                             HAMMER2_DIRHASH_LOMASK) {
953                                 error = ENOSPC;
954                         }
955                         hammer2_chain_unlock(nchain);
956                         nchain = NULL;
957                         ++lhc;
958                 }
959         }
960
961         if (error == 0) {
962                 if (hlink) {
963                         /*
964                          * Hardlink pointer needed, create totally fresh
965                          * directory entry.
966                          *
967                          * We must refactor ochain because it might have
968                          * been shifted into an indirect chain by the
969                          * create.
970                          */
971                         KKASSERT(nchain == NULL);
972                         error = hammer2_chain_create(trans, &parent, &nchain,
973                                                      lhc, 0,
974                                                      HAMMER2_BREF_TYPE_INODE,
975                                                      HAMMER2_INODE_BYTES);
976                         hammer2_chain_refactor(&ochain);
977                 } else {
978                         /*
979                          * Reconnect the original chain and rename.  Use
980                          * chain_duplicate().  The caller will likely delete
981                          * or has already deleted the original chain in
982                          * this case.
983                          *
984                          * NOTE: chain_duplicate() generates a new chain
985                          *       with CHAIN_DELETED cleared (ochain typically
986                          *       has it set from the file unlink).
987                          *
988                          * WARNING! Can cause held-over chains to require a
989                          *          refactor.  Fortunately we have none (our
990                          *          locked chains are passed into and
991                          *          modified by the call).
992                          */
993                         nchain = ochain;
994                         ochain = NULL;
995                         hammer2_chain_duplicate(trans, NULL, &nchain, NULL,
996                                                 0, 3);
997                         error = hammer2_chain_create(trans, &parent, &nchain,
998                                                      lhc, 0,
999                                                      HAMMER2_BREF_TYPE_INODE,
1000                                                      HAMMER2_INODE_BYTES);
1001                 }
1002         }
1003
1004         /*
1005          * Unlock stuff.
1006          */
1007         KKASSERT(error != EAGAIN);
1008         hammer2_inode_unlock_ex(dip, parent);
1009         /*hammer2_chain_lookup_done(parent);*/
1010         parent = NULL;
1011
1012         /*
1013          * nchain should be NULL on error, leave ochain (== *chainp) alone.
1014          */
1015         if (error) {
1016                 KKASSERT(nchain == NULL);
1017                 return (error);
1018         }
1019
1020         /*
1021          * Directory entries are inodes so if the name has changed we have
1022          * to update the inode.
1023          *
1024          * When creating an OBJTYPE_HARDLINK entry remember to unlock the
1025          * chain, the caller will access the hardlink via the actual hardlink
1026          * target file and not the hardlink pointer entry, so we must still
1027          * return ochain.
1028          */
1029         if (hlink && hammer2_hardlink_enable >= 0) {
1030                 /*
1031                  * Create the HARDLINK pointer.  oip represents the hardlink
1032                  * target in this situation.
1033                  *
1034                  * We will return ochain (the hardlink target).
1035                  */
1036                 hammer2_chain_modify(trans, &nchain, 0);
1037                 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
1038                 ipdata = &nchain->data->ipdata;
1039                 bcopy(name, ipdata->filename, name_len);
1040                 ipdata->name_key = lhc;
1041                 ipdata->name_len = name_len;
1042                 ipdata->target_type = ochain->data->ipdata.type;
1043                 ipdata->type = HAMMER2_OBJTYPE_HARDLINK;
1044                 ipdata->inum = ochain->data->ipdata.inum;
1045                 ipdata->nlinks = 1;
1046                 hammer2_chain_unlock(nchain);
1047                 nchain = ochain;
1048                 ochain = NULL;
1049         } else if (hlink && hammer2_hardlink_enable < 0) {
1050                 /*
1051                  * Create a snapshot (hardlink fake mode for debugging).
1052                  * (ochain already flushed above so we can just copy the
1053                  * bref XXX).
1054                  *
1055                  * Since this is a snapshot we return nchain in the fake
1056                  * hardlink case.
1057                  */
1058                 hammer2_chain_modify(trans, &nchain, 0);
1059                 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
1060                 ipdata = &nchain->data->ipdata;
1061                 *ipdata = ochain->data->ipdata;
1062                 bcopy(name, ipdata->filename, name_len);
1063                 ipdata->name_key = lhc;
1064                 ipdata->name_len = name_len;
1065                 atomic_clear_int(&nchain->core->flags,
1066                                  HAMMER2_CORE_COUNTEDBREFS);
1067                 kprintf("created fake hardlink %*.*s\n",
1068                         (int)name_len, (int)name_len, name);
1069         } else {
1070                 /*
1071                  * nchain is a duplicate of ochain at the new location.
1072                  * We must fixup the name stored in oip.  The bref key
1073                  * has already been set up.
1074                  */
1075                 hammer2_chain_modify(trans, &nchain, 0);
1076                 ipdata = &nchain->data->ipdata;
1077
1078                 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
1079                 bcopy(name, ipdata->filename, name_len);
1080                 ipdata->name_key = lhc;
1081                 ipdata->name_len = name_len;
1082                 ipdata->nlinks = 1;
1083         }
1084
1085         /*
1086          * We are replacing ochain with nchain, unlock ochain.  In the
1087          * case where ochain is left unchanged the code above sets
1088          * nchain to ochain and ochain to NULL, resulting in a NOP here.
1089          */
1090         if (ochain)
1091                 hammer2_chain_unlock(ochain);
1092         *chainp = nchain;
1093
1094         return (0);
1095 }
1096
1097 /*
1098  * Repoint ip->chain to nchain.  Caller must hold the inode exclusively
1099  * locked.
1100  *
1101  * ip->chain is set to nchain.  The prior chain in ip->chain is dropped
1102  * and nchain is ref'd.
1103  */
1104 void
1105 hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
1106                       hammer2_chain_t *nchain)
1107 {
1108         hammer2_chain_t *ochain;
1109         hammer2_inode_t *opip;
1110
1111         /*
1112          * Repoint ip->chain if requested.
1113          */
1114         ochain = ip->chain;
1115         ip->chain = nchain;
1116         if (nchain)
1117                 hammer2_chain_ref(nchain);
1118         if (ochain)
1119                 hammer2_chain_drop(ochain);
1120
1121         /*
1122          * Repoint ip->pip if requested (non-NULL pip).
1123          */
1124         if (pip && ip->pip != pip) {
1125                 opip = ip->pip;
1126                 hammer2_inode_ref(pip);
1127                 ip->pip = pip;
1128                 if (opip)
1129                         hammer2_inode_drop(opip);
1130         }
1131 }
1132
1133 /*
1134  * Unlink the file from the specified directory inode.  The directory inode
1135  * does not need to be locked.
1136  *
1137  * isdir determines whether a directory/non-directory check should be made.
1138  * No check is made if isdir is set to -1.
1139  *
1140  * isopen specifies whether special unlink-with-open-descriptor handling
1141  * must be performed.  If set to -1 the caller is deleting a PFS and we
1142  * check whether the chain is mounted or not (chain->pmp != NULL).  1 is
1143  * implied if it is mounted.
1144  *
1145  * If isopen is 1 and nlinks drops to 0 this function must move the chain
1146  * to a special hidden directory until last-close occurs on the file.
1147  *
1148  * NOTE!  The underlying file can still be active with open descriptors
1149  *        or if the chain is being manually held (e.g. for rename).
1150  *
1151  *        The caller is responsible for fixing up ip->chain if e.g. a
1152  *        rename occurs (see chain_duplicate()).
1153  */
1154 int
1155 hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
1156                     const uint8_t *name, size_t name_len,
1157                     int isdir, int *hlinkp, struct nchandle *nch)
1158 {
1159         hammer2_inode_data_t *ipdata;
1160         hammer2_chain_t *parent;
1161         hammer2_chain_t *ochain;
1162         hammer2_chain_t *chain;
1163         hammer2_chain_t *dparent;
1164         hammer2_chain_t *dchain;
1165         hammer2_key_t key_dummy;
1166         hammer2_key_t key_next;
1167         hammer2_key_t lhc;
1168         int error;
1169         int cache_index = -1;
1170         uint8_t type;
1171
1172         error = 0;
1173         ochain = NULL;
1174         lhc = hammer2_dirhash(name, name_len);
1175
1176         /*
1177          * Search for the filename in the directory
1178          */
1179         if (hlinkp)
1180                 *hlinkp = 0;
1181         parent = hammer2_inode_lock_ex(dip);
1182         chain = hammer2_chain_lookup(&parent, &key_next,
1183                                      lhc, lhc + HAMMER2_DIRHASH_LOMASK,
1184                                      &cache_index, 0);
1185         while (chain) {
1186                 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
1187                     name_len == chain->data->ipdata.name_len &&
1188                     bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
1189                         break;
1190                 }
1191                 chain = hammer2_chain_next(&parent, chain, &key_next,
1192                                            key_next,
1193                                            lhc + HAMMER2_DIRHASH_LOMASK,
1194                                            &cache_index, 0);
1195         }
1196         hammer2_inode_unlock_ex(dip, NULL);     /* retain parent */
1197
1198         /*
1199          * Not found or wrong type (isdir < 0 disables the type check).
1200          * If a hardlink pointer, type checks use the hardlink target.
1201          */
1202         if (chain == NULL) {
1203                 error = ENOENT;
1204                 goto done;
1205         }
1206         if ((type = chain->data->ipdata.type) == HAMMER2_OBJTYPE_HARDLINK) {
1207                 if (hlinkp)
1208                         *hlinkp = 1;
1209                 type = chain->data->ipdata.target_type;
1210         }
1211
1212         if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) {
1213                 error = ENOTDIR;
1214                 goto done;
1215         }
1216         if (type != HAMMER2_OBJTYPE_DIRECTORY && isdir >= 1) {
1217                 error = EISDIR;
1218                 goto done;
1219         }
1220
1221         /*
1222          * Hardlink must be resolved.  We can't hold parent locked while we
1223          * do this or we could deadlock.
1224          *
1225          * On success chain will be adjusted to point at the hardlink target
1226          * and ochain will point to the hardlink pointer in the original
1227          * directory.  Otherwise chain remains pointing to the original.
1228          */
1229         if (chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) {
1230                 hammer2_chain_unlock(parent);
1231                 parent = NULL;
1232                 error = hammer2_hardlink_find(dip, &chain, &ochain);
1233         }
1234
1235         /*
1236          * If this is a directory the directory must be empty.  However, if
1237          * isdir < 0 we are doing a rename and the directory does not have
1238          * to be empty, and if isdir > 1 we are deleting a PFS/snapshot
1239          * and the directory does not have to be empty.
1240          *
1241          * NOTE: We check the full key range here which covers both visible
1242          *       and invisible entries.  Theoretically there should be no
1243          *       invisible (hardlink target) entries if there are no visible
1244          *       entries.
1245          */
1246         if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) {
1247                 dparent = hammer2_chain_lookup_init(chain, 0);
1248                 dchain = hammer2_chain_lookup(&dparent, &key_dummy,
1249                                               0, (hammer2_key_t)-1,
1250                                               &cache_index,
1251                                               HAMMER2_LOOKUP_NODATA);
1252                 if (dchain) {
1253                         hammer2_chain_unlock(dchain);
1254                         hammer2_chain_lookup_done(dparent);
1255                         error = ENOTEMPTY;
1256                         goto done;
1257                 }
1258                 hammer2_chain_lookup_done(dparent);
1259                 dparent = NULL;
1260                 /* dchain NULL */
1261         }
1262
1263         /*
1264          * Ok, we can now unlink the chain.  We always decrement nlinks even
1265          * if the entry can be deleted in case someone has the file open and
1266          * does an fstat().
1267          *
1268          * The chain itself will no longer be in the on-media topology but
1269          * can still be flushed to the media (e.g. if an open descriptor
1270          * remains).  When the last vnode/ip ref goes away the chain will
1271          * be marked unmodified, avoiding any further (now unnecesary) I/O.
1272          *
1273          * A non-NULL ochain indicates a hardlink.
1274          */
1275         if (ochain) {
1276                 /*
1277                  * Delete the original hardlink pointer unconditionally.
1278                  * (any open descriptors will migrate to the hardlink
1279                  * target and have no affect on this operation).
1280                  *
1281                  * NOTE: parent from above is NULL when ochain != NULL
1282                  *       so we can reuse it.
1283                  */
1284                 hammer2_chain_lock(ochain, HAMMER2_RESOLVE_ALWAYS);
1285                 hammer2_chain_delete(trans, ochain, 0);
1286                 hammer2_chain_unlock(ochain);
1287         }
1288
1289         /*
1290          * Decrement nlinks on the hardlink target (or original file if
1291          * there it was not hardlinked).  Delete the target when nlinks
1292          * reaches 0 with special handling if (isopen) is set.
1293          *
1294          * NOTE! In DragonFly the vnops function calls cache_unlink() after
1295          *       calling us here to clean out the namecache association,
1296          *       (which does not represent a ref for the open-test), and to
1297          *       force finalization of the vnode if/when the last ref gets
1298          *       dropped.
1299          */
1300         hammer2_chain_modify(trans, &chain, 0);
1301         ipdata = &chain->data->ipdata;
1302         --ipdata->nlinks;
1303         kprintf("file %s nlinks %ld\n", ipdata->filename, ipdata->nlinks);
1304         if ((int64_t)ipdata->nlinks < 0)        /* XXX debugging */
1305                 ipdata->nlinks = 0;
1306         if (ipdata->nlinks == 0) {
1307                 if ((chain->flags & HAMMER2_CHAIN_PFSROOT) && chain->pmp) {
1308                         error = EINVAL;
1309                         kprintf("hammer2: PFS \"%s\" cannot be deleted "
1310                                 "while still mounted\n",
1311                                 ipdata->filename);
1312                         goto done;
1313                 }
1314                 if (nch && cache_isopen(nch)) {
1315                         kprintf("WARNING: unlinking open file\n");
1316                         atomic_set_int(&chain->flags, HAMMER2_CHAIN_UNLINKED);
1317                         hammer2_inode_move_to_hidden(trans, &chain,
1318                                                      ipdata->inum);
1319                 } else {
1320                         hammer2_chain_delete(trans, chain, 0);
1321                 }
1322         }
1323         error = 0;
1324 done:
1325         if (chain)
1326                 hammer2_chain_unlock(chain);
1327         if (parent)
1328                 hammer2_chain_lookup_done(parent);
1329         if (ochain)
1330                 hammer2_chain_drop(ochain);
1331
1332         return error;
1333 }
1334
1335 /*
1336  * This is called from the mount code to initialize pmp->ihidden
1337  */
1338 void
1339 hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp)
1340 {
1341         hammer2_trans_t trans;
1342         hammer2_chain_t *parent;
1343         hammer2_chain_t *chain;
1344         hammer2_chain_t *scan;
1345         hammer2_inode_data_t *ipdata;
1346         hammer2_key_t key_dummy;
1347         hammer2_key_t key_next;
1348         int cache_index;
1349         int error;
1350         int count;
1351
1352         if (pmp->ihidden)
1353                 return;
1354
1355         /*
1356          * Find the hidden directory
1357          */
1358         bzero(&key_dummy, sizeof(key_dummy));
1359         hammer2_trans_init(&trans, pmp, NULL, 0);
1360
1361         parent = hammer2_inode_lock_ex(pmp->iroot);
1362         chain = hammer2_chain_lookup(&parent, &key_dummy,
1363                                      HAMMER2_INODE_HIDDENDIR,
1364                                      HAMMER2_INODE_HIDDENDIR,
1365                                      &cache_index, 0);
1366         if (chain) {
1367                 pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, chain);
1368                 hammer2_inode_ref(pmp->ihidden);
1369
1370                 /*
1371                  * Remove any unlinked files which were left open as-of
1372                  * any system crash.
1373                  */
1374                 count = 0;
1375                 scan = hammer2_chain_lookup(&chain, &key_next,
1376                                             0, HAMMER2_MAX_TID,
1377                                             &cache_index,
1378                                             HAMMER2_LOOKUP_NODATA);
1379                 while (scan) {
1380                         if (scan->bref.type == HAMMER2_BREF_TYPE_INODE) {
1381                                 hammer2_chain_delete(&trans, scan, 0);
1382                                 ++count;
1383                         }
1384                         scan = hammer2_chain_next(&chain, scan, &key_next,
1385                                                    0, HAMMER2_MAX_TID,
1386                                                    &cache_index,
1387                                                    HAMMER2_LOOKUP_NODATA);
1388                 }
1389
1390                 hammer2_inode_unlock_ex(pmp->ihidden, chain);
1391                 hammer2_inode_unlock_ex(pmp->iroot, parent);
1392                 hammer2_trans_done(&trans);
1393                 kprintf("hammer2: PFS loaded hidden dir, "
1394                         "removed %d dead entries\n", count);
1395                 return;
1396         }
1397
1398         /*
1399          * Create the hidden directory
1400          */
1401         error = hammer2_chain_create(&trans, &parent, &chain,
1402                                      HAMMER2_INODE_HIDDENDIR, 0,
1403                                      HAMMER2_BREF_TYPE_INODE,
1404                                      HAMMER2_INODE_BYTES);
1405         hammer2_inode_unlock_ex(pmp->iroot, parent);
1406         hammer2_chain_modify(&trans, &chain, 0);
1407         ipdata = &chain->data->ipdata;
1408         ipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
1409         ipdata->inum = HAMMER2_INODE_HIDDENDIR;
1410         ipdata->nlinks = 1;
1411         kprintf("hammer2: PFS root missing hidden directory, creating\n");
1412
1413         pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, chain);
1414         hammer2_inode_ref(pmp->ihidden);
1415         hammer2_inode_unlock_ex(pmp->ihidden, chain);
1416         hammer2_trans_done(&trans);
1417 }
1418
1419 /*
1420  * If an open file is unlinked H2 needs to retain the file in the topology
1421  * to ensure that its backing store is not recovered by the bulk free scan.
1422  * This also allows us to avoid having to special-case the CHAIN_DELETED flag.
1423  *
1424  * To do this the file is moved to a hidden directory in the PFS root and
1425  * renamed.  The hidden directory must be created if it does not exist.
1426  */
1427 static
1428 void
1429 hammer2_inode_move_to_hidden(hammer2_trans_t *trans, hammer2_chain_t **chainp,
1430                              hammer2_tid_t inum)
1431 {
1432         hammer2_chain_t *chain;
1433         hammer2_pfsmount_t *pmp;
1434         int error;
1435
1436         chain = *chainp;
1437         pmp = chain->pmp;
1438         KKASSERT(pmp != NULL);
1439         KKASSERT(pmp->ihidden != NULL);
1440
1441         hammer2_chain_delete(trans, chain, 0);
1442         error = hammer2_inode_connect(trans, 0,
1443                                       pmp->ihidden, chainp,
1444                                       NULL, 0, inum);
1445         KKASSERT(error == 0);
1446 }
1447
1448 /*
1449  * Given an exclusively locked inode and chain we consolidate its chain
1450  * for hardlink creation, adding (nlinks) to the file's link count and
1451  * potentially relocating the inode to a directory common to ip->pip and tdip.
1452  *
1453  * Replaces (*chainp) if consolidation occurred, unlocking the old chain
1454  * and returning a new locked chain.
1455  *
1456  * NOTE!  This function will also replace ip->chain.
1457  */
1458 int
1459 hammer2_hardlink_consolidate(hammer2_trans_t *trans, hammer2_inode_t *ip,
1460                              hammer2_chain_t **chainp,
1461                              hammer2_inode_t *tdip, int nlinks)
1462 {
1463         hammer2_inode_data_t *ipdata;
1464         hammer2_inode_t *fdip;
1465         hammer2_inode_t *cdip;
1466         hammer2_chain_t *chain;
1467         hammer2_chain_t *nchain;
1468         int error;
1469
1470         chain = *chainp;
1471         if (nlinks == 0 &&                      /* no hardlink needed */
1472             (chain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE)) {
1473                 return (0);
1474         }
1475         if (hammer2_hardlink_enable < 0) {      /* fake hardlinks */
1476                 return (0);
1477         }
1478
1479         if (hammer2_hardlink_enable == 0) {     /* disallow hardlinks */
1480                 hammer2_chain_unlock(chain);
1481                 *chainp = NULL;
1482                 return (ENOTSUP);
1483         }
1484
1485         /*
1486          * cdip will be returned with a ref, but not locked.
1487          */
1488         fdip = ip->pip;
1489         cdip = hammer2_inode_common_parent(fdip, tdip);
1490
1491         /*
1492          * If no change in the hardlink's target directory is required and
1493          * this is already a hardlink target, all we need to do is adjust
1494          * the link count.
1495          *
1496          * XXX The common parent is a big wiggly due to duplication from
1497          *     renames.  Compare the core (RBTREE) pointer instead of the
1498          *     ip's.
1499          */
1500         if (cdip == fdip &&
1501             (chain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE) == 0) {
1502                 if (nlinks) {
1503                         hammer2_chain_modify(trans, &chain, 0);
1504                         chain->data->ipdata.nlinks += nlinks;
1505                 }
1506                 error = 0;
1507                 goto done;
1508         }
1509
1510
1511         /*
1512          * chain is the real inode.  If it's visible we have to convert it
1513          * to a hardlink pointer.  If it is not visible then it is already
1514          * a hardlink target and only needs to be deleted.
1515          */
1516         KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0);
1517         KKASSERT(chain->data->ipdata.type != HAMMER2_OBJTYPE_HARDLINK);
1518         if (chain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE) {
1519                 /*
1520                  * We are going to duplicate chain later, causing its
1521                  * media block to be shifted to the duplicate.  Even though
1522                  * we are delete-duplicating nchain here it might decide not
1523                  * to reallocate the block.  Set FORCECOW to force it to.
1524                  */
1525                 nchain = chain;
1526                 hammer2_chain_lock(nchain, HAMMER2_RESOLVE_ALWAYS);
1527                 atomic_set_int(&nchain->flags, HAMMER2_CHAIN_FORCECOW);
1528                 hammer2_chain_delete_duplicate(trans, &nchain,
1529                                                HAMMER2_DELDUP_RECORE);
1530                 KKASSERT((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0);
1531
1532                 ipdata = &nchain->data->ipdata;
1533                 ipdata->target_type = ipdata->type;
1534                 ipdata->type = HAMMER2_OBJTYPE_HARDLINK;
1535                 ipdata->uflags = 0;
1536                 ipdata->rmajor = 0;
1537                 ipdata->rminor = 0;
1538                 ipdata->ctime = 0;
1539                 ipdata->mtime = 0;
1540                 ipdata->atime = 0;
1541                 ipdata->btime = 0;
1542                 bzero(&ipdata->uid, sizeof(ipdata->uid));
1543                 bzero(&ipdata->gid, sizeof(ipdata->gid));
1544                 ipdata->op_flags = HAMMER2_OPFLAG_DIRECTDATA;
1545                 ipdata->cap_flags = 0;
1546                 ipdata->mode = 0;
1547                 ipdata->size = 0;
1548                 ipdata->nlinks = 1;
1549                 ipdata->iparent = 0;    /* XXX */
1550                 ipdata->pfs_type = 0;
1551                 ipdata->pfs_inum = 0;
1552                 bzero(&ipdata->pfs_clid, sizeof(ipdata->pfs_clid));
1553                 bzero(&ipdata->pfs_fsid, sizeof(ipdata->pfs_fsid));
1554                 ipdata->data_quota = 0;
1555                 ipdata->data_count = 0;
1556                 ipdata->inode_quota = 0;
1557                 ipdata->inode_count = 0;
1558                 ipdata->attr_tid = 0;
1559                 ipdata->dirent_tid = 0;
1560                 bzero(&ipdata->u, sizeof(ipdata->u));
1561                 /* XXX transaction ids */
1562         } else {
1563                 hammer2_chain_delete(trans, chain, 0);
1564                 nchain = NULL;
1565         }
1566
1567         /*
1568          * chain represents the hardlink target and is now flagged deleted.
1569          * duplicate it to the parent directory and adjust nlinks.
1570          *
1571          * WARNING! The shiftup() call can cause nchain to be moved into
1572          *          an indirect block, and our nchain will wind up pointing
1573          *          to the older/original version.
1574          */
1575         KKASSERT(chain->flags & HAMMER2_CHAIN_DELETED);
1576         hammer2_hardlink_shiftup(trans, &chain, cdip, nlinks, &error);
1577
1578         if (error == 0)
1579                 hammer2_inode_repoint(ip, cdip, chain);
1580
1581         /*
1582          * Unlock the original chain last as the lock blocked races against
1583          * the creation of the new hardlink target.
1584          */
1585         if (nchain)
1586                 hammer2_chain_unlock(nchain);
1587
1588 done:
1589         /*
1590          * Cleanup, chain/nchain already dealt with.
1591          */
1592         *chainp = chain;
1593         hammer2_inode_drop(cdip);
1594
1595         return (error);
1596 }
1597
1598 /*
1599  * If (*ochainp) is non-NULL it points to the forward OBJTYPE_HARDLINK
1600  * inode while (*chainp) points to the resolved (hidden hardlink
1601  * target) inode.  In this situation when nlinks is 1 we wish to
1602  * deconsolidate the hardlink, moving it back to the directory that now
1603  * represents the only remaining link.
1604  */
1605 int
1606 hammer2_hardlink_deconsolidate(hammer2_trans_t *trans,
1607                                hammer2_inode_t *dip,
1608                                hammer2_chain_t **chainp,
1609                                hammer2_chain_t **ochainp)
1610 {
1611         if (*ochainp == NULL)
1612                 return (0);
1613         /* XXX */
1614         return (0);
1615 }
1616
1617 /*
1618  * The caller presents a locked *chainp pointing to a HAMMER2_BREF_TYPE_INODE
1619  * with an obj_type of HAMMER2_OBJTYPE_HARDLINK.  This routine will gobble
1620  * the *chainp and return a new locked *chainp representing the file target
1621  * (the original *chainp will be unlocked).
1622  *
1623  * When a match is found the chain representing the original HARDLINK
1624  * will be returned in *ochainp with a ref, but not locked.
1625  *
1626  * When no match is found *chainp is set to NULL and EIO is returned.
1627  * (*ochainp) will still be set to the original chain with a ref but not
1628  * locked.
1629  */
1630 int
1631 hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_chain_t **chainp,
1632                       hammer2_chain_t **ochainp)
1633 {
1634         hammer2_chain_t *chain = *chainp;
1635         hammer2_chain_t *parent;
1636         hammer2_inode_t *ip;
1637         hammer2_inode_t *pip;
1638         hammer2_key_t key_dummy;
1639         hammer2_key_t lhc;
1640         int cache_index = -1;
1641
1642         pip = dip;
1643         hammer2_inode_ref(pip);         /* for loop */
1644         hammer2_chain_ref(chain);       /* for (*ochainp) */
1645         *ochainp = chain;
1646
1647         /*
1648          * Locate the hardlink.  pip is referenced and not locked,
1649          * ipp.
1650          *
1651          * chain is reused.
1652          */
1653         lhc = chain->data->ipdata.inum;
1654         hammer2_chain_unlock(chain);
1655         chain = NULL;
1656
1657         while ((ip = pip) != NULL) {
1658                 parent = hammer2_inode_lock_ex(ip);
1659                 hammer2_inode_drop(ip);                 /* loop */
1660                 KKASSERT(parent->bref.type == HAMMER2_BREF_TYPE_INODE);
1661                 chain = hammer2_chain_lookup(&parent, &key_dummy,
1662                                              lhc, lhc, &cache_index, 0);
1663                 hammer2_chain_lookup_done(parent);      /* discard parent */
1664                 if (chain)
1665                         break;
1666                 pip = ip->pip;          /* safe, ip held locked */
1667                 if (pip)
1668                         hammer2_inode_ref(pip);         /* loop */
1669                 hammer2_inode_unlock_ex(ip, NULL);
1670         }
1671
1672         /*
1673          * chain is locked, ip is locked.  Unlock ip, return the locked
1674          * chain.  *ipp is already set w/a ref count and not locked.
1675          *
1676          * (parent is already unlocked).
1677          */
1678         if (ip)
1679                 hammer2_inode_unlock_ex(ip, NULL);
1680         *chainp = chain;
1681         if (chain) {
1682                 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
1683                 /* already locked */
1684                 return (0);
1685         } else {
1686                 return (EIO);
1687         }
1688 }
1689
1690 /*
1691  * Find the directory common to both fdip and tdip, hold and return
1692  * its inode.
1693  */
1694 hammer2_inode_t *
1695 hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip)
1696 {
1697         hammer2_inode_t *scan1;
1698         hammer2_inode_t *scan2;
1699
1700         /*
1701          * We used to have a depth field but it complicated matters too
1702          * much for directory renames.  So now its ugly.  Check for
1703          * simple cases before giving up and doing it the expensive way.
1704          *
1705          * XXX need a bottom-up topology stability lock
1706          */
1707         if (fdip == tdip || fdip == tdip->pip) {
1708                 hammer2_inode_ref(fdip);
1709                 return(fdip);
1710         }
1711         if (fdip->pip == tdip) {
1712                 hammer2_inode_ref(tdip);
1713                 return(tdip);
1714         }
1715
1716         /*
1717          * XXX not MPSAFE
1718          */
1719         for (scan1 = fdip; scan1->pmp == fdip->pmp; scan1 = scan1->pip) {
1720                 scan2 = tdip;
1721                 while (scan2->pmp == tdip->pmp) {
1722                         if (scan1 == scan2) {
1723                                 hammer2_inode_ref(scan1);
1724                                 return(scan1);
1725                         }
1726                         scan2 = scan2->pip;
1727                         if (scan2 == NULL)
1728                                 break;
1729                 }
1730         }
1731         panic("hammer2_inode_common_parent: no common parent %p %p\n",
1732               fdip, tdip);
1733         /* NOT REACHED */
1734         return(NULL);
1735 }
1736
1737 /*
1738  * Synchronize the inode's frontend state with the chain state prior
1739  * to any explicit flush of the inode or any strategy write call.
1740  *
1741  * Called with a locked inode.
1742  */
1743 void
1744 hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip, 
1745                     hammer2_chain_t **chainp)
1746 {
1747         hammer2_inode_data_t *ipdata;
1748         hammer2_chain_t *parent;
1749         hammer2_chain_t *chain;
1750         hammer2_key_t lbase;
1751         hammer2_key_t key_next;
1752         int cache_index;
1753
1754         ipdata = &ip->chain->data->ipdata;
1755
1756         if (ip->flags & HAMMER2_INODE_MTIME) {
1757                 ipdata = hammer2_chain_modify_ip(trans, ip, chainp, 0);
1758                 atomic_clear_int(&ip->flags, HAMMER2_INODE_MTIME);
1759                 ipdata->mtime = ip->mtime;
1760         }
1761         if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size < ipdata->size) {
1762                 ipdata = hammer2_chain_modify_ip(trans, ip, chainp, 0);
1763                 ipdata->size = ip->size;
1764                 atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);
1765
1766                 /*
1767                  * We must delete any chains beyond the EOF.  The chain
1768                  * straddling the EOF will be pending in the bioq.
1769                  */
1770                 lbase = (ipdata->size + HAMMER2_PBUFMASK64) &
1771                         ~HAMMER2_PBUFMASK64;
1772                 parent = hammer2_chain_lookup_init(ip->chain, 0);
1773                 chain = hammer2_chain_lookup(&parent, &key_next,
1774                                              lbase, (hammer2_key_t)-1,
1775                                              &cache_index,
1776                                              HAMMER2_LOOKUP_NODATA);
1777                 while (chain) {
1778                         /*
1779                          * Degenerate embedded case, nothing to loop on
1780                          */
1781                         if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
1782                                 hammer2_chain_unlock(chain);
1783                                 break;
1784                         }
1785                         if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
1786                                 hammer2_chain_delete(trans, chain, 0);
1787                         }
1788                         chain = hammer2_chain_next(&parent, chain, &key_next,
1789                                                    key_next, (hammer2_key_t)-1,
1790                                                    &cache_index,
1791                                                    HAMMER2_LOOKUP_NODATA);
1792                 }
1793                 hammer2_chain_lookup_done(parent);
1794         } else
1795         if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size > ipdata->size) {
1796                 ipdata = hammer2_chain_modify_ip(trans, ip, chainp, 0);
1797                 ipdata->size = ip->size;
1798                 atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);
1799
1800                 /*
1801                  * When resizing larger we may not have any direct-data
1802                  * available.
1803                  */
1804                 if ((ipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) &&
1805                     ip->size > HAMMER2_EMBEDDED_BYTES) {
1806                         ipdata->op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA;
1807                         bzero(&ipdata->u.blockset, sizeof(ipdata->u.blockset));
1808                 }
1809         }
1810 }