543b7347278680a4f745bfc063009a3ceaaab929
[dragonfly.git] / sys / vfs / hammer2 / hammer2_inode.c
1 /*
2  * Copyright (c) 2011-2014 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 #include <sys/cdefs.h>
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/types.h>
39 #include <sys/lock.h>
40 #include <sys/uuid.h>
41
42 #include "hammer2.h"
43
44 #define INODE_DEBUG     0
45
46 static void hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
47                                          hammer2_cluster_t **cparentp,
48                                          hammer2_cluster_t **clusterp,
49                                          hammer2_tid_t inum);
50
51 RB_GENERATE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp,
52              hammer2_tid_t, inum);
53
54 int
55 hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2)
56 {
57         if (ip1->inum < ip2->inum)
58                 return(-1);
59         if (ip1->inum > ip2->inum)
60                 return(1);
61         return(0);
62 }
63
64 /*
65  * HAMMER2 inode locks
66  *
67  * HAMMER2 offers shared locks and exclusive locks on inodes.
68  *
69  * The inode locking function locks the inode itself, resolves any stale
70  * chains in the inode's cluster, and allocates a fresh copy of the
71  * cluster with 1 ref and all the underlying chains locked.  Duplication
72  * races are handled by this function.
73  *
74  * ip->cluster will be stable while the inode is locked.
75  *
76  * NOTE: We don't combine the inode/chain lock because putting away an
77  *       inode would otherwise confuse multiple lock holders of the inode.
78  *
79  * NOTE: Hardlinks are followed in the returned cluster but not in the
80  *       inode's internal cluster (ip->cluster).
81  */
82 hammer2_cluster_t *
83 hammer2_inode_lock_ex(hammer2_inode_t *ip)
84 {
85         const hammer2_inode_data_t *ipdata;
86         hammer2_cluster_t *cluster;
87         hammer2_chain_t *chain;
88         int error;
89         int i;
90
91         hammer2_inode_ref(ip);
92         ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);
93         cluster = hammer2_cluster_copy(&ip->cluster,
94                                        HAMMER2_CLUSTER_COPY_NOCHAINS);
95
96         ip->cluster.focus = NULL;
97         cluster->focus = NULL;
98
99         for (i = 0; i < cluster->nchains; ++i) {
100                 chain = ip->cluster.array[i];
101                 if (chain == NULL) {
102                         kprintf("inode_lock: %p: missing chain\n", ip);
103                         continue;
104                 }
105
106                 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
107                 cluster->array[i] = chain;
108                 if (cluster->focus == NULL)
109                         cluster->focus = chain;
110                 if (ip->cluster.focus == NULL)
111                         ip->cluster.focus = chain;
112         }
113
114         /*
115          * Returned cluster must resolve hardlink pointers
116          */
117         ipdata = &hammer2_cluster_data(cluster)->ipdata;
118         if (ipdata->type == HAMMER2_OBJTYPE_HARDLINK &&
119             (cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0) {
120                 error = hammer2_hardlink_find(ip->pip, NULL, cluster);
121                 KKASSERT(error == 0);
122         }
123
124         return (cluster);
125 }
126
127 void
128 hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_cluster_t *cluster)
129 {
130         if (cluster)
131                 hammer2_cluster_unlock(cluster);
132         ccms_thread_unlock(&ip->topo_cst);
133         hammer2_inode_drop(ip);
134 }
135
136 /*
137  * NOTE: We don't combine the inode/chain lock because putting away an
138  *       inode would otherwise confuse multiple lock holders of the inode.
139  *
140  *       Shared locks are especially sensitive to having too many shared
141  *       lock counts (from the same thread) on certain paths which might
142  *       need to upgrade them.  Only one count of a shared lock can be
143  *       upgraded.
144  */
145 hammer2_cluster_t *
146 hammer2_inode_lock_sh(hammer2_inode_t *ip)
147 {
148         const hammer2_inode_data_t *ipdata;
149         hammer2_cluster_t *cluster;
150         hammer2_chain_t *chain;
151         int error = 0;
152         int i;
153
154         hammer2_inode_ref(ip);
155         cluster = hammer2_cluster_copy(&ip->cluster,
156                                        HAMMER2_CLUSTER_COPY_NOCHAINS);
157         ccms_thread_lock(&ip->topo_cst, CCMS_STATE_SHARED);
158
159         cluster->focus = NULL;
160
161         for (i = 0; i < cluster->nchains; ++i) {
162                 chain = ip->cluster.array[i];
163
164                 if (chain == NULL) {
165                         kprintf("inode_lock: %p: missing chain\n", ip);
166                         continue;
167                 }
168
169                 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS |
170                                           HAMMER2_RESOLVE_SHARED);
171                 cluster->array[i] = chain;
172                 if (cluster->focus == NULL)
173                         cluster->focus = chain;
174         }
175
176         /*
177          * Returned cluster must resolve hardlink pointers
178          */
179         ipdata = &hammer2_cluster_data(cluster)->ipdata;
180         if (ipdata->type == HAMMER2_OBJTYPE_HARDLINK &&
181             (cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0) {
182                 error = hammer2_hardlink_find(ip->pip, NULL, cluster);
183                 KKASSERT(error == 0);
184         }
185
186         return (cluster);
187 }
188
189 void
190 hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_cluster_t *cluster)
191 {
192         if (cluster)
193                 hammer2_cluster_unlock(cluster);
194         ccms_thread_unlock(&ip->topo_cst);
195         hammer2_inode_drop(ip);
196 }
197
198 ccms_state_t
199 hammer2_inode_lock_temp_release(hammer2_inode_t *ip)
200 {
201         return(ccms_thread_lock_temp_release(&ip->topo_cst));
202 }
203
204 void
205 hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, ccms_state_t ostate)
206 {
207         ccms_thread_lock_temp_restore(&ip->topo_cst, ostate);
208 }
209
210 ccms_state_t
211 hammer2_inode_lock_upgrade(hammer2_inode_t *ip)
212 {
213         return(ccms_thread_lock_upgrade(&ip->topo_cst));
214 }
215
216 void
217 hammer2_inode_lock_downgrade(hammer2_inode_t *ip, ccms_state_t ostate)
218 {
219         ccms_thread_lock_downgrade(&ip->topo_cst, ostate);
220 }
221
222 /*
223  * Lookup an inode by inode number
224  */
225 hammer2_inode_t *
226 hammer2_inode_lookup(hammer2_pfsmount_t *pmp, hammer2_tid_t inum)
227 {
228         hammer2_inode_t *ip;
229
230         KKASSERT(pmp);
231         if (pmp->spmp_hmp) {
232                 ip = NULL;
233         } else {
234                 spin_lock(&pmp->inum_spin);
235                 ip = RB_LOOKUP(hammer2_inode_tree, &pmp->inum_tree, inum);
236                 if (ip)
237                         hammer2_inode_ref(ip);
238                 spin_unlock(&pmp->inum_spin);
239         }
240         return(ip);
241 }
242
243 /*
244  * Adding a ref to an inode is only legal if the inode already has at least
245  * one ref.
246  */
247 void
248 hammer2_inode_ref(hammer2_inode_t *ip)
249 {
250         atomic_add_int(&ip->refs, 1);
251 }
252
253 /*
254  * Drop an inode reference, freeing the inode when the last reference goes
255  * away.
256  */
257 void
258 hammer2_inode_drop(hammer2_inode_t *ip)
259 {
260         hammer2_pfsmount_t *pmp;
261         hammer2_inode_t *pip;
262         u_int refs;
263
264         while (ip) {
265                 refs = ip->refs;
266                 cpu_ccfence();
267                 if (refs == 1) {
268                         /*
269                          * Transition to zero, must interlock with
270                          * the inode inumber lookup tree (if applicable).
271                          */
272                         pmp = ip->pmp;
273                         KKASSERT(pmp);
274                         spin_lock(&pmp->inum_spin);
275
276                         if (atomic_cmpset_int(&ip->refs, 1, 0)) {
277                                 KKASSERT(ip->topo_cst.count == 0);
278                                 if (ip->flags & HAMMER2_INODE_ONRBTREE) {
279                                         atomic_clear_int(&ip->flags,
280                                                      HAMMER2_INODE_ONRBTREE);
281                                         RB_REMOVE(hammer2_inode_tree,
282                                                   &pmp->inum_tree, ip);
283                                 }
284                                 spin_unlock(&pmp->inum_spin);
285
286                                 pip = ip->pip;
287                                 ip->pip = NULL;
288                                 ip->pmp = NULL;
289
290                                 /*
291                                  * Cleaning out ip->cluster isn't entirely
292                                  * trivial.
293                                  */
294                                 hammer2_inode_repoint(ip, NULL, NULL);
295
296                                 /*
297                                  * We have to drop pip (if non-NULL) to
298                                  * dispose of our implied reference from
299                                  * ip->pip.  We can simply loop on it.
300                                  */
301                                 kfree(ip, pmp->minode);
302                                 atomic_add_long(&pmp->inmem_inodes, -1);
303                                 ip = pip;
304                                 /* continue with pip (can be NULL) */
305                         } else {
306                                 spin_unlock(&ip->pmp->inum_spin);
307                         }
308                 } else {
309                         /*
310                          * Non zero transition
311                          */
312                         if (atomic_cmpset_int(&ip->refs, refs, refs - 1))
313                                 break;
314                 }
315         }
316 }
317
318 /*
319  * Get the vnode associated with the given inode, allocating the vnode if
320  * necessary.  The vnode will be returned exclusively locked.
321  *
322  * The caller must lock the inode (shared or exclusive).
323  *
324  * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
325  * races.
326  */
327 struct vnode *
328 hammer2_igetv(hammer2_inode_t *ip, hammer2_cluster_t *cparent, int *errorp)
329 {
330         const hammer2_inode_data_t *ipdata;
331         hammer2_pfsmount_t *pmp;
332         struct vnode *vp;
333         ccms_state_t ostate;
334
335         pmp = ip->pmp;
336         KKASSERT(pmp != NULL);
337         *errorp = 0;
338
339         ipdata = &hammer2_cluster_data(cparent)->ipdata;
340
341         for (;;) {
342                 /*
343                  * Attempt to reuse an existing vnode assignment.  It is
344                  * possible to race a reclaim so the vget() may fail.  The
345                  * inode must be unlocked during the vget() to avoid a
346                  * deadlock against a reclaim.
347                  */
348                 vp = ip->vp;
349                 if (vp) {
350                         /*
351                          * Inode must be unlocked during the vget() to avoid
352                          * possible deadlocks, but leave the ip ref intact.
353                          *
354                          * vnode is held to prevent destruction during the
355                          * vget().  The vget() can still fail if we lost
356                          * a reclaim race on the vnode.
357                          */
358                         vhold(vp);
359                         ostate = hammer2_inode_lock_temp_release(ip);
360                         if (vget(vp, LK_EXCLUSIVE)) {
361                                 vdrop(vp);
362                                 hammer2_inode_lock_temp_restore(ip, ostate);
363                                 continue;
364                         }
365                         hammer2_inode_lock_temp_restore(ip, ostate);
366                         vdrop(vp);
367                         /* vp still locked and ref from vget */
368                         if (ip->vp != vp) {
369                                 kprintf("hammer2: igetv race %p/%p\n",
370                                         ip->vp, vp);
371                                 vput(vp);
372                                 continue;
373                         }
374                         *errorp = 0;
375                         break;
376                 }
377
378                 /*
379                  * No vnode exists, allocate a new vnode.  Beware of
380                  * allocation races.  This function will return an
381                  * exclusively locked and referenced vnode.
382                  */
383                 *errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0);
384                 if (*errorp) {
385                         kprintf("hammer2: igetv getnewvnode failed %d\n",
386                                 *errorp);
387                         vp = NULL;
388                         break;
389                 }
390
391                 /*
392                  * Lock the inode and check for an allocation race.
393                  */
394                 ostate = hammer2_inode_lock_upgrade(ip);
395                 if (ip->vp != NULL) {
396                         vp->v_type = VBAD;
397                         vx_put(vp);
398                         hammer2_inode_lock_downgrade(ip, ostate);
399                         continue;
400                 }
401
402                 switch (ipdata->type) {
403                 case HAMMER2_OBJTYPE_DIRECTORY:
404                         vp->v_type = VDIR;
405                         break;
406                 case HAMMER2_OBJTYPE_REGFILE:
407                         vp->v_type = VREG;
408                         vinitvmio(vp, ipdata->size,
409                                   HAMMER2_LBUFSIZE,
410                                   (int)ipdata->size & HAMMER2_LBUFMASK);
411                         break;
412                 case HAMMER2_OBJTYPE_SOFTLINK:
413                         /*
414                          * XXX for now we are using the generic file_read
415                          * and file_write code so we need a buffer cache
416                          * association.
417                          */
418                         vp->v_type = VLNK;
419                         vinitvmio(vp, ipdata->size,
420                                   HAMMER2_LBUFSIZE,
421                                   (int)ipdata->size & HAMMER2_LBUFMASK);
422                         break;
423                 case HAMMER2_OBJTYPE_CDEV:
424                         vp->v_type = VCHR;
425                         /* fall through */
426                 case HAMMER2_OBJTYPE_BDEV:
427                         vp->v_ops = &pmp->mp->mnt_vn_spec_ops;
428                         if (ipdata->type != HAMMER2_OBJTYPE_CDEV)
429                                 vp->v_type = VBLK;
430                         addaliasu(vp, ipdata->rmajor, ipdata->rminor);
431                         break;
432                 case HAMMER2_OBJTYPE_FIFO:
433                         vp->v_type = VFIFO;
434                         vp->v_ops = &pmp->mp->mnt_vn_fifo_ops;
435                         break;
436                 default:
437                         panic("hammer2: unhandled objtype %d", ipdata->type);
438                         break;
439                 }
440
441                 if (ip == pmp->iroot)
442                         vsetflags(vp, VROOT);
443
444                 vp->v_data = ip;
445                 ip->vp = vp;
446                 hammer2_inode_ref(ip);          /* vp association */
447                 hammer2_inode_lock_downgrade(ip, ostate);
448                 break;
449         }
450
451         /*
452          * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0.
453          */
454         if (hammer2_debug & 0x0002) {
455                 kprintf("igetv vp %p refs 0x%08x aux 0x%08x\n",
456                         vp, vp->v_refcnt, vp->v_auxrefs);
457         }
458         return (vp);
459 }
460
461 /*
462  * Returns the inode associated with the passed-in cluster, creating the
463  * inode if necessary and synchronizing it to the passed-in cluster otherwise.
464  *
465  * The passed-in chain must be locked and will remain locked on return.
466  * The returned inode will be locked and the caller may dispose of both
467  * via hammer2_inode_unlock_ex().  However, if the caller needs to resolve
468  * a hardlink it must ref/unlock/relock/drop the inode.
469  *
470  * The hammer2_inode structure regulates the interface between the high level
471  * kernel VNOPS API and the filesystem backend (the chains).
472  */
473 hammer2_inode_t *
474 hammer2_inode_get(hammer2_pfsmount_t *pmp, hammer2_inode_t *dip,
475                   hammer2_cluster_t *cluster)
476 {
477         hammer2_inode_t *nip;
478         const hammer2_inode_data_t *iptmp;
479         const hammer2_inode_data_t *nipdata;
480
481         KKASSERT(hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE);
482         KKASSERT(pmp);
483
484         /*
485          * Interlocked lookup/ref of the inode.  This code is only needed
486          * when looking up inodes with nlinks != 0 (TODO: optimize out
487          * otherwise and test for duplicates).
488          */
489 again:
490         for (;;) {
491                 iptmp = &hammer2_cluster_data(cluster)->ipdata;
492                 nip = hammer2_inode_lookup(pmp, iptmp->inum);
493                 if (nip == NULL)
494                         break;
495
496                 ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE);
497
498                 /*
499                  * Handle SMP race (not applicable to the super-root spmp
500                  * which can't index inodes due to duplicative inode numbers).
501                  */
502                 if (pmp->spmp_hmp == NULL &&
503                     (nip->flags & HAMMER2_INODE_ONRBTREE) == 0) {
504                         ccms_thread_unlock(&nip->topo_cst);
505                         hammer2_inode_drop(nip);
506                         continue;
507                 }
508                 hammer2_inode_repoint(nip, NULL, cluster);
509                 return nip;
510         }
511
512         /*
513          * We couldn't find the inode number, create a new inode.
514          */
515         nip = kmalloc(sizeof(*nip), pmp->minode, M_WAITOK | M_ZERO);
516         atomic_add_long(&pmp->inmem_inodes, 1);
517         hammer2_pfs_memory_inc(pmp);
518         hammer2_pfs_memory_wakeup(pmp);
519         if (pmp->spmp_hmp)
520                 nip->flags = HAMMER2_INODE_SROOT;
521
522         /*
523          * Initialize nip's cluster
524          */
525         nip->cluster.refs = 1;
526         nip->cluster.pmp = pmp;
527         nip->cluster.flags |= HAMMER2_CLUSTER_INODE;
528         hammer2_cluster_replace(&nip->cluster, cluster);
529
530         nipdata = &hammer2_cluster_data(cluster)->ipdata;
531         nip->inum = nipdata->inum;
532         nip->size = nipdata->size;
533         nip->mtime = nipdata->mtime;
534         hammer2_inode_repoint(nip, NULL, cluster);
535         nip->pip = dip;                         /* can be NULL */
536         if (dip)
537                 hammer2_inode_ref(dip); /* ref dip for nip->pip */
538
539         nip->pmp = pmp;
540
541         /*
542          * ref and lock on nip gives it state compatible to after a
543          * hammer2_inode_lock_ex() call.
544          */
545         nip->refs = 1;
546         ccms_cst_init(&nip->topo_cst, &nip->cluster);
547         ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE);
548         /* combination of thread lock and chain lock == inode lock */
549
550         /*
551          * Attempt to add the inode.  If it fails we raced another inode
552          * get.  Undo all the work and try again.
553          */
554         if (pmp->spmp_hmp == NULL) {
555                 spin_lock(&pmp->inum_spin);
556                 if (RB_INSERT(hammer2_inode_tree, &pmp->inum_tree, nip)) {
557                         spin_unlock(&pmp->inum_spin);
558                         ccms_thread_unlock(&nip->topo_cst);
559                         hammer2_inode_drop(nip);
560                         goto again;
561                 }
562                 atomic_set_int(&nip->flags, HAMMER2_INODE_ONRBTREE);
563                 spin_unlock(&pmp->inum_spin);
564         }
565
566         return (nip);
567 }
568
569 /*
570  * Create a new inode in the specified directory using the vattr to
571  * figure out the type of inode.
572  *
573  * If no error occurs the new inode with its cluster locked is returned in
574  * *nipp, otherwise an error is returned and *nipp is set to NULL.
575  *
576  * If vap and/or cred are NULL the related fields are not set and the
577  * inode type defaults to a directory.  This is used when creating PFSs
578  * under the super-root, so the inode number is set to 1 in this case.
579  *
580  * dip is not locked on entry.
581  *
582  * NOTE: When used to create a snapshot, the inode is temporarily associated
583  *       with the super-root spmp. XXX should pass new pmp for snapshot.
584  */
585 hammer2_inode_t *
586 hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip,
587                      struct vattr *vap, struct ucred *cred,
588                      const uint8_t *name, size_t name_len,
589                      hammer2_cluster_t **clusterp, int *errorp)
590 {
591         const hammer2_inode_data_t *dipdata;
592         hammer2_inode_data_t *nipdata;
593         hammer2_cluster_t *cluster;
594         hammer2_cluster_t *cparent;
595         hammer2_inode_t *nip;
596         hammer2_key_t key_dummy;
597         hammer2_key_t lhc;
598         int error;
599         uid_t xuid;
600         uuid_t dip_uid;
601         uuid_t dip_gid;
602         uint32_t dip_mode;
603         uint8_t dip_algo;
604         int ddflag;
605
606         lhc = hammer2_dirhash(name, name_len);
607         *errorp = 0;
608
609         /*
610          * Locate the inode or indirect block to create the new
611          * entry in.  At the same time check for key collisions
612          * and iterate until we don't get one.
613          *
614          * NOTE: hidden inodes do not have iterators.
615          */
616 retry:
617         cparent = hammer2_inode_lock_ex(dip);
618         dipdata = &hammer2_cluster_data(cparent)->ipdata;
619         dip_uid = dipdata->uid;
620         dip_gid = dipdata->gid;
621         dip_mode = dipdata->mode;
622         dip_algo = dipdata->comp_algo;
623
624         error = 0;
625         while (error == 0) {
626                 cluster = hammer2_cluster_lookup(cparent, &key_dummy,
627                                                  lhc, lhc, 0, &ddflag);
628                 if (cluster == NULL)
629                         break;
630                 if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0)
631                         error = ENOSPC;
632                 if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
633                         error = ENOSPC;
634                 hammer2_cluster_unlock(cluster);
635                 cluster = NULL;
636                 ++lhc;
637         }
638
639         if (error == 0) {
640                 error = hammer2_cluster_create(trans, cparent, &cluster,
641                                              lhc, 0,
642                                              HAMMER2_BREF_TYPE_INODE,
643                                              HAMMER2_INODE_BYTES);
644         }
645 #if INODE_DEBUG
646         kprintf("CREATE INODE %*.*s chain=%p\n",
647                 (int)name_len, (int)name_len, name,
648                 (cluster ? cluster->focus : NULL));
649 #endif
650
651         /*
652          * Cleanup and handle retries.
653          */
654         if (error == EAGAIN) {
655                 hammer2_cluster_ref(cparent);
656                 hammer2_inode_unlock_ex(dip, cparent);
657                 hammer2_cluster_wait(cparent);
658                 hammer2_cluster_drop(cparent);
659                 goto retry;
660         }
661         hammer2_inode_unlock_ex(dip, cparent);
662         cparent = NULL;
663
664         if (error) {
665                 KKASSERT(cluster == NULL);
666                 *errorp = error;
667                 return (NULL);
668         }
669
670         /*
671          * Set up the new inode.
672          *
673          * NOTE: *_get() integrates chain's lock into the inode lock.
674          *
675          * NOTE: Only one new inode can currently be created per
676          *       transaction.  If the need arises we can adjust
677          *       hammer2_trans_init() to allow more.
678          *
679          * NOTE: nipdata will have chain's blockset data.
680          */
681         KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_MODIFIED);
682         nipdata = &hammer2_cluster_wdata(cluster)->ipdata;
683         nipdata->inum = trans->inode_tid;
684         hammer2_cluster_modsync(cluster);
685         nip = hammer2_inode_get(dip->pmp, dip, cluster);
686         nipdata = &hammer2_cluster_wdata(cluster)->ipdata;
687
688         if (vap) {
689                 KKASSERT(trans->inodes_created == 0);
690                 nipdata->type = hammer2_get_obj_type(vap->va_type);
691                 nipdata->inum = trans->inode_tid;
692                 ++trans->inodes_created;
693
694                 switch (nipdata->type) {
695                 case HAMMER2_OBJTYPE_CDEV:
696                 case HAMMER2_OBJTYPE_BDEV:
697                         nipdata->rmajor = vap->va_rmajor;
698                         nipdata->rminor = vap->va_rminor;
699                         break;
700                 default:
701                         break;
702                 }
703         } else {
704                 nipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
705                 nipdata->inum = 1;
706         }
707         
708         /* Inherit parent's inode compression mode. */
709         nip->comp_heuristic = 0;
710         nipdata->comp_algo = dip_algo;
711         nipdata->version = HAMMER2_INODE_VERSION_ONE;
712         hammer2_update_time(&nipdata->ctime);
713         nipdata->mtime = nipdata->ctime;
714         if (vap)
715                 nipdata->mode = vap->va_mode;
716         nipdata->nlinks = 1;
717         if (vap) {
718                 if (dip && dip->pmp) {
719                         xuid = hammer2_to_unix_xid(&dip_uid);
720                         xuid = vop_helper_create_uid(dip->pmp->mp,
721                                                      dip_mode,
722                                                      xuid,
723                                                      cred,
724                                                      &vap->va_mode);
725                 } else {
726                         /* super-root has no dip and/or pmp */
727                         xuid = 0;
728                 }
729                 if (vap->va_vaflags & VA_UID_UUID_VALID)
730                         nipdata->uid = vap->va_uid_uuid;
731                 else if (vap->va_uid != (uid_t)VNOVAL)
732                         hammer2_guid_to_uuid(&nipdata->uid, vap->va_uid);
733                 else
734                         hammer2_guid_to_uuid(&nipdata->uid, xuid);
735
736                 if (vap->va_vaflags & VA_GID_UUID_VALID)
737                         nipdata->gid = vap->va_gid_uuid;
738                 else if (vap->va_gid != (gid_t)VNOVAL)
739                         hammer2_guid_to_uuid(&nipdata->gid, vap->va_gid);
740                 else if (dip)
741                         nipdata->gid = dip_gid;
742         }
743
744         /*
745          * Regular files and softlinks allow a small amount of data to be
746          * directly embedded in the inode.  This flag will be cleared if
747          * the size is extended past the embedded limit.
748          */
749         if (nipdata->type == HAMMER2_OBJTYPE_REGFILE ||
750             nipdata->type == HAMMER2_OBJTYPE_SOFTLINK) {
751                 nipdata->op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
752         }
753
754         KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
755         bcopy(name, nipdata->filename, name_len);
756         nipdata->name_key = lhc;
757         nipdata->name_len = name_len;
758         hammer2_cluster_modsync(cluster);
759         *clusterp = cluster;
760
761         return (nip);
762 }
763
764 /*
765  * The cluster has been removed from the original directory and replaced
766  * with a hardlink pointer.  Move the cluster to the specified parent
767  * directory, change the filename to "0xINODENUMBER", and adjust the key.
768  * The cluster becomes our invisible hardlink target.
769  *
770  * The original cluster must be deleted on entry.
771  */
772 static
773 void
774 hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_cluster_t *cluster,
775                         hammer2_inode_t *dip, hammer2_cluster_t *dcluster,
776                         int nlinks, int *errorp)
777 {
778         const hammer2_inode_data_t *iptmp;
779         hammer2_inode_data_t *nipdata;
780         hammer2_cluster_t *xcluster;
781         hammer2_key_t key_dummy;
782         hammer2_key_t lhc;
783         hammer2_blockref_t bref;
784         int ddflag;
785
786         iptmp = &hammer2_cluster_data(cluster)->ipdata;
787         lhc = iptmp->inum;
788         KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0);
789
790         /*
791          * Locate the inode or indirect block to create the new
792          * entry in.  lhc represents the inode number so there is
793          * no collision iteration.
794          *
795          * There should be no key collisions with invisible inode keys.
796          *
797          * WARNING! Must use inode_lock_ex() on dip to handle a stale
798          *          dip->cluster cache.
799          */
800         *errorp = 0;
801         xcluster = hammer2_cluster_lookup(dcluster, &key_dummy,
802                                       lhc, lhc, 0, &ddflag);
803         if (xcluster) {
804                 kprintf("X3 chain %p dip %p dchain %p dip->chain %p\n",
805                         xcluster->focus, dip, dcluster->focus,
806                         dip->cluster.focus);
807                 hammer2_cluster_unlock(xcluster);
808                 xcluster = NULL;
809                 *errorp = ENOSPC;
810 #if 0
811                 Debugger("X3");
812 #endif
813         }
814
815         /*
816          * Handle the error case
817          */
818         if (*errorp) {
819                 panic("error2");
820                 KKASSERT(xcluster == NULL);
821                 return;
822         }
823
824         /*
825          * Use xcluster as a placeholder for (lhc).  Duplicate cluster to the
826          * same target bref as xcluster and then delete xcluster.  The
827          * duplication occurs after xcluster in flush order even though
828          * xcluster is deleted after the duplication. XXX
829          *
830          * WARNING! Duplications (to a different parent) can cause indirect
831          *          blocks to be inserted, refactor xcluster.
832          *
833          * WARNING! Only key and keybits is extracted from a passed-in bref.
834          */
835         hammer2_cluster_bref(cluster, &bref);
836         bref.key = lhc;                 /* invisible dir entry key */
837         bref.keybits = 0;
838         hammer2_cluster_rename(trans, &bref, dcluster, cluster);
839
840         /*
841          * cluster is now 'live' again.. adjust the filename.
842          *
843          * Directory entries are inodes but this is a hidden hardlink
844          * target.  The name isn't used but to ease debugging give it
845          * a name after its inode number.
846          */
847         hammer2_cluster_modify(trans, cluster, 0);
848         nipdata = &hammer2_cluster_wdata(cluster)->ipdata;
849         ksnprintf(nipdata->filename, sizeof(nipdata->filename),
850                   "0x%016jx", (intmax_t)nipdata->inum);
851         nipdata->name_len = strlen(nipdata->filename);
852         nipdata->name_key = lhc;
853         nipdata->nlinks += nlinks;
854         hammer2_cluster_modsync(cluster);
855 }
856
857 /*
858  * Connect the target inode represented by (cluster) to the media topology
859  * at (dip, name, len).  The caller can pass a rough *chainp, this function
860  * will issue lookup()s to position the parent chain properly for the
861  * chain insertion.
862  *
863  * If hlink is TRUE this function creates an OBJTYPE_HARDLINK directory
864  * entry instead of connecting (cluster).
865  *
866  * If hlink is FALSE this function expects (cluster) to be unparented.
867  */
868 int
869 hammer2_inode_connect(hammer2_trans_t *trans,
870                       hammer2_cluster_t **clusterp, int hlink,
871                       hammer2_inode_t *dip, hammer2_cluster_t *dcluster,
872                       const uint8_t *name, size_t name_len,
873                       hammer2_key_t lhc)
874 {
875         hammer2_inode_data_t *wipdata;
876         hammer2_cluster_t *ocluster;
877         hammer2_cluster_t *ncluster;
878         hammer2_key_t key_dummy;
879         int ddflag;
880         int error;
881
882         /*
883          * Since ocluster is either disconnected from the topology or
884          * represents a hardlink terminus which is always a parent of or
885          * equal to dip, we should be able to safely lock dip->chain for
886          * our setup.
887          *
888          * WARNING! Must use inode_lock_ex() on dip to handle a stale
889          *          dip->cluster.
890          *
891          * If name is non-NULL we calculate lhc, else we use the passed-in
892          * lhc.
893          */
894         ocluster = *clusterp;
895
896         if (name) {
897                 lhc = hammer2_dirhash(name, name_len);
898
899                 /*
900                  * Locate the inode or indirect block to create the new
901                  * entry in.  At the same time check for key collisions
902                  * and iterate until we don't get one.
903                  */
904                 error = 0;
905                 while (error == 0) {
906                         ncluster = hammer2_cluster_lookup(dcluster, &key_dummy,
907                                                       lhc, lhc,
908                                                       0, &ddflag);
909                         if (ncluster == NULL)
910                                 break;
911                         if ((lhc & HAMMER2_DIRHASH_LOMASK) ==
912                             HAMMER2_DIRHASH_LOMASK) {
913                                 error = ENOSPC;
914                         }
915                         hammer2_cluster_unlock(ncluster);
916                         ncluster = NULL;
917                         ++lhc;
918                 }
919         } else {
920                 /*
921                  * Reconnect to specific key (used when moving
922                  * unlinked-but-open files into the hidden directory).
923                  */
924                 ncluster = hammer2_cluster_lookup(dcluster, &key_dummy,
925                                                   lhc, lhc,
926                                                   0, &ddflag);
927                 KKASSERT(ncluster == NULL);
928         }
929
930         if (error == 0) {
931                 if (hlink) {
932                         /*
933                          * Hardlink pointer needed, create totally fresh
934                          * directory entry.
935                          *
936                          * We must refactor ocluster because it might have
937                          * been shifted into an indirect cluster by the
938                          * create.
939                          */
940                         KKASSERT(ncluster == NULL);
941                         error = hammer2_cluster_create(trans,
942                                                        dcluster, &ncluster,
943                                                        lhc, 0,
944                                                        HAMMER2_BREF_TYPE_INODE,
945                                                        HAMMER2_INODE_BYTES);
946                 } else {
947                         /*
948                          * Reconnect the original cluster under the new name.
949                          * Original cluster must have already been deleted by
950                          * teh caller.
951                          *
952                          * WARNING! Can cause held-over clusters to require a
953                          *          refactor.  Fortunately we have none (our
954                          *          locked clusters are passed into and
955                          *          modified by the call).
956                          */
957                         ncluster = ocluster;
958                         ocluster = NULL;
959                         error = hammer2_cluster_create(trans,
960                                                        dcluster, &ncluster,
961                                                        lhc, 0,
962                                                        HAMMER2_BREF_TYPE_INODE,
963                                                        HAMMER2_INODE_BYTES);
964                 }
965         }
966
967         /*
968          * Unlock stuff.
969          */
970         KKASSERT(error != EAGAIN);
971
972         /*
973          * ncluster should be NULL on error, leave ocluster
974          * (ocluster == *clusterp) alone.
975          */
976         if (error) {
977                 KKASSERT(ncluster == NULL);
978                 return (error);
979         }
980
981         /*
982          * Directory entries are inodes so if the name has changed we have
983          * to update the inode.
984          *
985          * When creating an OBJTYPE_HARDLINK entry remember to unlock the
986          * cluster, the caller will access the hardlink via the actual hardlink
987          * target file and not the hardlink pointer entry, so we must still
988          * return ocluster.
989          */
990         if (hlink && hammer2_hardlink_enable >= 0) {
991                 /*
992                  * Create the HARDLINK pointer.  oip represents the hardlink
993                  * target in this situation.
994                  *
995                  * We will return ocluster (the hardlink target).
996                  */
997                 hammer2_cluster_modify(trans, ncluster, 0);
998                 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
999                 wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1000                 bcopy(name, wipdata->filename, name_len);
1001                 wipdata->name_key = lhc;
1002                 wipdata->name_len = name_len;
1003                 wipdata->target_type =
1004                                 hammer2_cluster_data(ocluster)->ipdata.type;
1005                 wipdata->type = HAMMER2_OBJTYPE_HARDLINK;
1006                 wipdata->inum = hammer2_cluster_data(ocluster)->ipdata.inum;
1007                 wipdata->nlinks = 1;
1008                 hammer2_cluster_modsync(ncluster);
1009                 hammer2_cluster_unlock(ncluster);
1010                 ncluster = ocluster;
1011                 ocluster = NULL;
1012         } else {
1013                 /*
1014                  * ncluster is a duplicate of ocluster at the new location.
1015                  * We must fixup the name stored in the inode data.
1016                  * The bref key has already been adjusted by inode_connect().
1017                  */
1018                 hammer2_cluster_modify(trans, ncluster, 0);
1019                 wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1020
1021                 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
1022                 bcopy(name, wipdata->filename, name_len);
1023                 wipdata->name_key = lhc;
1024                 wipdata->name_len = name_len;
1025                 wipdata->nlinks = 1;
1026                 hammer2_cluster_modsync(ncluster);
1027         }
1028
1029         /*
1030          * We are replacing ocluster with ncluster, unlock ocluster.  In the
1031          * case where ocluster is left unchanged the code above sets
1032          * ncluster to ocluster and ocluster to NULL, resulting in a NOP here.
1033          */
1034         if (ocluster)
1035                 hammer2_cluster_unlock(ocluster);
1036         *clusterp = ncluster;
1037
1038         return (0);
1039 }
1040
1041 /*
1042  * Repoint ip->cluster's chains to cluster's chains.  Caller must hold
1043  * the inode exclusively locked.  cluster may be NULL to clean out any
1044  * chains in ip->cluster.
1045  */
1046 void
1047 hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
1048                       hammer2_cluster_t *cluster)
1049 {
1050         hammer2_chain_t *ochain;
1051         hammer2_chain_t *nchain;
1052         hammer2_inode_t *opip;
1053         int i;
1054
1055         /*
1056          * Replace chains in ip->cluster with chains from cluster and
1057          * adjust the focus if necessary.
1058          *
1059          * NOTE: nchain and/or ochain can be NULL due to gaps
1060          *       in the cluster arrays.
1061          */
1062         ip->cluster.focus = NULL;
1063         for (i = 0; cluster && i < cluster->nchains; ++i) {
1064                 nchain = cluster->array[i];
1065                 if (i < ip->cluster.nchains) {
1066                         ochain = ip->cluster.array[i];
1067                         if (ochain == nchain) {
1068                                 if (ip->cluster.focus == NULL)
1069                                         ip->cluster.focus = nchain;
1070                                 continue;
1071                         }
1072                 } else {
1073                         ochain = NULL;
1074                 }
1075
1076                 /*
1077                  * Make adjustments
1078                  */
1079                 ip->cluster.array[i] = nchain;
1080                 if (ip->cluster.focus == NULL)
1081                         ip->cluster.focus = nchain;
1082                 if (nchain)
1083                         hammer2_chain_ref(nchain);
1084                 if (ochain)
1085                         hammer2_chain_drop(ochain);
1086         }
1087
1088         /*
1089          * Release any left-over chains in ip->cluster.
1090          */
1091         while (i < ip->cluster.nchains) {
1092                 nchain = ip->cluster.array[i];
1093                 if (nchain) {
1094                         ip->cluster.array[i] = NULL;
1095                         hammer2_chain_drop(nchain);
1096                 }
1097                 ++i;
1098         }
1099         ip->cluster.nchains = cluster ? cluster->nchains : 0;
1100
1101         /*
1102          * Repoint ip->pip if requested (non-NULL pip).
1103          */
1104         if (pip && ip->pip != pip) {
1105                 opip = ip->pip;
1106                 hammer2_inode_ref(pip);
1107                 ip->pip = pip;
1108                 if (opip)
1109                         hammer2_inode_drop(opip);
1110         }
1111 }
1112
1113 /*
1114  * Unlink the file from the specified directory inode.  The directory inode
1115  * does not need to be locked.
1116  *
1117  * isdir determines whether a directory/non-directory check should be made.
1118  * No check is made if isdir is set to -1.
1119  *
1120  * isopen specifies whether special unlink-with-open-descriptor handling
1121  * must be performed.  If set to -1 the caller is deleting a PFS and we
1122  * check whether the chain is mounted or not (chain->pmp != NULL).  1 is
1123  * implied if it is mounted.
1124  *
1125  * If isopen is 1 and nlinks drops to 0 this function must move the chain
1126  * to a special hidden directory until last-close occurs on the file.
1127  *
1128  * NOTE!  The underlying file can still be active with open descriptors
1129  *        or if the chain is being manually held (e.g. for rename).
1130  *
1131  *        The caller is responsible for fixing up ip->chain if e.g. a
1132  *        rename occurs (see chain_duplicate()).
1133  *
1134  * NOTE!  The chain is not deleted if it is moved to the hidden directory,
1135  *        but otherwise will be deleted.
1136  */
1137 int
1138 hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
1139                     const uint8_t *name, size_t name_len,
1140                     int isdir, int *hlinkp, struct nchandle *nch,
1141                     int nlinks)
1142 {
1143         const hammer2_inode_data_t *ripdata;
1144         hammer2_inode_data_t *wipdata;
1145         hammer2_cluster_t *cparent;
1146         hammer2_cluster_t *hcluster;
1147         hammer2_cluster_t *hparent;
1148         hammer2_cluster_t *cluster;
1149         hammer2_cluster_t *dparent;
1150         hammer2_cluster_t *dcluster;
1151         hammer2_key_t key_dummy;
1152         hammer2_key_t key_next;
1153         hammer2_key_t lhc;
1154         int error;
1155         int ddflag;
1156         uint8_t type;
1157
1158         error = 0;
1159         hcluster = NULL;
1160         hparent = NULL;
1161         lhc = hammer2_dirhash(name, name_len);
1162
1163 again:
1164         /*
1165          * Search for the filename in the directory
1166          */
1167         if (hlinkp)
1168                 *hlinkp = 0;
1169         cparent = hammer2_inode_lock_ex(dip);
1170         cluster = hammer2_cluster_lookup(cparent, &key_next,
1171                                      lhc, lhc + HAMMER2_DIRHASH_LOMASK,
1172                                      0, &ddflag);
1173         while (cluster) {
1174                 if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) {
1175                         ripdata = &hammer2_cluster_data(cluster)->ipdata;
1176                         if (ripdata->name_len == name_len &&
1177                             bcmp(ripdata->filename, name, name_len) == 0) {
1178                                 break;
1179                         }
1180                 }
1181                 cluster = hammer2_cluster_next(cparent, cluster, &key_next,
1182                                                key_next,
1183                                                lhc + HAMMER2_DIRHASH_LOMASK,
1184                                                0);
1185         }
1186         hammer2_inode_unlock_ex(dip, NULL);     /* retain parent */
1187
1188         /*
1189          * Not found or wrong type (isdir < 0 disables the type check).
1190          * If a hardlink pointer, type checks use the hardlink target.
1191          */
1192         if (cluster == NULL) {
1193                 error = ENOENT;
1194                 goto done;
1195         }
1196         ripdata = &hammer2_cluster_data(cluster)->ipdata;
1197         type = ripdata->type;
1198         if (type == HAMMER2_OBJTYPE_HARDLINK) {
1199                 if (hlinkp)
1200                         *hlinkp = 1;
1201                 type = ripdata->target_type;
1202         }
1203
1204         if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) {
1205                 error = ENOTDIR;
1206                 goto done;
1207         }
1208         if (type != HAMMER2_OBJTYPE_DIRECTORY && isdir >= 1) {
1209                 error = EISDIR;
1210                 goto done;
1211         }
1212
1213         /*
1214          * Hardlink must be resolved.  We can't hold the parent locked
1215          * while we do this or we could deadlock.  The physical file will
1216          * be located at or above the current directory.
1217          *
1218          * We loop to reacquire the hardlink origination.
1219          *
1220          * NOTE: hammer2_hardlink_find() will locate the hardlink target,
1221          *       returning a modified hparent and hcluster.
1222          */
1223         if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK) {
1224                 if (hcluster == NULL) {
1225                         hcluster = cluster;
1226                         hammer2_cluster_unlock(cparent);
1227                         cparent = NULL; /* safety */
1228                         error = hammer2_hardlink_find(dip, &hparent, hcluster);
1229                         cluster = NULL; /* safety */
1230                         KKASSERT(error == 0);
1231                         goto again;
1232                 }
1233         }
1234
1235         /*
1236          * If this is a directory the directory must be empty.  However, if
1237          * isdir < 0 we are doing a rename and the directory does not have
1238          * to be empty, and if isdir > 1 we are deleting a PFS/snapshot
1239          * and the directory does not have to be empty.
1240          *
1241          * NOTE: We check the full key range here which covers both visible
1242          *       and invisible entries.  Theoretically there should be no
1243          *       invisible (hardlink target) entries if there are no visible
1244          *       entries.
1245          */
1246         if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) {
1247                 dparent = hammer2_cluster_lookup_init(cluster, 0);
1248                 dcluster = hammer2_cluster_lookup(dparent, &key_dummy,
1249                                                   0, (hammer2_key_t)-1,
1250                                                   HAMMER2_LOOKUP_NODATA,
1251                                                   &ddflag);
1252                 if (dcluster) {
1253                         hammer2_cluster_unlock(dcluster);
1254                         hammer2_cluster_lookup_done(dparent);
1255                         error = ENOTEMPTY;
1256                         goto done;
1257                 }
1258                 hammer2_cluster_lookup_done(dparent);
1259                 dparent = NULL;
1260                 /* dcluster NULL */
1261         }
1262
1263         /*
1264          * If this was a hardlink (cparent, cluster) is the hardlink
1265          * pointer, which we can simply destroy outright.  Discard the
1266          * clusters and replace with the hardlink target.
1267          */
1268         if (hcluster) {
1269                 hammer2_cluster_delete(trans, cparent, cluster,
1270                                        HAMMER2_DELETE_PERMANENT);
1271                 hammer2_cluster_unlock(cparent);
1272                 hammer2_cluster_unlock(cluster);
1273                 cparent = hparent;
1274                 cluster = hcluster;
1275                 hparent = NULL;
1276                 hcluster = NULL;
1277         }
1278
1279         /*
1280          * This leaves us with the hardlink target or non-hardlinked file
1281          * or directory in (cparent, cluster).
1282          *
1283          * Delete the target when nlinks reaches 0 with special handling
1284          * if (isopen) is set.
1285          *
1286          * NOTE! In DragonFly the vnops function calls cache_unlink() after
1287          *       calling us here to clean out the namecache association,
1288          *       (which does not represent a ref for the open-test), and to
1289          *       force finalization of the vnode if/when the last ref gets
1290          *       dropped.
1291          *
1292          * NOTE! Files are unlinked by rename and then relinked.  nch will be
1293          *       passed as NULL in this situation.  hammer2_inode_connect()
1294          *       will bump nlinks.
1295          */
1296         KKASSERT(cluster != NULL);
1297         hammer2_cluster_modify(trans, cluster, 0);
1298         wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1299         ripdata = wipdata;
1300         wipdata->nlinks += nlinks;
1301         if ((int64_t)wipdata->nlinks < 0) {     /* XXX debugging */
1302                 wipdata->nlinks = 0;
1303         }
1304         hammer2_cluster_modsync(cluster);
1305
1306         if (wipdata->nlinks == 0) {
1307                 if ((cluster->focus->flags & HAMMER2_CHAIN_PFSROOT) &&
1308                     cluster->pmp) {
1309                         error = EINVAL;
1310                         kprintf("hammer2: PFS \"%s\" cannot be deleted "
1311                                 "while still mounted\n",
1312                                 wipdata->filename);
1313                         goto done;
1314                 }
1315                 if (nch && cache_isopen(nch)) {
1316                         hammer2_inode_move_to_hidden(trans, &cparent, &cluster,
1317                                                      wipdata->inum);
1318                 } else {
1319                         /*
1320                          * This won't get everything if a vnode is still
1321                          * present, but the cache_unlink() call the caller
1322                          * makes will.
1323                          */
1324                         hammer2_cluster_delete(trans, cparent, cluster,
1325                                                HAMMER2_DELETE_PERMANENT);
1326                 }
1327         } else if (*hlinkp == 0) {
1328                 /*
1329                  * If this wasn't a hardlinked file and wipdata->nlinks is
1330                  * still non-zero, the adjustment should be 0 (i.e. a rename),
1331                  * in which case we temporarily delete the object so the
1332                  * rename code can reconnect it elsewhere.
1333                  */
1334                 KKASSERT(nlinks == 0);
1335                 hammer2_cluster_delete(trans, cparent, cluster, 0);
1336         }
1337         error = 0;
1338 done:
1339         if (cparent)
1340                 hammer2_cluster_unlock(cparent);
1341         if (cluster)
1342                 hammer2_cluster_unlock(cluster);
1343         if (hparent)
1344                 hammer2_cluster_unlock(hparent);
1345         if (hcluster)
1346                 hammer2_cluster_unlock(hcluster);
1347
1348         return error;
1349 }
1350
1351 /*
1352  * This is called from the mount code to initialize pmp->ihidden
1353  */
1354 void
1355 hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp)
1356 {
1357         hammer2_trans_t trans;
1358         hammer2_cluster_t *cparent;
1359         hammer2_cluster_t *cluster;
1360         hammer2_cluster_t *scan;
1361         hammer2_inode_data_t *wipdata;
1362         hammer2_key_t key_dummy;
1363         hammer2_key_t key_next;
1364         int ddflag;
1365         int error;
1366         int count;
1367
1368         if (pmp->ihidden)
1369                 return;
1370
1371         /*
1372          * Find the hidden directory
1373          */
1374         bzero(&key_dummy, sizeof(key_dummy));
1375         hammer2_trans_init(&trans, pmp, 0);
1376
1377         cparent = hammer2_inode_lock_ex(pmp->iroot);
1378         cluster = hammer2_cluster_lookup(cparent, &key_dummy,
1379                                          HAMMER2_INODE_HIDDENDIR,
1380                                          HAMMER2_INODE_HIDDENDIR,
1381                                          0, &ddflag);
1382         if (cluster) {
1383                 pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster);
1384                 hammer2_inode_ref(pmp->ihidden);
1385
1386                 /*
1387                  * Remove any unlinked files which were left open as-of
1388                  * any system crash.
1389                  */
1390                 count = 0;
1391                 scan = hammer2_cluster_lookup(cluster, &key_next,
1392                                               0, HAMMER2_TID_MAX,
1393                                               HAMMER2_LOOKUP_NODATA, &ddflag);
1394                 while (scan) {
1395                         if (hammer2_cluster_type(scan) ==
1396                             HAMMER2_BREF_TYPE_INODE) {
1397                                 hammer2_cluster_delete(&trans, cluster, scan,
1398                                                    HAMMER2_DELETE_PERMANENT);
1399                                 ++count;
1400                         }
1401                         scan = hammer2_cluster_next(cluster, scan, &key_next,
1402                                                     0, HAMMER2_TID_MAX,
1403                                                     HAMMER2_LOOKUP_NODATA);
1404                 }
1405
1406                 hammer2_inode_unlock_ex(pmp->ihidden, cluster);
1407                 hammer2_inode_unlock_ex(pmp->iroot, cparent);
1408                 hammer2_trans_done(&trans);
1409                 kprintf("hammer2: PFS loaded hidden dir, "
1410                         "removed %d dead entries\n", count);
1411                 return;
1412         }
1413
1414         /*
1415          * Create the hidden directory
1416          */
1417         error = hammer2_cluster_create(&trans, cparent, &cluster,
1418                                        HAMMER2_INODE_HIDDENDIR, 0,
1419                                        HAMMER2_BREF_TYPE_INODE,
1420                                        HAMMER2_INODE_BYTES);
1421         hammer2_inode_unlock_ex(pmp->iroot, cparent);
1422
1423         hammer2_cluster_modify(&trans, cluster, 0);
1424         wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1425         wipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
1426         wipdata->inum = HAMMER2_INODE_HIDDENDIR;
1427         wipdata->nlinks = 1;
1428         hammer2_cluster_modsync(cluster);
1429         kprintf("hammer2: PFS root missing hidden directory, creating\n");
1430
1431         pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster);
1432         hammer2_inode_ref(pmp->ihidden);
1433         hammer2_inode_unlock_ex(pmp->ihidden, cluster);
1434         hammer2_trans_done(&trans);
1435 }
1436
1437 /*
1438  * If an open file is unlinked H2 needs to retain the file in the topology
1439  * to ensure that its backing store is not recovered by the bulk free scan.
1440  * This also allows us to avoid having to special-case the CHAIN_DELETED flag.
1441  *
1442  * To do this the file is moved to a hidden directory in the PFS root and
1443  * renamed.  The hidden directory must be created if it does not exist.
1444  */
1445 static
1446 void
1447 hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
1448                              hammer2_cluster_t **cparentp,
1449                              hammer2_cluster_t **clusterp,
1450                              hammer2_tid_t inum)
1451 {
1452         hammer2_cluster_t *dcluster;
1453         hammer2_pfsmount_t *pmp;
1454         int error;
1455
1456         pmp = (*clusterp)->pmp;
1457         KKASSERT(pmp != NULL);
1458         KKASSERT(pmp->ihidden != NULL);
1459
1460         hammer2_cluster_delete(trans, *cparentp, *clusterp, 0);
1461         dcluster = hammer2_inode_lock_ex(pmp->ihidden);
1462         error = hammer2_inode_connect(trans, clusterp, 0,
1463                                       pmp->ihidden, dcluster,
1464                                       NULL, 0, inum);
1465         hammer2_inode_unlock_ex(pmp->ihidden, dcluster);
1466         KKASSERT(error == 0);
1467 }
1468
1469 /*
1470  * Given an exclusively locked inode and cluster we consolidate the cluster
1471  * for hardlink creation, adding (nlinks) to the file's link count and
1472  * potentially relocating the inode to (cdip) which is a parent directory
1473  * common to both the current location of the inode and the intended new
1474  * hardlink.
1475  *
1476  * Replaces (*clusterp) if consolidation occurred, unlocking the old cluster
1477  * and returning a new locked cluster.
1478  *
1479  * NOTE!  This function will also replace ip->cluster.
1480  */
1481 int
1482 hammer2_hardlink_consolidate(hammer2_trans_t *trans,
1483                              hammer2_inode_t *ip,
1484                              hammer2_cluster_t **clusterp,
1485                              hammer2_inode_t *cdip,
1486                              hammer2_cluster_t *cdcluster,
1487                              int nlinks)
1488 {
1489         const hammer2_inode_data_t *ripdata;
1490         hammer2_inode_data_t *wipdata;
1491         hammer2_cluster_t *cluster;
1492         hammer2_cluster_t *cparent;
1493         int error;
1494
1495         cluster = *clusterp;
1496         ripdata = &hammer2_cluster_data(cluster)->ipdata;
1497         if (nlinks == 0 &&                      /* no hardlink needed */
1498             (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE)) {
1499                 return (0);
1500         }
1501
1502         if (hammer2_hardlink_enable == 0) {     /* disallow hardlinks */
1503                 hammer2_cluster_unlock(cluster);
1504                 *clusterp = NULL;
1505                 return (ENOTSUP);
1506         }
1507
1508         cparent = NULL;
1509
1510         /*
1511          * If no change in the hardlink's target directory is required and
1512          * this is already a hardlink target, all we need to do is adjust
1513          * the link count.
1514          */
1515         ripdata = &hammer2_cluster_data(cluster)->ipdata;
1516         if (cdip == ip->pip &&
1517             (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE) == 0) {
1518                 if (nlinks) {
1519                         hammer2_cluster_modify(trans, cluster, 0);
1520                         wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1521                         wipdata->nlinks += nlinks;
1522                         hammer2_cluster_modsync(cluster);
1523                         ripdata = wipdata;
1524                 }
1525                 error = 0;
1526                 goto done;
1527         }
1528
1529         /*
1530          * Cluster is the real inode.  The originating directory is locked
1531          * by the caller so we can manipulate it without worrying about races
1532          * against other lookups.
1533          *
1534          * If cluster is visible we need to delete it from the current
1535          * location and create a hardlink pointer in its place.  If it is
1536          * not visible we need only delete it.  Then later cluster will be
1537          * renamed to a parent directory and converted (if necessary) to
1538          * a hidden inode (via shiftup).
1539          *
1540          * NOTE! We must hold cparent locked through the delete/create/rename
1541          *       operation to ensure that other threads block resolving to
1542          *       the same hardlink, otherwise the other threads may not see
1543          *       the hardlink.
1544          */
1545         KKASSERT((cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0);
1546         cparent = hammer2_cluster_parent(cluster);
1547
1548         hammer2_cluster_delete(trans, cparent, cluster, 0);
1549
1550         ripdata = &hammer2_cluster_data(cluster)->ipdata;
1551         KKASSERT(ripdata->type != HAMMER2_OBJTYPE_HARDLINK);
1552         if (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE) {
1553                 hammer2_cluster_t *ncluster;
1554                 hammer2_key_t lhc;
1555
1556                 ncluster = NULL;
1557                 lhc = cluster->focus->bref.key;
1558                 error = hammer2_cluster_create(trans, cparent, &ncluster,
1559                                              lhc, 0,
1560                                              HAMMER2_BREF_TYPE_INODE,
1561                                              HAMMER2_INODE_BYTES);
1562                 hammer2_cluster_modify(trans, ncluster, 0);
1563                 wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1564
1565                 wipdata->comp_algo = ripdata->comp_algo;
1566                 wipdata->version = HAMMER2_INODE_VERSION_ONE;
1567                 wipdata->inum = ripdata->inum;
1568                 wipdata->target_type = ripdata->type;
1569                 wipdata->type = HAMMER2_OBJTYPE_HARDLINK;
1570                 wipdata->uflags = 0;
1571                 wipdata->rmajor = 0;
1572                 wipdata->rminor = 0;
1573                 wipdata->ctime = 0;
1574                 wipdata->mtime = 0;
1575                 wipdata->atime = 0;
1576                 wipdata->btime = 0;
1577                 bzero(&wipdata->uid, sizeof(wipdata->uid));
1578                 bzero(&wipdata->gid, sizeof(wipdata->gid));
1579                 wipdata->op_flags = HAMMER2_OPFLAG_DIRECTDATA;
1580                 wipdata->cap_flags = 0;
1581                 wipdata->mode = 0;
1582                 wipdata->size = 0;
1583                 wipdata->nlinks = 1;
1584                 wipdata->iparent = 0;   /* XXX */
1585                 wipdata->pfs_type = 0;
1586                 wipdata->pfs_inum = 0;
1587                 bzero(&wipdata->pfs_clid, sizeof(wipdata->pfs_clid));
1588                 bzero(&wipdata->pfs_fsid, sizeof(wipdata->pfs_fsid));
1589                 wipdata->data_quota = 0;
1590                 wipdata->data_count = 0;
1591                 wipdata->inode_quota = 0;
1592                 wipdata->inode_count = 0;
1593                 wipdata->attr_tid = 0;
1594                 wipdata->dirent_tid = 0;
1595                 bzero(&wipdata->u, sizeof(wipdata->u));
1596                 bcopy(ripdata->filename, wipdata->filename, ripdata->name_len);
1597                 wipdata->name_key = ncluster->focus->bref.key;
1598                 wipdata->name_len = ripdata->name_len;
1599                 /* XXX transaction ids */
1600                 hammer2_cluster_modsync(ncluster);
1601                 hammer2_cluster_unlock(ncluster);
1602         }
1603         ripdata = wipdata;
1604
1605         /*
1606          * cluster represents the hardlink target and is now flagged deleted.
1607          * duplicate it to the parent directory and adjust nlinks.
1608          *
1609          * WARNING! The shiftup() call can cause ncluster to be moved into
1610          *          an indirect block, and our ncluster will wind up pointing
1611          *          to the older/original version.
1612          */
1613         KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_DELETED);
1614         hammer2_hardlink_shiftup(trans, cluster, cdip, cdcluster,
1615                                  nlinks, &error);
1616
1617         if (error == 0)
1618                 hammer2_inode_repoint(ip, cdip, cluster);
1619
1620 done:
1621         /*
1622          * Cleanup, cluster/ncluster already dealt with.
1623          *
1624          * Return the shifted cluster in *clusterp.
1625          */
1626         if (cparent)
1627                 hammer2_cluster_unlock(cparent);
1628         *clusterp = cluster;
1629         hammer2_inode_drop(cdip);
1630
1631         return (error);
1632 }
1633
1634 /*
1635  * If (*ochainp) is non-NULL it points to the forward OBJTYPE_HARDLINK
1636  * inode while (*chainp) points to the resolved (hidden hardlink
1637  * target) inode.  In this situation when nlinks is 1 we wish to
1638  * deconsolidate the hardlink, moving it back to the directory that now
1639  * represents the only remaining link.
1640  */
1641 int
1642 hammer2_hardlink_deconsolidate(hammer2_trans_t *trans,
1643                                hammer2_inode_t *dip,
1644                                hammer2_chain_t **chainp,
1645                                hammer2_chain_t **ochainp)
1646 {
1647         if (*ochainp == NULL)
1648                 return (0);
1649         /* XXX */
1650         return (0);
1651 }
1652
1653 /*
1654  * The caller presents a locked cluster with an obj_type of
1655  * HAMMER2_OBJTYPE_HARDLINK.  This routine will replace the cluster with
1656  * the target hardlink (which typically exists in some parent directory as
1657  * a hidden file).  If cparentp is not NULL a locked cluster representing
1658  * the hardlink's parent is also returned.
1659  *
1660  * If no match is found EIO is returned, *cparentp will be set to NULL,
1661  * and the cluster will be unlocked and eaten up.
1662  */
1663 int
1664 hammer2_hardlink_find(hammer2_inode_t *dip,
1665                       hammer2_cluster_t **cparentp, hammer2_cluster_t *cluster)
1666 {
1667         const hammer2_inode_data_t *ipdata;
1668         hammer2_cluster_t *cparent;
1669         hammer2_cluster_t *rcluster;
1670         hammer2_inode_t *ip;
1671         hammer2_inode_t *pip;
1672         hammer2_key_t key_dummy;
1673         hammer2_key_t lhc;
1674         int ddflag;
1675
1676         pip = dip;
1677         hammer2_inode_ref(pip);         /* for loop */
1678
1679         /*
1680          * Locate the hardlink.  pip is referenced and not locked.
1681          */
1682         ipdata = &hammer2_cluster_data(cluster)->ipdata;
1683         lhc = ipdata->inum;
1684
1685         /*
1686          * We don't need the cluster's chains, but we need to retain the
1687          * cluster structure itself so we can load the hardlink search
1688          * result into it.
1689          */
1690         KKASSERT(cluster->refs == 1);
1691         atomic_add_int(&cluster->refs, 1);
1692         hammer2_cluster_unlock(cluster);        /* hack */
1693         cluster->nchains = 0;                   /* hack */
1694
1695         rcluster = NULL;
1696         cparent = NULL;
1697
1698         while ((ip = pip) != NULL) {
1699                 cparent = hammer2_inode_lock_ex(ip);
1700                 hammer2_inode_drop(ip);                 /* loop */
1701                 KKASSERT(hammer2_cluster_type(cparent) ==
1702                          HAMMER2_BREF_TYPE_INODE);
1703                 rcluster = hammer2_cluster_lookup(cparent, &key_dummy,
1704                                              lhc, lhc, 0, &ddflag);
1705                 if (rcluster)
1706                         break;
1707                 hammer2_cluster_lookup_done(cparent);   /* discard parent */
1708                 pip = ip->pip;          /* safe, ip held locked */
1709                 if (pip)
1710                         hammer2_inode_ref(pip);         /* loop */
1711                 hammer2_inode_unlock_ex(ip, NULL);
1712         }
1713
1714         /*
1715          * chain is locked, ip is locked.  Unlock ip, return the locked
1716          * chain.  *ipp is already set w/a ref count and not locked.
1717          *
1718          * (cparent is already unlocked).
1719          */
1720         if (rcluster) {
1721                 hammer2_cluster_replace(cluster, rcluster);
1722                 hammer2_cluster_drop(rcluster);
1723                 if (cparentp)
1724                         *cparentp = cparent;
1725                 else
1726                         hammer2_inode_unlock_ex(ip, cparent);
1727                 return (0);
1728         } else {
1729                 if (cparentp)
1730                         *cparentp = NULL;
1731                 if (ip)
1732                         hammer2_inode_unlock_ex(ip, cparent);
1733                 return (EIO);
1734         }
1735 }
1736
1737 /*
1738  * Find the directory common to both fdip and tdip.
1739  *
1740  * Returns a held but not locked inode.  Caller typically locks the inode,
1741  * and when through unlocks AND drops it.
1742  */
1743 hammer2_inode_t *
1744 hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip)
1745 {
1746         hammer2_inode_t *scan1;
1747         hammer2_inode_t *scan2;
1748
1749         /*
1750          * We used to have a depth field but it complicated matters too
1751          * much for directory renames.  So now its ugly.  Check for
1752          * simple cases before giving up and doing it the expensive way.
1753          *
1754          * XXX need a bottom-up topology stability lock
1755          */
1756         if (fdip == tdip || fdip == tdip->pip) {
1757                 hammer2_inode_ref(fdip);
1758                 return(fdip);
1759         }
1760         if (fdip->pip == tdip) {
1761                 hammer2_inode_ref(tdip);
1762                 return(tdip);
1763         }
1764
1765         /*
1766          * XXX not MPSAFE
1767          */
1768         for (scan1 = fdip; scan1->pmp == fdip->pmp; scan1 = scan1->pip) {
1769                 scan2 = tdip;
1770                 while (scan2->pmp == tdip->pmp) {
1771                         if (scan1 == scan2) {
1772                                 hammer2_inode_ref(scan1);
1773                                 return(scan1);
1774                         }
1775                         scan2 = scan2->pip;
1776                         if (scan2 == NULL)
1777                                 break;
1778                 }
1779         }
1780         panic("hammer2_inode_common_parent: no common parent %p %p\n",
1781               fdip, tdip);
1782         /* NOT REACHED */
1783         return(NULL);
1784 }
1785
1786 /*
1787  * Synchronize the inode's frontend state with the chain state prior
1788  * to any explicit flush of the inode or any strategy write call.
1789  *
1790  * Called with a locked inode.
1791  */
1792 void
1793 hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip, 
1794                     hammer2_cluster_t *cparent)
1795 {
1796         const hammer2_inode_data_t *ripdata;
1797         hammer2_inode_data_t *wipdata;
1798         hammer2_cluster_t *dparent;
1799         hammer2_cluster_t *cluster;
1800         hammer2_key_t lbase;
1801         hammer2_key_t key_next;
1802         int dosync = 0;
1803         int ddflag;
1804
1805         ripdata = &hammer2_cluster_data(cparent)->ipdata;    /* target file */
1806
1807         if (ip->flags & HAMMER2_INODE_MTIME) {
1808                 wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
1809                 atomic_clear_int(&ip->flags, HAMMER2_INODE_MTIME);
1810                 wipdata->mtime = ip->mtime;
1811                 dosync = 1;
1812                 ripdata = wipdata;
1813         }
1814         if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size < ripdata->size) {
1815                 wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
1816                 wipdata->size = ip->size;
1817                 dosync = 1;
1818                 ripdata = wipdata;
1819                 atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);
1820
1821                 /*
1822                  * We must delete any chains beyond the EOF.  The chain
1823                  * straddling the EOF will be pending in the bioq.
1824                  */
1825                 lbase = (ripdata->size + HAMMER2_PBUFMASK64) &
1826                         ~HAMMER2_PBUFMASK64;
1827                 dparent = hammer2_cluster_lookup_init(&ip->cluster, 0);
1828                 cluster = hammer2_cluster_lookup(dparent, &key_next,
1829                                                  lbase, (hammer2_key_t)-1,
1830                                                  HAMMER2_LOOKUP_NODATA,
1831                                                  &ddflag);
1832                 while (cluster) {
1833                         /*
1834                          * Degenerate embedded case, nothing to loop on
1835                          */
1836                         switch (hammer2_cluster_type(cluster)) {
1837                         case HAMMER2_BREF_TYPE_INODE:
1838                                 hammer2_cluster_unlock(cluster);
1839                                 cluster = NULL;
1840                                 break;
1841                         case HAMMER2_BREF_TYPE_DATA:
1842                                 hammer2_cluster_delete(trans, dparent, cluster,
1843                                                    HAMMER2_DELETE_PERMANENT);
1844                                 /* fall through */
1845                         default:
1846                                 cluster = hammer2_cluster_next(dparent, cluster,
1847                                                    &key_next,
1848                                                    key_next, (hammer2_key_t)-1,
1849                                                    HAMMER2_LOOKUP_NODATA);
1850                                 break;
1851                         }
1852                 }
1853                 hammer2_cluster_lookup_done(dparent);
1854         } else
1855         if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size > ripdata->size) {
1856                 wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
1857                 wipdata->size = ip->size;
1858                 atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);
1859
1860                 /*
1861                  * When resizing larger we may not have any direct-data
1862                  * available.
1863                  */
1864                 if ((wipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) &&
1865                     ip->size > HAMMER2_EMBEDDED_BYTES) {
1866                         wipdata->op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA;
1867                         bzero(&wipdata->u.blockset,
1868                               sizeof(wipdata->u.blockset));
1869                 }
1870                 dosync = 1;
1871                 ripdata = wipdata;
1872         }
1873         if (dosync)
1874                 hammer2_cluster_modsync(cparent);
1875 }