hammer2 - hardlink stabilization (3), data and inode count propagation.
author Matthew Dillon <dillon@apollo.backplane.com>
Fri, 18 May 2012 01:41:51 +0000 (18:41 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Fri, 18 May 2012 01:41:51 +0000 (18:41 -0700)
* Files with cached chains have to be flushed before they can be copied
  to the hardlink target, because the original inode will become an
  OBJTYPE_HARDLINK pointer, which isn't allowed to have any sub-chains
  under the inode.

* We also need to flush for the upcoming snapshot function to work properly;
  otherwise dirty in-memory data will not show up in the snapshot.

* Propagate the inode and byte use count up the chain.  Tie the inode count
  into df's inode count (per-PFS).  The byte count and quota fields are not
  yet tied in.

* Adjust stat[v]fs() to return filesystem space usage using the allocation
  iterator for now, to aid debugging.

* Adjust the allocation iterator to skip reserved areas at the beginning of
  each 2GB storage zone.

sys/vfs/hammer2/hammer2.h
sys/vfs/hammer2/hammer2_chain.c
sys/vfs/hammer2/hammer2_freemap.c
sys/vfs/hammer2/hammer2_inode.c
sys/vfs/hammer2/hammer2_vfsops.c
sys/vfs/hammer2/hammer2_vnops.c

index 352b970..d0a6a0f 100644 (file)
@@ -125,6 +125,15 @@ typedef struct hammer2_chain hammer2_chain_t;
 int hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2);
 SPLAY_PROTOTYPE(hammer2_chain_splay, hammer2_chain, snode, hammer2_chain_cmp);
 
+/*
+ * MOVED - This bit is set during the flush when the MODIFIED bit is cleared,
+ *        indicating that the parent's blocktable must inherit a change to
+ *        the bref (typically a block reallocation)
+ *
+ *        It must also be set in situations where a chain is not MODIFIED
+ *        but whose bref has changed (typically due to fields other than
+ *        a block reallocation).
+ */
 #define HAMMER2_CHAIN_MODIFIED         0x00000001      /* active mods */
 #define HAMMER2_CHAIN_DIRTYEMBED       0x00000002      /* inode embedded */
 #define HAMMER2_CHAIN_DIRTYBP          0x00000004      /* dirty on unlock */
@@ -132,7 +141,7 @@ SPLAY_PROTOTYPE(hammer2_chain_splay, hammer2_chain, snode, hammer2_chain_cmp);
 #define HAMMER2_CHAIN_DELETED          0x00000010
 #define HAMMER2_CHAIN_INITIAL          0x00000020      /* initial create */
 #define HAMMER2_CHAIN_FLUSHED          0x00000040      /* flush on unlock */
-#define HAMMER2_CHAIN_MOVED            0x00000080      /* moved */
+#define HAMMER2_CHAIN_MOVED            0x00000080      /* bref changed */
 #define HAMMER2_CHAIN_IOFLUSH          0x00000100      /* bawrite on put */
 #define HAMMER2_CHAIN_DEFERRED         0x00000200      /* on a deferral list*/
 #define HAMMER2_CHAIN_DESTROYED                0x00000400      /* destroying */
@@ -220,6 +229,8 @@ struct hammer2_inode {
        struct hammer2_inode_data ip_data;
        struct lockf            advlock;
        u_int                   depth;          /* directory depth */
+       hammer2_off_t           delta_dcount;   /* adjust data_count */
+       hammer2_off_t           delta_icount;   /* adjust inode_count */
 };
 
 typedef struct hammer2_inode hammer2_inode_t;
@@ -402,7 +413,7 @@ int hammer2_chain_lock(hammer2_mount_t *hmp, hammer2_chain_t *chain, int how);
 void hammer2_chain_moved(hammer2_mount_t *hmp, hammer2_chain_t *chain);
 void hammer2_chain_modify(hammer2_mount_t *hmp, hammer2_chain_t *chain,
                                int flags);
-void hammer2_chain_resize(hammer2_mount_t *hmp, hammer2_chain_t *chain,
+void hammer2_chain_resize(hammer2_inode_t *ip, hammer2_chain_t *chain,
                                int nradix, int flags);
 void hammer2_chain_unlock(hammer2_mount_t *hmp, hammer2_chain_t *chain);
 hammer2_chain_t *hammer2_chain_find(hammer2_mount_t *hmp,
index a10e4fa..00b9ab1 100644 (file)
@@ -217,7 +217,10 @@ hammer2_chain_drop(hammer2_mount_t *hmp, hammer2_chain_t *chain)
                                lockmgr(&parent->lk, LK_EXCLUSIVE);
                        if (atomic_cmpset_int(&chain->refs, 1, 0)) {
                                /*
-                                * Succeeded, recurse and drop parent
+                                * Succeeded, recurse and drop parent.
+                                * These chain elements should be synchronized
+                                * so no delta data or inode count updates
+                                * should be needed.
                                 */
                                if (!(chain->flags & HAMMER2_CHAIN_DELETED)) {
                                        SPLAY_REMOVE(hammer2_chain_splay,
@@ -553,9 +556,10 @@ hammer2_chain_unlock(hammer2_mount_t *hmp, hammer2_chain_t *chain)
  * XXX flags currently ignored, uses chain->bp to detect data/no-data.
  */
 void
-hammer2_chain_resize(hammer2_mount_t *hmp, hammer2_chain_t *chain,
+hammer2_chain_resize(hammer2_inode_t *ip, hammer2_chain_t *chain,
                     int nradix, int flags)
 {
+       hammer2_mount_t *hmp = ip->hmp;
        struct buf *nbp;
        hammer2_off_t pbase;
        size_t obytes;
@@ -597,6 +601,7 @@ hammer2_chain_resize(hammer2_mount_t *hmp, hammer2_chain_t *chain,
        chain->bref.data_off = hammer2_freemap_alloc(hmp, chain->bref.type,
                                                     nbytes);
        chain->bytes = nbytes;
+       ip->delta_dcount += (ssize_t)(nbytes - obytes); /* XXX atomic */
 
        /*
         * The device buffer may be larger than the allocation size.
@@ -1417,6 +1422,8 @@ again:
         */
        switch(parent->bref.type) {
        case HAMMER2_BREF_TYPE_INODE:
+               KKASSERT((parent->u.ip->ip_data.op_flags &
+                         HAMMER2_OPFLAG_DIRECTDATA) == 0);
                KKASSERT(parent->data != NULL);
                base = &parent->data->ipdata.u.blockset.blockref[0];
                count = HAMMER2_SET_COUNT;
@@ -1494,7 +1501,9 @@ again:
        }
 
        /*
-        * Link the chain into its parent.
+        * Link the chain into its parent.  Later on we will have to set
+        * the MOVED bit in situations where we don't mark the new chain
+        * as being modified.
         */
        if (chain->parent != NULL)
                panic("hammer2: hammer2_chain_create: chain already connected");
@@ -1510,15 +1519,22 @@ again:
        /*
         * Additional linkage for inodes.  Reuse the parent pointer to
         * find the parent directory.
+        *
+        * Cumulative adjustments are inherited on [re]attach.
         */
        if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
                hammer2_chain_t *scan = parent;
+               hammer2_inode_t *ip = chain->u.ip;
+
                while (scan->bref.type == HAMMER2_BREF_TYPE_INDIRECT)
                        scan = scan->parent;
                if (scan->bref.type == HAMMER2_BREF_TYPE_INODE) {
-                       chain->u.ip->pip = scan->u.ip;
-                       chain->u.ip->pmp = scan->u.ip->pmp;
-                       chain->u.ip->depth = scan->u.ip->depth + 1;
+                       ip->pip = scan->u.ip;
+                       ip->pmp = scan->u.ip->pmp;
+                       ip->depth = scan->u.ip->depth + 1;
+                       ip->delta_icount += ip->ip_data.inode_count;
+                       ip->delta_dcount += ip->ip_data.data_count;
+                       ++ip->pip->delta_icount;
                }
        }
 
@@ -1853,13 +1869,13 @@ hammer2_chain_create_indirect(hammer2_mount_t *hmp, hammer2_chain_t *parent,
 
                /*
                 * Load the new indirect block by acquiring or allocating
-                * the related chain entries, then simply move it to the
+                * the related chain entries, then simply move them to the
                 * new parent (ichain).
                 *
-                * Flagging the new chain entry MOVED will cause a flush
-                * to synchronize its block into the new indirect block.
-                * The chain is unlocked after being moved but needs to
-                * retain a reference for the MOVED state
+                * When adjusting the parent/child relationship we must
+                * set the MOVED bit if we do not otherwise set the
+                * MODIFIED bit, and call setsubmod() to ensure that the
+                * parent sees the bref adjustment.
                 *
                 * We must still set SUBMODIFIED in the parent but we do
                 * that after the loop.
@@ -1890,6 +1906,10 @@ hammer2_chain_create_indirect(hammer2_mount_t *hmp, hammer2_chain_t *parent,
         * Insert the new indirect block into the parent now that we've
         * cleared out some entries in the parent.  We calculated a good
         * insertion index in the loop above (ichain->index).
+        *
+        * We don't have to set MOVED here because we mark ichain modified
+        * down below (so the normal modified -> flush -> set-moved sequence
+        * applies).
         */
        KKASSERT(ichain->index >= 0);
        if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, ichain))
@@ -1957,6 +1977,7 @@ hammer2_chain_delete(hammer2_mount_t *hmp, hammer2_chain_t *parent,
                     hammer2_chain_t *chain)
 {
        hammer2_blockref_t *base;
+       hammer2_inode_t *ip;
        int count;
 
        if (chain->parent != parent)
@@ -2011,10 +2032,24 @@ hammer2_chain_delete(hammer2_mount_t *hmp, hammer2_chain_t *parent,
        chain->parent = NULL;
 
        /*
-        * If this is an inode clear the pip.
+        * Cumulative adjustments must be propagated to the parent inode
+        * when deleting and synchronized to ip.  A future reattachment
+        * (e.g. during a rename) expects only to use ip_data.*_count.
+        *
+        * Clear the pointer to the parent inode.
         */
        if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
-               chain->u.ip->pip = NULL;
+               ip = chain->u.ip;
+               if (ip->pip) {
+                       ip->pip->delta_icount += ip->delta_icount;
+                       ip->pip->delta_dcount += ip->delta_dcount;
+                       ip->ip_data.inode_count += ip->delta_icount;
+                       ip->ip_data.data_count += ip->delta_dcount;
+                       ip->delta_icount = 0;
+                       ip->delta_dcount = 0;
+                       --ip->pip->delta_icount;
+                       ip->pip = NULL;
+               }
                chain->u.ip->depth = 0;
        }
 
@@ -2221,6 +2256,8 @@ hammer2_chain_flush_pass1(hammer2_mount_t *hmp, hammer2_chain_t *chain,
 
                        switch(chain->bref.type) {
                        case HAMMER2_BREF_TYPE_INODE:
+                               KKASSERT((chain->data->ipdata.op_flags &
+                                         HAMMER2_OPFLAG_DIRECTDATA) == 0);
                                base = &chain->data->ipdata.u.blockset.
                                        blockref[0];
                                count = HAMMER2_SET_COUNT;
@@ -2255,6 +2292,7 @@ hammer2_chain_flush_pass1(hammer2_mount_t *hmp, hammer2_chain_t *chain,
                                         child->index < count);
                                hammer2_chain_lock(hmp, child,
                                                   HAMMER2_RESOLVE_NEVER);
+                               KKASSERT(child->parent == chain);
                                if (child->flags & HAMMER2_CHAIN_MOVED) {
                                        base[child->index] = child->bref;
                                        if (chain->bref.mirror_tid <
@@ -2275,6 +2313,8 @@ hammer2_chain_flush_pass1(hammer2_mount_t *hmp, hammer2_chain_t *chain,
                                } else if (bcmp(&base[child->index],
                                           &child->bref,
                                           sizeof(child->bref)) != 0) {
+                                       kprintf("child %p index %d\n",
+                                               child, child->index);
                                        panic("hammer2: unflagged bref update");
                                }
                                hammer2_chain_unlock(hmp, child);
@@ -2327,6 +2367,24 @@ hammer2_chain_flush_pass1(hammer2_mount_t *hmp, hammer2_chain_t *chain,
        }
 
        /*
+        * Synchronize cumulative data and inode count adjustments to
+        * the inode and propagate the deltas upward to the parent.
+        */
+       if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
+               hammer2_inode_t *ip;
+
+               ip = chain->u.ip;
+               ip->ip_data.inode_count += ip->delta_icount;
+               ip->ip_data.data_count += ip->delta_dcount;
+               if (ip->pip) {
+                       ip->pip->delta_icount += ip->delta_icount;
+                       ip->pip->delta_dcount += ip->delta_dcount;
+               }
+               ip->delta_icount = 0;
+               ip->delta_dcount = 0;
+       }
+
+       /*
         * Clear MODIFIED and set HAMMER2_CHAIN_MOVED.  The caller
         * will re-test the MOVED bit.  We must also update the mirror_tid
         * and modify_tid fields as appropriate.
@@ -2496,7 +2554,9 @@ hammer2_chain_flush_pass2(hammer2_mount_t *hmp, hammer2_chain_t *chain)
 
 /*
  * Stand-alone flush.  If the chain is unable to completely flush we have
- * to be sure that SUBMODIFIED propagates up the parent chain.
+ * to be sure that SUBMODIFIED propagates up the parent chain.  We must not
+ * clear the MOVED bit after flushing in this situation or our desynchronized
+ * bref will not properly update in the parent.
  *
  * This routine can be called from several places but the most important
  * is from the hammer2_vop_reclaim() function.  We want to try to completely
@@ -2661,14 +2721,14 @@ hammer2_chain_flush(hammer2_mount_t *hmp, hammer2_chain_t *chain,
 
        /*
         * Update the blockref in the parent.  We do not have to set
-        * MOVED in the parent because SUBMODIFIED has already been
-        * set, so a normal flush will pick up the changes and propagate
-        * them upward for us.
+        * MOVED in the parent because the parent has been marked modified,
+        * so the flush sequence will pick up the bref change.
         *
         * We do have to propagate mirror_tid upward.
         */
        KKASSERT(chain->index >= 0 &&
                 chain->index < count);
+       KKASSERT(chain->parent == parent);
        if (chain->flags & HAMMER2_CHAIN_MOVED) {
                base[chain->index] = chain->bref;
                if (parent->bref.mirror_tid < chain->bref.mirror_tid)
index e39b274..242f7c4 100644 (file)
@@ -95,9 +95,15 @@ hammer2_freemap_alloc(hammer2_mount_t *hmp, int type, size_t bytes)
                /*
                 * Allocate from the allocation iterator using a SEGSIZE
                 * aligned block and reload the packing cache if possible.
+                *
+                * Skip reserved areas at the beginning of each zone.
                 */
                data_off = hmp->voldata.allocator_beg;
                data_off = (data_off + HAMMER2_SEGMASK64) & ~HAMMER2_SEGMASK64;
+               if ((data_off & HAMMER2_ZONE_MASK64) < HAMMER2_ZONE_SEG) {
+                       KKASSERT((data_off & HAMMER2_ZONE_MASK64) == 0);
+                       data_off += HAMMER2_ZONE_SEG64;
+               }
                data_next = data_off + bytes;
 
                if ((data_next & HAMMER2_SEGMASK) == 0) {
index aa5a387..8ae794d 100644 (file)
@@ -335,8 +335,9 @@ hammer2_inode_duplicate(hammer2_inode_t *dip, hammer2_inode_t *oip,
 {
        hammer2_mount_t *hmp = dip->hmp;
        hammer2_inode_t *nip;
-       hammer2_chain_t *chain;
        hammer2_chain_t *parent;
+       hammer2_chain_t *chain;
+       hammer2_chain_t *scan;
        hammer2_key_t lhc;
        int error;
 
@@ -393,6 +394,29 @@ hammer2_inode_duplicate(hammer2_inode_t *dip, hammer2_inode_t *oip,
        hammer2_chain_modify(hmp, chain, 0);
        nip->ip_data = oip->ip_data;
 
+       /*
+        * XXX This is currently a horrible hack.  Well, if we wanted to
+        *     duplicate a file, i.e. as in a snapshot, we definitely
+        *     would have to flush it first.
+        *
+        *     For hardlink target generation we can theoretically move any
+        *     active chain structures without flushing, but that gets really
+        *     iffy for code which follows chain->parent and ip->pip links.
+        *
+        * XXX only works with files.  Duplicating a directory hierarchy
+        *     requires a flush but doesn't deal with races post-flush.
+        *     Well, it would work I guess, but you might catch some files
+        *     mid-operation.
+        *
+        * We cannot leave oip with any in-memory chains because (for a
+        * hardlink), oip will become a OBJTYPE_HARDLINK which is just a
+        * pointer to the real hardlink's inum and can't have any sub-chains.
+        */
+       hammer2_inode_lock_ex(oip);
+       hammer2_chain_flush(hmp, &oip->chain, 0);
+       hammer2_inode_unlock_ex(oip);
+       KKASSERT(SPLAY_EMPTY(&oip->chain.shead));
+
        if (name) {
                /*
                 * Directory entries are inodes so if the name has changed
@@ -408,7 +432,6 @@ hammer2_inode_duplicate(hammer2_inode_t *dip, hammer2_inode_t *oip,
                 * target.  The name isn't used but to ease debugging give it
                 * a name after its inode number.
                 */
-               nip->ip_data = oip->ip_data;
                ksnprintf(nip->ip_data.filename, sizeof(nip->ip_data.filename),
                          "0x%016jx", (intmax_t)nip->ip_data.inum);
                nip->ip_data.name_len = strlen(nip->ip_data.filename);
@@ -805,6 +828,10 @@ hammer2_hardlink_consolidate(hammer2_inode_t **ipp, hammer2_inode_t *tdip)
        /*
         * Create a hidden inode directory entry in the parent, copying
         * (*oip)'s state.  Then replace oip with OBJTYPE_HARDLINK.
+        *
+        * The duplication function will either flush or move any chains
+        * under oip to the new hardlink target inode, retiring all chains
+        * related to oip before returning.  XXX vp->ip races.
         */
        error = hammer2_inode_duplicate(fdip, oip, &nip, NULL, 0);
        if (error == 0) {
index 81b15d0..7c6bf97 100644 (file)
@@ -621,16 +621,27 @@ hammer2_vfs_root(struct mount *mp, struct vnode **vpp)
        return (error);
 }
 
+/*
+ * Filesystem status
+ *
+ * XXX incorporate pmp->iroot->ip_data.inode_quota and data_quota
+ */
 static
 int
 hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
 {
+       hammer2_pfsmount_t *pmp;
        hammer2_mount_t *hmp;
 
+       pmp = MPTOPMP(mp);
        hmp = MPTOHMP(mp);
 
-       mp->mnt_stat.f_files = 10;
-       mp->mnt_stat.f_bfree = 10;
+       mp->mnt_stat.f_files = pmp->iroot->ip_data.inode_count +
+                              pmp->iroot->delta_icount;
+       mp->mnt_stat.f_ffree = 0;
+       mp->mnt_stat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE;
+       mp->mnt_stat.f_bfree = (hmp->voldata.allocator_size -
+                               hmp->voldata.allocator_beg) / HAMMER2_PBUFSIZE;
        mp->mnt_stat.f_bavail = mp->mnt_stat.f_bfree;
 
        *sbp = mp->mnt_stat;
@@ -641,13 +652,20 @@ static
 int
 hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred)
 {
+       hammer2_pfsmount_t *pmp;
        hammer2_mount_t *hmp;
 
+       pmp = MPTOPMP(mp);
        hmp = MPTOHMP(mp);
 
        mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE;
-       mp->mnt_vstat.f_files = 0;
-       mp->mnt_vstat.f_bavail = mp->mnt_stat.f_bfree;
+       mp->mnt_vstat.f_files = pmp->iroot->ip_data.inode_count +
+                               pmp->iroot->delta_icount;
+       mp->mnt_vstat.f_ffree = 0;
+       mp->mnt_vstat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE;
+       mp->mnt_vstat.f_bfree = (hmp->voldata.allocator_size -
+                                hmp->voldata.allocator_beg) / HAMMER2_PBUFSIZE;
+       mp->mnt_vstat.f_bavail = mp->mnt_vstat.f_bfree;
 
        *sbp = mp->mnt_vstat;
        return (0);
index b0066ac..307b130 100644 (file)
@@ -908,6 +908,8 @@ hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
  *
  * NOOFFSET is returned if the data is inode-embedded.  In this case the
  * strategy code will simply bcopy() the data into the inode.
+ *
+ * The inode's delta_dcount is adjusted.
  */
 static
 hammer2_off_t
@@ -947,6 +949,7 @@ hammer2_assign_physical(hammer2_inode_t *ip, hammer2_key_t lbase,
                                             HAMMER2_BREF_TYPE_DATA,
                                             lblksize);
                pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX;
+               ip->delta_dcount += lblksize;
        } else {
                switch (chain->bref.type) {
                case HAMMER2_BREF_TYPE_INODE:
@@ -1060,7 +1063,7 @@ hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
                        allocbuf(bp, nblksize);
                        switch(chain->bref.type) {
                        case HAMMER2_BREF_TYPE_DATA:
-                               hammer2_chain_resize(hmp, chain,
+                               hammer2_chain_resize(ip, chain,
                                             hammer2_bytes_to_radix(nblksize),
                                             HAMMER2_MODIFY_OPTDATA);
                                bzero(bp->b_data + loff, nblksize - loff);
@@ -1099,7 +1102,7 @@ hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
                if (chain) {
                        switch(chain->bref.type) {
                        case HAMMER2_BREF_TYPE_DATA:
-                               hammer2_chain_resize(hmp, chain,
+                               hammer2_chain_resize(ip, chain,
                                             hammer2_bytes_to_radix(nblksize),
                                             0);
                                hammer2_chain_modify(hmp, chain, 0);
@@ -1148,6 +1151,7 @@ hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
                 * Delete physical data blocks past the file EOF.
                 */
                if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
+                       ip->delta_dcount -= chain->bytes;
                        hammer2_chain_delete(hmp, parent, chain);
                }
                /* XXX check parent if empty indirect block & delete */
@@ -1266,9 +1270,10 @@ hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize)
                                                     obase, nblksize,
                                                     HAMMER2_BREF_TYPE_DATA,
                                                     nblksize);
+                       ip->delta_dcount += nblksize;
                } else {
                        KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA);
-                       hammer2_chain_resize(hmp, chain, nradix,
+                       hammer2_chain_resize(ip, chain, nradix,
                                             HAMMER2_MODIFY_OPTDATA);
                }
                bp->b_bio2.bio_offset = chain->bref.data_off &
@@ -1611,6 +1616,8 @@ hammer2_vop_nlink(struct vop_nlink_args *ap)
        /*
         * If the consolidation changed ip to a HARDLINK pointer we have
         * to adjust the vnode to point to the actual ip.
+        *
+        * XXX this can race against concurrent vnode ops.
         */
        if (oip != ip) {
                hammer2_chain_ref(hmp, &ip->chain);