hammer2 - Implement meta-data statistics rollup
author Matthew Dillon <dillon@apollo.backplane.com>
Fri, 1 Aug 2014 00:22:04 +0000 (17:22 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Fri, 1 Aug 2014 00:22:04 +0000 (17:22 -0700)
* HAMMER2 keeps total recursive data and inode count statistics in each
  inode.  This means that one can determine how much storage is being
  used for an entire subdirectory tree simply by doing a 'hammer2 stat <dir>'.

* Implement this by storing temporary rollup adjustments in the hammer2_chain
  structure, then synchronizing those adjustments on insertions, deletions,
  and flushes.

  Generally speaking, the chain structure has data_count, inode_count,
  data_count_up, and inode_count_up fields for temporarily tracking pending
  adjustments.  The main count fields are applied to the current chain AND
  the parent, while the *_up fields are only applied to the parent.

  For example, when an inode is inserted, its stored statistics must be
  applied to the parent (recursively), but not to the inode itself.

* Preliminary implementation.

sys/vfs/hammer2/hammer2.h
sys/vfs/hammer2/hammer2_chain.c
sys/vfs/hammer2/hammer2_cluster.c
sys/vfs/hammer2/hammer2_flush.c
sys/vfs/hammer2/hammer2_freemap.c
sys/vfs/hammer2/hammer2_inode.c
sys/vfs/hammer2/hammer2_vfsops.c

index ba0d86f..86a7542 100644 (file)
@@ -241,6 +241,8 @@ struct hammer2_chain {
        hammer2_xid_t   flush_xid;              /* flush sequencing */
        hammer2_key_t   data_count;             /* delta's to apply */
        hammer2_key_t   inode_count;            /* delta's to apply */
+       hammer2_key_t   data_count_up;          /* delta's to apply */
+       hammer2_key_t   inode_count_up;         /* delta's to apply */
        hammer2_io_t    *dio;                   /* physical data buffer */
        u_int           bytes;                  /* physical data size */
        u_int           flags;
@@ -338,6 +340,9 @@ RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);
  * Flags passed to hammer2_chain_delete()
  */
 #define HAMMER2_DELETE_PERMANENT       0x0001
+#define HAMMER2_DELETE_NOSTATS         0x0002
+
+#define HAMMER2_INSERT_NOSTATS         0x0002
 
 /*
  * Flags passed to hammer2_chain_delete_duplicate()
@@ -861,10 +866,10 @@ int hammer2_chain_create(hammer2_trans_t *trans, hammer2_chain_t **parentp,
                                hammer2_chain_t **chainp,
                                hammer2_pfsmount_t *pmp,
                                hammer2_key_t key, int keybits,
-                               int type, size_t bytes);
+                               int type, size_t bytes, int flags);
 void hammer2_chain_rename(hammer2_trans_t *trans, hammer2_blockref_t *bref,
                                hammer2_chain_t **parentp,
-                               hammer2_chain_t *chain);
+                               hammer2_chain_t *chain, int flags);
 int hammer2_chain_snapshot(hammer2_trans_t *trans, hammer2_chain_t **chainp,
                                hammer2_ioc_pfs_t *pfs);
 void hammer2_chain_delete(hammer2_trans_t *trans, hammer2_chain_t *parent,
@@ -1014,9 +1019,11 @@ hammer2_cluster_t *hammer2_cluster_scan(hammer2_cluster_t *cparent,
                        hammer2_cluster_t *cluster, int flags);
 int hammer2_cluster_create(hammer2_trans_t *trans, hammer2_cluster_t *cparent,
                        hammer2_cluster_t **clusterp,
-                       hammer2_key_t key, int keybits, int type, size_t bytes);
+                       hammer2_key_t key, int keybits,
+                       int type, size_t bytes, int flags);
 void hammer2_cluster_rename(hammer2_trans_t *trans, hammer2_blockref_t *bref,
-                       hammer2_cluster_t *cparent, hammer2_cluster_t *cluster);
+                       hammer2_cluster_t *cparent, hammer2_cluster_t *cluster,
+                       int flags);
 void hammer2_cluster_delete(hammer2_trans_t *trans, hammer2_cluster_t *pcluster,
                        hammer2_cluster_t *cluster, int flags);
 int hammer2_cluster_snapshot(hammer2_trans_t *trans,
index 40c2e01..b09ff5f 100644 (file)
@@ -1014,6 +1014,7 @@ hammer2_chain_resize(hammer2_trans_t *trans, hammer2_inode_t *ip,
        nbytes = 1U << nradix;
        if (obytes == nbytes)
                return;
+       chain->data_count += (ssize_t)(nbytes - obytes);
 
        /*
         * Make sure the old data is instantiated so we can copy it.  If this
@@ -2156,7 +2157,8 @@ done:
 int
 hammer2_chain_create(hammer2_trans_t *trans, hammer2_chain_t **parentp,
                     hammer2_chain_t **chainp, hammer2_pfsmount_t *pmp,
-                    hammer2_key_t key, int keybits, int type, size_t bytes)
+                    hammer2_key_t key, int keybits, int type, size_t bytes,
+                    int flags)
 {
        hammer2_mount_t *hmp;
        hammer2_chain_t *chain;
@@ -2238,6 +2240,20 @@ hammer2_chain_create(hammer2_trans_t *trans, hammer2_chain_t **parentp,
                        atomic_set_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
                        break;
                }
+
+               /*
+                * Set statistics for pending updates.  These will be
+                * synchronized by the flush code.
+                */
+               switch(type) {
+               case HAMMER2_BREF_TYPE_INODE:
+                       chain->inode_count = 1;
+                       break;
+               case HAMMER2_BREF_TYPE_DATA:
+               case HAMMER2_BREF_TYPE_INDIRECT:
+                       chain->data_count = chain->bytes;
+                       break;
+               }
        } else {
                /*
                 * We are reattaching a previously deleted chain, possibly
@@ -2387,6 +2403,14 @@ again:
                        hammer2_chain_ref(chain);
                        atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
                }
+               if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
+                   (flags & HAMMER2_INSERT_NOSTATS) == 0) {
+                       KKASSERT(chain->data);
+                       chain->inode_count_up +=
+                               chain->data->ipdata.inode_count;
+                       chain->data_count_up +=
+                               chain->data->ipdata.data_count;
+               }
        }
 
        /*
@@ -2433,7 +2457,8 @@ done:
  */
 void
 hammer2_chain_rename(hammer2_trans_t *trans, hammer2_blockref_t *bref,
-                    hammer2_chain_t **parentp, hammer2_chain_t *chain)
+                    hammer2_chain_t **parentp, hammer2_chain_t *chain,
+                    int flags)
 {
        hammer2_mount_t *hmp;
        hammer2_chain_t *parent;
@@ -2477,7 +2502,7 @@ hammer2_chain_rename(hammer2_trans_t *trans, hammer2_blockref_t *bref,
 
                hammer2_chain_create(trans, parentp, &chain, chain->pmp,
                                     bref->key, bref->keybits, bref->type,
-                                    chain->bytes);
+                                    chain->bytes, flags);
                KKASSERT(chain->flags & HAMMER2_CHAIN_UPDATE);
                hammer2_chain_setflush(trans, *parentp);
        }
@@ -2491,7 +2516,8 @@ hammer2_chain_rename(hammer2_trans_t *trans, hammer2_blockref_t *bref,
  */
 static void
 _hammer2_chain_delete_helper(hammer2_trans_t *trans,
-                            hammer2_chain_t *parent, hammer2_chain_t *chain)
+                            hammer2_chain_t *parent, hammer2_chain_t *chain,
+                            int flags)
 {
        hammer2_mount_t *hmp;
 
@@ -2569,7 +2595,29 @@ _hammer2_chain_delete_helper(hammer2_trans_t *trans,
                              "unrecognized blockref type: %d",
                              parent->bref.type);
                }
+
+               /*
+                * delete blockmapped chain from its parent.
+                *
+                * The parent is not affected by any statistics in chain
+                * which are pending synchronization.  That is, there is
+                * nothing to undo in the parent since they have not yet
+                * been incorporated into the parent.
+                *
+                * The parent is affected by statistics stored in inodes.
+                * Those have already been synchronized, so they must be
+                * undone.  XXX split update possible w/delete in middle?
+                */
                if (base) {
+                       if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
+                           (flags & HAMMER2_DELETE_NOSTATS) == 0) {
+                               KKASSERT(chain->data != NULL);
+                               parent->data_count -=
+                                       chain->data->ipdata.data_count;
+                               parent->inode_count -=
+                                       chain->data->ipdata.inode_count;
+                       }
+
                        int cache_index = -1;
                        hammer2_base_delete(trans, parent, base, count,
                                            &cache_index, chain);
@@ -2580,8 +2628,20 @@ _hammer2_chain_delete_helper(hammer2_trans_t *trans,
                 * Chain is not blockmapped but a parent is present.
                 * Atomically remove the chain from the parent.  There is
                 * no blockmap entry to remove.
+                *
+                * Because chain was associated with a parent but not
+                * synchronized, the chain's *_count_up fields contain
+                * inode adjustment statistics which must be undone.
                 */
                spin_lock(&parent->core.cst.spin);
+               if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
+                   (flags & HAMMER2_DELETE_NOSTATS) == 0) {
+                       KKASSERT(chain->data != NULL);
+                       chain->data_count_up -=
+                               chain->data->ipdata.data_count;
+                       chain->inode_count_up -=
+                               chain->data->ipdata.inode_count;
+               }
                atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
                atomic_add_int(&parent->core.live_count, -1);
                ++parent->core.generation;
@@ -2908,12 +2968,15 @@ hammer2_chain_create_indirect(hammer2_trans_t *trans, hammer2_chain_t *parent,
                /*
                 * Shift the chain to the indirect block.
                 *
-                * WARNING! Can cause held-over chains to require a refactor.
-                *          Fortunately we have none (our locked chains are
-                *          passed into and modified by the call).
+                * WARNING! No reason for us to load chain data, pass NOSTATS
+                *          to prevent delete/insert from trying to access
+                *          inode stats (and thus asserting if there is no
+                *          chain->data loaded).
                 */
-               hammer2_chain_delete(trans, parent, chain, 0);
-               hammer2_chain_rename(trans, NULL, &ichain, chain);
+               hammer2_chain_delete(trans, parent, chain,
+                                    HAMMER2_DELETE_NOSTATS);
+               hammer2_chain_rename(trans, NULL, &ichain, chain,
+                                    HAMMER2_INSERT_NOSTATS);
                hammer2_chain_unlock(chain);
                KKASSERT(parent->refs > 0);
                chain = NULL;
@@ -3281,7 +3344,7 @@ hammer2_chain_delete(hammer2_trans_t *trans, hammer2_chain_t *parent,
        if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) {
                KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 &&
                         chain->parent == parent);
-               _hammer2_chain_delete_helper(trans, parent, chain);
+               _hammer2_chain_delete_helper(trans, parent, chain, flags);
        }
 
        if (flags & HAMMER2_DELETE_PERMANENT) {
index 5def0aa..591de55 100644 (file)
@@ -822,7 +822,8 @@ hammer2_cluster_scan(hammer2_cluster_t *cparent, hammer2_cluster_t *cluster,
 int
 hammer2_cluster_create(hammer2_trans_t *trans, hammer2_cluster_t *cparent,
                     hammer2_cluster_t **clusterp,
-                    hammer2_key_t key, int keybits, int type, size_t bytes)
+                    hammer2_key_t key, int keybits,
+                    int type, size_t bytes, int flags)
 {
        hammer2_cluster_t *cluster;
        hammer2_pfsmount_t *pmp;
@@ -853,7 +854,8 @@ hammer2_cluster_create(hammer2_trans_t *trans, hammer2_cluster_t *cparent,
                }
                error = hammer2_chain_create(trans, &cparent->array[i],
                                             &cluster->array[i], pmp,
-                                            key, keybits, type, bytes);
+                                            key, keybits,
+                                            type, bytes, flags);
                KKASSERT(error == 0);
                if (cparent->focus == NULL)
                        cparent->focus = cparent->array[i];
@@ -876,7 +878,8 @@ hammer2_cluster_create(hammer2_trans_t *trans, hammer2_cluster_t *cparent,
  */
 void
 hammer2_cluster_rename(hammer2_trans_t *trans, hammer2_blockref_t *bref,
-                      hammer2_cluster_t *cparent, hammer2_cluster_t *cluster)
+                      hammer2_cluster_t *cparent, hammer2_cluster_t *cluster,
+                      int flags)
 {
        hammer2_chain_t *chain;
        hammer2_blockref_t xbref;
@@ -894,11 +897,11 @@ hammer2_cluster_rename(hammer2_trans_t *trans, hammer2_blockref_t *bref,
                                xbref.keybits = bref->keybits;
                                hammer2_chain_rename(trans, &xbref,
                                                     &cparent->array[i],
-                                                    chain);
+                                                    chain, flags);
                        } else {
                                hammer2_chain_rename(trans, NULL,
                                                     &cparent->array[i],
-                                                    chain);
+                                                    chain, flags);
                        }
                        cluster->array[i] = chain;
                        if (cluster->focus == NULL)
index a47a42b..140150a 100644 (file)
@@ -746,6 +746,21 @@ again:
                                KKASSERT((chain->flags &
                                          HAMMER2_CHAIN_PFSROOT) == 0);
                        }
+
+                       /*
+                        * Update inode statistics.  Pending stats in chain
+                        * are cleared out on UPDATE so expect that bit to
+                        * be set here too or the statistics will not be
+                        * rolled-up properly.
+                        */
+                       {
+                               hammer2_inode_data_t *ipdata;
+
+                               KKASSERT(chain->flags & HAMMER2_CHAIN_UPDATE);
+                               ipdata = &chain->data->ipdata;
+                               ipdata->data_count += chain->data_count;
+                               ipdata->inode_count += chain->inode_count;
+                       }
                        KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0);
                        break;
                default:
@@ -871,21 +886,35 @@ again:
 
                /*
                 * Blocktable updates
+                *
+                * We synchronize pending statistics at this time.  Delta
+                * adjustments designated for the current and upper level
+                * are synchronized.
                 */
                if (base && (chain->flags & HAMMER2_CHAIN_BMAPUPD)) {
                        if (chain->flags & HAMMER2_CHAIN_BMAPPED) {
                                hammer2_base_delete(info->trans, parent,
                                                    base, count,
                                                    &info->cache_index, chain);
-                               hammer2_base_insert(info->trans, parent,
-                                                   base, count,
-                                                   &info->cache_index, chain);
+                               /* base_delete clears both bits */
+                       } else {
+                               atomic_clear_int(&chain->flags,
+                                                HAMMER2_CHAIN_BMAPUPD);
                        }
                }
                if (base && (chain->flags & HAMMER2_CHAIN_BMAPPED) == 0) {
+                       parent->data_count += chain->data_count +
+                                             chain->data_count_up;
+                       parent->inode_count += chain->inode_count +
+                                              chain->inode_count_up;
+                       chain->data_count = 0;
+                       chain->inode_count = 0;
+                       chain->data_count_up = 0;
+                       chain->inode_count_up = 0;
                        hammer2_base_insert(info->trans, parent,
                                            base, count,
                                            &info->cache_index, chain);
+                       /* base_insert sets BMAPPED */
                }
                hammer2_chain_unlock(parent);
        }
index 020b52a..633ce84 100644 (file)
@@ -346,7 +346,8 @@ hammer2_freemap_try_alloc(hammer2_trans_t *trans, hammer2_chain_t **parentp,
                error = hammer2_chain_create(trans, parentp, &chain, hmp->spmp,
                                     key, HAMMER2_FREEMAP_LEVEL1_RADIX,
                                     HAMMER2_BREF_TYPE_FREEMAP_LEAF,
-                                    HAMMER2_FREEMAP_LEVELN_PSIZE);
+                                    HAMMER2_FREEMAP_LEVELN_PSIZE,
+                                    0);
                KKASSERT(error == 0);
                if (error == 0) {
                        hammer2_chain_modify(trans, chain, 0);
@@ -807,7 +808,8 @@ hammer2_freemap_adjust(hammer2_trans_t *trans, hammer2_mount_t *hmp,
                error = hammer2_chain_create(trans, &parent, &chain, hmp->spmp,
                                     key, HAMMER2_FREEMAP_LEVEL1_RADIX,
                                     HAMMER2_BREF_TYPE_FREEMAP_LEAF,
-                                    HAMMER2_FREEMAP_LEVELN_PSIZE);
+                                    HAMMER2_FREEMAP_LEVELN_PSIZE,
+                                    0);
 
                if (hammer2_debug & 0x0040) {
                        kprintf("fixup create chain %p %016jx:%d\n",
index 71e7869..8a192cc 100644 (file)
@@ -646,7 +646,8 @@ retry:
                error = hammer2_cluster_create(trans, cparent, &cluster,
                                             lhc, 0,
                                             HAMMER2_BREF_TYPE_INODE,
-                                            HAMMER2_INODE_BYTES);
+                                            HAMMER2_INODE_BYTES,
+                                            0);
        }
 #if INODE_DEBUG
        kprintf("CREATE INODE %*.*s chain=%p\n",
@@ -841,7 +842,7 @@ hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_cluster_t *cluster,
        hammer2_cluster_bref(cluster, &bref);
        bref.key = lhc;                 /* invisible dir entry key */
        bref.keybits = 0;
-       hammer2_cluster_rename(trans, &bref, dcluster, cluster);
+       hammer2_cluster_rename(trans, &bref, dcluster, cluster, 0);
 
        /*
         * cluster is now 'live' again.. adjust the filename.
@@ -948,7 +949,8 @@ hammer2_inode_connect(hammer2_trans_t *trans,
                                                       dcluster, &ncluster,
                                                       lhc, 0,
                                                       HAMMER2_BREF_TYPE_INODE,
-                                                      HAMMER2_INODE_BYTES);
+                                                      HAMMER2_INODE_BYTES,
+                                                      0);
                } else {
                        /*
                         * Reconnect the original cluster under the new name.
@@ -966,7 +968,8 @@ hammer2_inode_connect(hammer2_trans_t *trans,
                                                       dcluster, &ncluster,
                                                       lhc, 0,
                                                       HAMMER2_BREF_TYPE_INODE,
-                                                      HAMMER2_INODE_BYTES);
+                                                      HAMMER2_INODE_BYTES,
+                                                      0);
                }
        }
 
@@ -1448,7 +1451,8 @@ hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp)
        error = hammer2_cluster_create(&trans, cparent, &cluster,
                                       HAMMER2_INODE_HIDDENDIR, 0,
                                       HAMMER2_BREF_TYPE_INODE,
-                                      HAMMER2_INODE_BYTES);
+                                      HAMMER2_INODE_BYTES,
+                                      0);
        hammer2_inode_unlock_ex(pmp->iroot, cparent);
 
        hammer2_cluster_modify(&trans, cluster, 0);
@@ -1589,7 +1593,8 @@ hammer2_hardlink_consolidate(hammer2_trans_t *trans,
                error = hammer2_cluster_create(trans, cparent, &ncluster,
                                             lhc, 0,
                                             HAMMER2_BREF_TYPE_INODE,
-                                            HAMMER2_INODE_BYTES);
+                                            HAMMER2_INODE_BYTES,
+                                            0);
                hammer2_cluster_modify(trans, ncluster, 0);
                wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
 
index cc10fd1..cc190d6 100644 (file)
@@ -1036,7 +1036,7 @@ retry:
                *errorp = hammer2_cluster_create(trans, dparent, &cluster,
                                               lbase, HAMMER2_PBUFRADIX,
                                               HAMMER2_BREF_TYPE_DATA,
-                                              pblksize);
+                                              pblksize, 0);
                if (cluster == NULL) {
                        hammer2_cluster_lookup_done(dparent);
                        panic("hammer2_cluster_create: par=%p error=%d\n",