HAMMER VFS - Cache additional B-Tree nodes in the directory inode.
author     Matthew Dillon <dillon@apollo.backplane.com>
           Sat, 20 Jun 2009 23:53:21 +0000 (16:53 -0700)
committer  Matthew Dillon <dillon@apollo.backplane.com>
           Sat, 20 Jun 2009 23:53:21 +0000 (16:53 -0700)
* Cache inode record cursors in the directory at dip->cache[2] to reduce
  cpu overhead when iterating a directory (see the sketch below).

* Cache file object record cursors in the file's governing directory at
  dip->cache[3] after reading the last data record in a file, to reduce
  cpu overhead when chain-reading files.

* Also add vfs.hammer.stats_btree_root_iterations, a counter which
  is incremented whenever a B-Tree search is initiated from the root
  node (a userland read example follows).
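
Taken together, the first two items form a search-initiation heuristic in
hammer_get_inode(); the following is a condensed sketch of the hammer_inode.c
change included below (error handling and the rest of the cursor setup
omitted):

        cachep = NULL;
        if (dip) {
                /* prefer the directory's recent inode-lookup cache */
                if (dip->cache[2].node)
                        cachep = &dip->cache[2];
                else
                        cachep = &dip->cache[0];
        }
        hammer_init_cursor(trans, &cursor, cachep, NULL);

        /* on a successful lookup: seed the data hint, record the shortcut */
        hammer_cache_node(&ip->cache[0], cursor.node);
        if (dip) {
                if (dip->cache[3].node)
                        hammer_cache_node(&ip->cache[1], dip->cache[3].node);
                hammer_cache_node(&dip->cache[2], cursor.node);
        }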

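The new counter is exported read-only as vfs.hammer.stats_btree_root_iterations
and can be inspected with sysctl(8); a minimal userland reader, shown only as
an example (it is not part of this commit) and assuming the standard
sysctlbyname(3) interface:

        /* print the HAMMER root-iteration counter (example only) */
        #include <sys/types.h>
        #include <sys/sysctl.h>
        #include <inttypes.h>
        #include <stdio.h>

        int
        main(void)
        {
                int64_t iters;
                size_t len = sizeof(iters);

                if (sysctlbyname("vfs.hammer.stats_btree_root_iterations",
                                 &iters, &len, NULL, 0) == -1) {
                        perror("sysctlbyname");
                        return (1);
                }
                printf("B-Tree searches started at the root: %" PRId64 "\n",
                       iters);
                return (0);
        }
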
sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_cursor.c
sys/vfs/hammer/hammer_inode.c
sys/vfs/hammer/hammer_vfsops.c
sys/vfs/hammer/hammer_vnops.c

diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h
index 3daea5b..70f2cbe 100644
@@ -297,7 +297,7 @@ struct hammer_inode {
        struct hammer_inode_data ino_data;      /* in-memory cache */
        struct hammer_rec_rb_tree rec_tree;     /* in-memory cache */
        int                     rec_generation;
-       struct hammer_node_cache cache[2];      /* search initiate cache */
+       struct hammer_node_cache cache[4];      /* search initiate cache */
 
        /*
         * When a demark is created to synchronize an inode to
@@ -826,6 +826,7 @@ extern int64_t hammer_stats_btree_deletes;
 extern int64_t hammer_stats_btree_elements;
 extern int64_t hammer_stats_btree_splits;
 extern int64_t hammer_stats_btree_iterations;
+extern int64_t hammer_stats_btree_root_iterations;
 extern int64_t hammer_stats_record_iterations;
 extern int64_t hammer_stats_file_read;
 extern int64_t hammer_stats_file_write;
@@ -863,6 +864,9 @@ struct hammer_inode *hammer_get_dummy_inode(hammer_transaction_t trans,
                        hammer_inode_t dip, int64_t obj_id,
                        hammer_tid_t asof, u_int32_t localization,
                        int flags, int *errorp);
+struct hammer_inode *hammer_find_inode(hammer_transaction_t trans,
+                       int64_t obj_id, hammer_tid_t asof,
+                       u_int32_t localization);
 void   hammer_scan_inode_snapshots(hammer_mount_t hmp,
                        hammer_inode_info_t iinfo,
                        int (*callback)(hammer_inode_t ip, void *data),
diff --git a/sys/vfs/hammer/hammer_cursor.c b/sys/vfs/hammer/hammer_cursor.c
index 679eceb..02b5ef6 100644
@@ -82,8 +82,11 @@ hammer_init_cursor(hammer_transaction_t trans, hammer_cursor_t cursor,
                                node = NULL;
                        }
                }
+               if (node == NULL)
+                       ++hammer_stats_btree_root_iterations;
        } else {
                node = NULL;
+               ++hammer_stats_btree_root_iterations;
        }
 
        /*
diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c
index a123dff..5f99708 100644
@@ -351,6 +351,7 @@ hammer_get_inode(hammer_transaction_t trans, hammer_inode_t dip,
                 int flags, int *errorp)
 {
        hammer_mount_t hmp = trans->hmp;
+       struct hammer_node_cache *cachep;
        struct hammer_inode_info iinfo;
        struct hammer_cursor cursor;
        struct hammer_inode *ip;
@@ -400,6 +401,8 @@ loop:
        ip->flags = flags & HAMMER_INODE_RO;
        ip->cache[0].ip = ip;
        ip->cache[1].ip = ip;
+       ip->cache[2].ip = ip;
+       ip->cache[3].ip = ip;
        if (hmp->ronly)
                ip->flags |= HAMMER_INODE_RO;
        ip->sync_trunc_off = ip->trunc_off = ip->save_trunc_off =
@@ -413,9 +416,20 @@ loop:
         * access the current version of the root inode and (if it is not
         * a master) always access information under it with a snapshot
         * TID.
+        *
+        * We cache recent inode lookups in this directory in dip->cache[2].
+        * If nothing is cached there we assume the inode we are looking
+        * for is close to the directory inode and fall back to dip->cache[0].
         */
 retry:
-       hammer_init_cursor(trans, &cursor, (dip ? &dip->cache[0] : NULL), NULL);
+       cachep = NULL;
+       if (dip) {
+               if (dip->cache[2].node)
+                       cachep = &dip->cache[2];
+               else
+                       cachep = &dip->cache[0];
+       }
+       hammer_init_cursor(trans, &cursor, cachep, NULL);
        cursor.key_beg.localization = localization + HAMMER_LOCALIZE_INODE;
        cursor.key_beg.obj_id = ip->obj_id;
        cursor.key_beg.key = 0;
@@ -449,11 +463,21 @@ retry:
                 * The assumption is that it is near the directory inode.
                 *
                 * cache[1] tries to cache the location of the object data.
-                * The assumption is that it is near the directory data.
+                * We might have something in the governing directory from
+                * scan optimizations (see the strategy code in
+                * hammer_vnops.c).
+                *
+                * We update dip->cache[2], if possible, with the location
+                * of the object inode for future directory shortcuts.
                 */
                hammer_cache_node(&ip->cache[0], cursor.node);
-               if (dip && dip->cache[1].node)
-                       hammer_cache_node(&ip->cache[1], dip->cache[1].node);
+               if (dip) {
+                       if (dip->cache[3].node) {
+                               hammer_cache_node(&ip->cache[1],
+                                                 dip->cache[3].node);
+                       }
+                       hammer_cache_node(&dip->cache[2], cursor.node);
+               }
 
                /*
                 * The file should not contain any data past the file size
@@ -559,6 +583,8 @@ loop:
        ip->flags = flags | HAMMER_INODE_RO | HAMMER_INODE_DUMMY;
        ip->cache[0].ip = ip;
        ip->cache[1].ip = ip;
+       ip->cache[2].ip = ip;
+       ip->cache[3].ip = ip;
        ip->sync_trunc_off = ip->trunc_off = ip->save_trunc_off =
                0x7FFFFFFFFFFFFFFFLL;
        RB_INIT(&ip->rec_tree);
@@ -614,6 +640,33 @@ loop:
 }
 
 /*
+ * Return a referenced inode only if it is in our inode cache.
+ *
+ * Dummy inodes do not count.
+ */
+struct hammer_inode *
+hammer_find_inode(hammer_transaction_t trans, int64_t obj_id,
+                 hammer_tid_t asof, u_int32_t localization)
+{
+       hammer_mount_t hmp = trans->hmp;
+       struct hammer_inode_info iinfo;
+       struct hammer_inode *ip;
+
+       iinfo.obj_id = obj_id;
+       iinfo.obj_asof = asof;
+       iinfo.obj_localization = localization;
+loop:
+       ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &iinfo);
+       if (ip) {
+               if (ip->flags & HAMMER_INODE_DUMMY)
+                       ip = NULL;
+               else
+                       hammer_ref(&ip->lock);
+       }
+       return(ip);
+}
+
+/*
  * Create a new filesystem object, returning the inode in *ipp.  The
  * returned inode will be referenced.  The inode is created in-memory.
  *
@@ -655,6 +708,8 @@ hammer_create_inode(hammer_transaction_t trans, struct vattr *vap,
                    HAMMER_INODE_ATIME | HAMMER_INODE_MTIME;
        ip->cache[0].ip = ip;
        ip->cache[1].ip = ip;
+       ip->cache[2].ip = ip;
+       ip->cache[3].ip = ip;
 
        ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
        /* ip->save_trunc_off = 0; (already zero) */
@@ -798,6 +853,8 @@ hammer_free_inode(hammer_inode_t ip)
        KKASSERT(ip->lock.refs == 1);
        hammer_uncache_node(&ip->cache[0]);
        hammer_uncache_node(&ip->cache[1]);
+       hammer_uncache_node(&ip->cache[2]);
+       hammer_uncache_node(&ip->cache[3]);
        hammer_inode_wakereclaims(ip, 1);
        if (ip->objid_cache)
                hammer_clear_objid(ip);
diff --git a/sys/vfs/hammer/hammer_vfsops.c b/sys/vfs/hammer/hammer_vfsops.c
index 8bf5f2e..bb2044c 100644
@@ -74,6 +74,7 @@ int64_t hammer_stats_btree_deletes;
 int64_t hammer_stats_btree_elements;
 int64_t hammer_stats_btree_splits;
 int64_t hammer_stats_btree_iterations;
+int64_t hammer_stats_btree_root_iterations;
 int64_t hammer_stats_record_iterations;
 
 int64_t hammer_stats_file_read;
@@ -163,6 +164,8 @@ SYSCTL_QUAD(_vfs_hammer, OID_AUTO, stats_btree_splits, CTLFLAG_RD,
           &hammer_stats_btree_splits, 0, "");
 SYSCTL_QUAD(_vfs_hammer, OID_AUTO, stats_btree_iterations, CTLFLAG_RD,
           &hammer_stats_btree_iterations, 0, "");
+SYSCTL_QUAD(_vfs_hammer, OID_AUTO, stats_btree_root_iterations, CTLFLAG_RD,
+          &hammer_stats_btree_root_iterations, 0, "");
 SYSCTL_QUAD(_vfs_hammer, OID_AUTO, stats_record_iterations, CTLFLAG_RD,
           &hammer_stats_record_iterations, 0, "");
 
diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c
index adc9782..90c4a21 100644
@@ -2208,6 +2208,7 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap)
 {
        struct hammer_transaction trans;
        struct hammer_inode *ip;
+       struct hammer_inode *dip;
        struct hammer_cursor cursor;
        hammer_base_elm_t base;
        hammer_off_t disk_offset;
@@ -2416,8 +2417,27 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap)
        biodone(ap->a_bio);
 
 done:
+       /*
+        * Cache the b-tree node for the last data read in cache[1].
+        *
+        * If we hit the file EOF then also cache the node in the
+        * governing directory's cache[3]; it will be used to initialize
+        * the inode's cache[1] for any inodes looked up via the directory.
+        *
+        * This doesn't reduce disk accesses since the B-Tree chain is
+        * likely cached, but it does reduce cpu overhead when looking
+        * up file offsets for cpdup/tar/cpio style iterations.
+        */
        if (cursor.node)
                hammer_cache_node(&ip->cache[1], cursor.node);
+       if (ran_end >= ip->ino_data.size) {
+               dip = hammer_find_inode(&trans, ip->ino_data.parent_obj_id,
+                                       ip->obj_asof, ip->obj_localization);
+               if (dip) {
+                       hammer_cache_node(&dip->cache[3], cursor.node);
+                       hammer_rel_inode(dip, 0);
+               }
+       }
        hammer_done_cursor(&cursor);
        hammer_done_transaction(&trans);
        return(error);