From 18bee4a2762368921be967944760f01b314b01fd Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sat, 2 Apr 2011 17:16:15 -0700 Subject: [PATCH] HAMMER VFS - Implement swapcache for HAMMER data in double_buffer mode * Support swapcache data caching when HAMMER's double_buffer mode is enabled. Typically the following sysctls: vfs.hammer.double_buffer=1 vm.swapcache.read_enable=1 vm.swapcache.data_enable=1 vm.swapcache.meta_enable=1 (optional) vm.swapcache.use_chflags=0 (optional - see man swapcache) * This causes swapcache to attempt to cache file data from HAMMER filesystems stored via the block device instead of the individual file vnodes. * This allows swapcache to more efficiently cache file data without vnode recycling from a limited kern.maxvnodes value getting in the way. If you have a large dataset spread across many smaller files which would normally overwhelm maxvnodes, and even on large systems handling very large data sets where you wish to cache the file data for some of the files (using use_chflags=1 mode), this makes it possible to cache ALL the file data AND meta-data on the SSD even though the related vnodes cached by the kernel get recycled. * Whereas it may have been inefficient to turn on vm.swapcache.data_enable before, due to filesystem scans and such, it may now be possible to turn this feature on with double buffering also enabled. Note that you must still be cognizant of the aggregate amount of file data being accessed by your system if you have set use_chflags to 0; you simply no longer need to worry about how many files that data belongs to. * Enabling HAMMER's double_buffer mode will reduce performance somewhat for the normal best-case file caching, but it will also greatly improve performance once you start blowing out your memory caches. 
--- sys/vfs/hammer/hammer_btree.c | 7 ++++++- sys/vfs/hammer/hammer_cursor.h | 1 + sys/vfs/hammer/hammer_object.c | 2 ++ sys/vfs/hammer/hammer_reblock.c | 1 + sys/vfs/hammer/hammer_vnops.c | 24 ++++++++++++++++++++++++ 5 files changed, 34 insertions(+), 1 deletion(-) diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c index d0461d65cc..3d9992bb3e 100644 --- a/sys/vfs/hammer/hammer_btree.c +++ b/sys/vfs/hammer/hammer_btree.c @@ -805,7 +805,12 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags) switch(elm->leaf.base.rec_type) { case HAMMER_RECTYPE_DATA: case HAMMER_RECTYPE_DB: - hammer_io_notmeta(cursor->data_buffer); + if ((data_off & HAMMER_ZONE_LARGE_DATA) == 0) + break; + if (hammer_double_buffer == 0 || + (cursor->flags & HAMMER_CURSOR_NOSWAPCACHE)) { + hammer_io_notmeta(cursor->data_buffer); + } break; default: break; diff --git a/sys/vfs/hammer/hammer_cursor.h b/sys/vfs/hammer/hammer_cursor.h index 8888691b1f..b45b8bcb28 100644 --- a/sys/vfs/hammer/hammer_cursor.h +++ b/sys/vfs/hammer/hammer_cursor.h @@ -141,6 +141,7 @@ typedef struct hammer_cursor *hammer_cursor_t; #define HAMMER_CURSOR_TRACKED_RIPOUT 0x00080000 #define HAMMER_CURSOR_LASTWASMEM 0x00100000 /* hammer_ip_next logic */ #define HAMMER_CURSOR_ITERATE_CHECK 0x00200000 +#define HAMMER_CURSOR_NOSWAPCACHE 0x00400000 /* applies to LARGE_DATA */ /* * Flags we can clear when reusing a cursor (we can clear all of them) diff --git a/sys/vfs/hammer/hammer_object.c b/sys/vfs/hammer/hammer_object.c index 56b4a974da..778a5066a2 100644 --- a/sys/vfs/hammer/hammer_object.c +++ b/sys/vfs/hammer/hammer_object.c @@ -669,6 +669,8 @@ hammer_ip_add_directory(struct hammer_transaction *trans, int error; u_int32_t max_iterations; + KKASSERT(dip->ino_data.obj_type == HAMMER_OBJTYPE_DIRECTORY); + record = hammer_alloc_mem_record(dip, HAMMER_ENTRY_SIZE(bytes)); record->type = HAMMER_MEM_RECORD_ADD; diff --git a/sys/vfs/hammer/hammer_reblock.c b/sys/vfs/hammer/hammer_reblock.c index 
64e28f6d46..b42febe03e 100644 --- a/sys/vfs/hammer/hammer_reblock.c +++ b/sys/vfs/hammer/hammer_reblock.c @@ -116,6 +116,7 @@ retry: cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; cursor.flags |= HAMMER_CURSOR_BACKEND; + cursor.flags |= HAMMER_CURSOR_NOSWAPCACHE; /* * This flag allows the btree scan code to return internal nodes, diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c index 161aabce61..0d2f020135 100644 --- a/sys/vfs/hammer/hammer_vnops.c +++ b/sys/vfs/hammer/hammer_vnops.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include "hammer.h" @@ -2752,6 +2753,17 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap) } cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; + /* + * Set NOSWAPCACHE for cursor data extraction if double buffering + * is disabled or (if the file is not marked cacheable via chflags + * and vm.swapcache_use_chflags is enabled). + */ + if (hammer_double_buffer == 0 || + ((ap->a_vp->v_flag & VSWAPCACHE) == 0 && + vm_swapcache_use_chflags)) { + cursor.flags |= HAMMER_CURSOR_NOSWAPCACHE; + } + error = hammer_ip_first(&cursor); boff = 0; @@ -2891,6 +2903,18 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap) bzero((char *)bp->b_data + boff, bp->b_bufsize - boff); /* boff = bp->b_bufsize; */ } + + /* + * Disallow swapcache operation on the vnode buffer if double + * buffering is enabled, the swapcache will get the data via + * the block device buffer. + */ + if (hammer_double_buffer) + bp->b_flags |= B_NOTMETA; + + /* + * Cleanup + */ bp->b_resid = 0; bp->b_error = error; if (error) -- 2.41.0