hammer2 - Limit bulkfree cpu and SSD I/O
author Matthew Dillon <dillon@apollo.backplane.com>
Mon, 11 Sep 2017 21:46:31 +0000 (14:46 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Mon, 11 Sep 2017 21:51:00 +0000 (14:51 -0700)
* Limit resource utilization when running bulkfree.  The default is 5000
  tps (meta-data blocks per second) and can be changed via the
  vfs.hammer2.bulkfree_tps sysctl.

* Designed primarily to limit cpu utilization when meta-data is cached,
  and to limit SSD utilization otherwise.  This feature generally cannot
  be used to limit HDD utilization because it cannot currently distinguish
  between cached and uncached I/O.  Setting a number low enough to
  accommodate an HDD will cause bulkfree to take way too long to run.

sbin/hammer2/hammer2.8
sys/vfs/hammer2/hammer2.h
sys/vfs/hammer2/hammer2_bulkfree.c
sys/vfs/hammer2/hammer2_subr.c
sys/vfs/hammer2/hammer2_vfsops.c

index da1498a..c27b206 100644 (file)
@@ -333,13 +333,19 @@ This disables the H2 compression heuristic and forces H2 to always
 try to compress data blocks, even if they look uncompressable.
 Enabling this option reduces performance but has higher de-duplication
 repeatability.
-.It Va vfs.hammer2.cluster_read (default 4)
-Set the amount of read-ahead clustering to perform.
+.It Va vfs.hammer2.cluster_data_read (default 4)
+.It Va vfs.hammer2.cluster_meta_read (default 1)
+Set the amount of read-ahead clustering to perform on data and meta-data
+blocks.
 .It Va vfs.hammer2.cluster_write (default 0)
 Set the amount of write-behind clustering to perform.  This is disabled by
 default in order to give temporary files a chance to be deleted before
 media writes are committed.  Enabling this reduces buffer cache stress
 but causes file writes to flush to media more quickly.
+.It Va vfs.hammer2.bulkfree_tps (default 5000)
+Set bulkfree's maximum scan rate.  This is primarily intended to limit
+I/O utilization on SSDs and cpu utilization when the meta-data is mostly
+cached in memory.
 .El
 .Sh SETTING UP /etc/hammer2
 The 'rsainit' directive will create the
index 3ef1126..2fc2c7a 100644 (file)
@@ -1355,6 +1355,7 @@ extern int hammer2_flush_pipe;
 extern int hammer2_synchronous_flush;
 extern int hammer2_dio_count;
 extern int hammer2_limit_dio;
+extern int hammer2_bulkfree_tps;
 extern long hammer2_chain_allocs;
 extern long hammer2_chain_frees;
 extern long hammer2_limit_dirty_chains;
index 341846d..6525bfd 100644 (file)
@@ -87,6 +87,8 @@ typedef struct hammer2_bulkfree_info {
        long                    count_bytes_scanned;
        long                    count_chains_scanned;
        long                    count_chains_reported;
+       long                    bulkfree_calls;
+       int                     bulkfree_ticks;
        hammer2_off_t           adj_free;
        hammer2_tid_t           mtid;
        hammer2_tid_t           saved_mirror_tid;
@@ -144,6 +146,10 @@ hammer2_bulk_scan(hammer2_chain_t *parent,
                if (error)
                        break;
 
+               /*
+                * Account for dirents before the data_off test, since most
+                * dirents do not need a data reference.
+                */
                if (bref.type == HAMMER2_BREF_TYPE_DIRENT)
                        ++info->count_dirents_scanned;
 
@@ -404,6 +410,8 @@ hammer2_bulkfree_pass(hammer2_dev_t *hmp, hammer2_chain_t *vchain,
        cbinfo.sbase &= ~HAMMER2_FREEMAP_LEVEL1_MASK;
        TAILQ_INIT(&cbinfo.list);
 
+       cbinfo.bulkfree_ticks = ticks;
+
        /*
         * Loop on a full meta-data scan as many times as required to
         * get through all available storage.
@@ -586,11 +594,33 @@ h2_bulkfree_callback(hammer2_bulkfree_info_t *cbinfo, hammer2_blockref_t *bref)
        int radix;
 
        /*
-        * Check for signal and allow yield to userland during scan
+        * Check for signal and allow yield to userland during scan.
         */
        if (hammer2_signal_check(&cbinfo->save_time))
                return HAMMER2_ERROR_ABORTED;
 
+       /*
+        * Deal with kernel thread cpu or I/O hogging by limiting the
+        * number of chains scanned per second to hammer2_bulkfree_tps.
+        * Ignore leaf records (DIRENT and DATA), no per-record I/O is
+        * involved for those since we don't load their data.
+        */
+       if (bref->type != HAMMER2_BREF_TYPE_DATA &&
+           bref->type != HAMMER2_BREF_TYPE_DIRENT) {
+               ++cbinfo->bulkfree_calls;
+               if (cbinfo->bulkfree_calls > hammer2_bulkfree_tps) {
+                       int dticks = ticks - cbinfo->bulkfree_ticks;
+                       if (dticks < 0)
+                               dticks = 0;
+                       if (dticks < hz) {
+                               tsleep(&cbinfo->bulkfree_ticks, 0,
+                                      "h2bw", hz - dticks);
+                       }
+                       cbinfo->bulkfree_calls = 0;
+                       cbinfo->bulkfree_ticks = ticks;
+               }
+       }
+
        /*
         * Calculate the data offset and determine if it is within
         * the current freemap range being gathered.
index c67f9cd..d9796e0 100644 (file)
@@ -416,6 +416,8 @@ hammer2_signal_check(time_t *timep)
                        if (CURSIG_NOBLOCK(curthread->td_lwp) != 0)
                                error = HAMMER2_ERROR_ABORTED;
                }
+       } else {
+               lwkt_yield();
        }
        return error;
 }
index d8d2d24..a1eaf63 100644 (file)
@@ -88,6 +88,7 @@ int hammer2_flush_pipe = 100;
 int hammer2_synchronous_flush = 1;
 int hammer2_dio_count;
 int hammer2_limit_dio = 256;
+int hammer2_bulkfree_tps = 5000;
 long hammer2_chain_allocs;
 long hammer2_chain_frees;
 long hammer2_limit_dirty_chains;
@@ -135,6 +136,8 @@ SYSCTL_INT(_vfs_hammer2, OID_AUTO, flush_pipe, CTLFLAG_RW,
           &hammer2_flush_pipe, 0, "");
 SYSCTL_INT(_vfs_hammer2, OID_AUTO, synchronous_flush, CTLFLAG_RW,
           &hammer2_synchronous_flush, 0, "");
+SYSCTL_INT(_vfs_hammer2, OID_AUTO, bulkfree_tps, CTLFLAG_RW,
+          &hammer2_bulkfree_tps, 0, "");
 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, chain_allocs, CTLFLAG_RW,
           &hammer2_chain_allocs, 0, "");
 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, chain_frees, CTLFLAG_RW,