kernel - VM rework part 16 - Optimization & cleanup pass
author     Matthew Dillon <dillon@apollo.backplane.com>
           Mon, 20 May 2019 16:37:12 +0000 (09:37 -0700)
committer  Matthew Dillon <dillon@apollo.backplane.com>
           Mon, 20 May 2019 19:39:25 +0000 (12:39 -0700)
* Adjust __exclusive_cache_line to use 128-byte alignment as
  per suggestion by mjg.  Use this for the global vmstats.

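A minimal stand-alone sketch of what the wider alignment buys (names here are illustrative, not the kernel's macros): aligning a hot global to twice the 64-byte cache line keeps unrelated data off both the line it occupies and the adjacent line that hardware prefetchers tend to pull in.

    /* Hypothetical user-space sketch, assuming a 64-byte cache line. */
    #define DEMO_CACHELINE_SIZE     64
    #define DEMO_EXCLUSIVE_ALIGN    (DEMO_CACHELINE_SIZE * 2)   /* 128 bytes */

    struct demo_vmstats {
            long    v_free_count;
            long    v_inactive_count;
    } __attribute__((__aligned__(DEMO_EXCLUSIVE_ALIGN)));

    /*
     * The object starts on a 128-byte boundary and is padded to a
     * multiple of 128 bytes, so no unrelated global shares its lines.
     */
    struct demo_vmstats demo_vmstats;

The real macro, shown in the cdefs.h hunk below, additionally places the object in a dedicated .data.exclusive_cache_line section.
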
* Add the vmmeter_neg_slop_cnt global, a dynamically calculated and
  more generous threshold than the static -VMMETER_SLOP_COUNT.  The
  idea is to reduce how often vm_page_alloc() synchronizes its per-cpu
  statistics with the global vmstats.
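
A rough stand-alone sketch of the batching pattern this threshold tunes (names and the threshold value are illustrative, not the kernel's): each cpu adjusts a private counter and only folds the accumulated delta into the shared global once it has drifted far enough below zero, so the global's cache line changes ownership rarely.

    #include <stdatomic.h>

    #define DEMO_NEG_SLOP   (-128L)         /* illustrative per-cpu threshold */

    static _Atomic long demo_global_count;  /* stands in for a vmstats field */

    struct demo_percpu {
            _Atomic long adj;               /* per-cpu delta, usually negative */
    };

    static void
    demo_note_page_removed(struct demo_percpu *pc)
    {
            long d = atomic_fetch_add(&pc->adj, -1) - 1;

            if (d < DEMO_NEG_SLOP) {
                    /* Fold the batched delta into the shared counter. */
                    long copy = atomic_exchange(&pc->adj, 0);
                    atomic_fetch_add(&demo_global_count, copy);
            }
    }

The vm_page.c hunk below applies the same pattern to gd_vmstats_adj, with vmmeter_neg_slop_cnt scaling the threshold to the machine size.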

sys/sys/cdefs.h
sys/vm/vm_meter.c
sys/vm/vm_page.c
sys/vm/vm_page.h
sys/vm/vm_pageout.c

diff --git a/sys/sys/cdefs.h b/sys/sys/cdefs.h
index 2c1bff2..6ac4f90 100644
 #define        __section(x)    __attribute__((__section__(x)))
 #define __read_mostly          __section(".data.read_mostly")
 #define __read_frequently      __section(".data.read_frequently")
-#define __exclusive_cache_line __aligned(__VM_CACHELINE_SIZE)          \
+#define __exclusive_cache_line __aligned(__VM_CACHELINE_SIZE*2)        \
                                __section(".data.exclusive_cache_line")
 
 #else
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
index 031b63e..1fe96ef 100644
@@ -63,7 +63,7 @@
  *
  *         Other consumers should not expect perfect values.
  */
-__cachealign struct vmstats vmstats;
+__exclusive_cache_line struct vmstats vmstats;
 
 static int maxslp = MAXSLP;
 
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 01723c8..d0921d9 100644
@@ -127,6 +127,7 @@ static void vm_numa_add_topology_mem(cpu_node_t *cpup, int physid, long bytes);
  * Array of tailq lists
  */
 struct vpgqueues vm_page_queues[PQ_COUNT];
+__read_mostly long vmmeter_neg_slop_cnt = -VMMETER_SLOP_COUNT;
 
 static volatile int vm_pages_waiting;
 static struct alist vm_contig_alist;
@@ -1007,10 +1008,16 @@ _vm_page_rem_queue_spinlocked(vm_page_t m)
                 * The idea here is to reduce unnecessary SMP cache
                 * mastership changes in the global vmstats, which can be
                 * particularly bad in multi-socket systems.
+                *
+                * NOTE: The double *cnt test tries to avoid a global memory
+                *       read.  vmmeter_neg_slop_cnt is more generous than
+                *       the baseline define; we want to avoid atomic ops
+                *       on the global 'vmstats' structure as much as
+                *       possible.
                 */
                cnt = (long *)((char *)&mycpu->gd_vmstats_adj + pq->cnt_offset);
                atomic_add_long(cnt, -1);
-               if (*cnt < -VMMETER_SLOP_COUNT) {
+               if (*cnt < -VMMETER_SLOP_COUNT && *cnt < vmmeter_neg_slop_cnt) {
                        u_long copy = atomic_swap_long(cnt, 0);
                        cnt = (long *)((char *)&vmstats + pq->cnt_offset);
                        atomic_add_long(cnt, copy);
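
The NOTE above mentions a double test; a minimal sketch of the short-circuit it relies on (illustrative names, not the kernel's): the left-hand comparison is against a compile-time constant, so the global threshold variable is only loaded once the per-cpu delta has already drifted past the static baseline.

    #define DEMO_SLOP_BASELINE      128L    /* stands in for VMMETER_SLOP_COUNT */

    extern long demo_dynamic_neg_slop;      /* stands in for vmmeter_neg_slop_cnt */

    static inline int
    demo_should_fold(long percpu_delta)
    {
            /*
             * && short-circuits, so the global is not read unless the
             * cheap constant comparison already passes.
             */
            return (percpu_delta < -DEMO_SLOP_BASELINE &&
                    percpu_delta < demo_dynamic_neg_slop);
    }
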
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index d30751d..8634dc8 100644
@@ -241,6 +241,7 @@ struct vpgqueues {
 } __aligned(64);
 
 extern struct vpgqueues vm_page_queues[PQ_COUNT];
+extern long vmmeter_neg_slop_cnt;
 
 /*
  * These are the flags defined for vm_page.
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 8c6de80..d501963 100644
@@ -104,7 +104,7 @@ static int vm_pageout_page(vm_page_t m, long *max_launderp,
                           long *vnodes_skippedp, struct vnode **vpfailedp,
                           int pass, int vmflush_flags);
 static int vm_pageout_clean_helper (vm_page_t, int);
-static int vm_pageout_free_page_calc (vm_size_t count);
+static void vm_pageout_free_page_calc (vm_size_t count);
 static void vm_pageout_page_free(vm_page_t m) ;
 struct thread *emergpager;
 struct thread *pagethread;
@@ -1956,11 +1956,9 @@ next:
        vm_page_queues_spin_unlock(PQ_ACTIVE + q);
 }
 
-static int
+static void
 vm_pageout_free_page_calc(vm_size_t count)
 {
-       if (count < vmstats.v_page_count)
-                return 0;
        /*
         * v_free_min           normal allocations
         * v_free_reserved      system allocations
@@ -1972,16 +1970,25 @@ vm_pageout_free_page_calc(vm_size_t count)
        else
                vmstats.v_free_min = 64;
 
+       /*
+        * vmmeter_neg_slop_cnt controls when the per-cpu page stats are
+        * synchronized with the global stats (incurring serious cache
+        * contention).
+        */
+       vmmeter_neg_slop_cnt = -vmstats.v_page_count / ncpus / 128;
+       if (vmmeter_neg_slop_cnt > -VMMETER_SLOP_COUNT)
+               vmmeter_neg_slop_cnt = -VMMETER_SLOP_COUNT;
+
        /*
         * Make sure the vmmeter slop can't blow out our global minimums.
         *
         * However, to accommodate weird configurations (vkernels with many
         * cpus and little memory, or artificially reduced hw.physmem), do
         * not allow v_free_min to exceed 1/20 of ram or the pageout daemon
-        * will go out of control.
+        * might go out of control.
         */
-       if (vmstats.v_free_min < VMMETER_SLOP_COUNT * ncpus * 10)
-               vmstats.v_free_min = VMMETER_SLOP_COUNT * ncpus * 10;
+       if (vmstats.v_free_min < -vmmeter_neg_slop_cnt * ncpus * 10)
+               vmstats.v_free_min = -vmmeter_neg_slop_cnt * ncpus * 10;
        if (vmstats.v_free_min > vmstats.v_page_count / 20)
                vmstats.v_free_min = vmstats.v_page_count / 20;
 
@@ -1989,8 +1996,6 @@ vm_pageout_free_page_calc(vm_size_t count)
        vmstats.v_free_severe = vmstats.v_free_min * 4 / 8 + 0;
        vmstats.v_pageout_free_min = vmstats.v_free_min * 2 / 8 + 7;
        vmstats.v_interrupt_free_min = vmstats.v_free_min * 1 / 8 + 7;
-
-       return 1;
 }
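
A small stand-alone sketch of the sizing and clamping above, with a worked example on hypothetical numbers (the baseline constant is illustrative, not the kernel's value): 16 GiB of 4 KiB pages spread over 16 cpus gives each cpu 2048 pages of slop before it must fold its delta into the global vmstats.

    #include <stdio.h>

    #define DEMO_SLOP_BASELINE      128L    /* stands in for VMMETER_SLOP_COUNT */

    static long
    demo_neg_slop(long page_count, long ncpus)
    {
            long slop = -page_count / ncpus / 128;

            /* Never less generous (less negative) than the static baseline. */
            if (slop > -DEMO_SLOP_BASELINE)
                    slop = -DEMO_SLOP_BASELINE;
            return slop;
    }

    int
    main(void)
    {
            /* 16 GiB / 4 KiB = 4194304 pages; -4194304 / 16 / 128 = -2048 */
            printf("%ld\n", demo_neg_slop(4194304L, 16));
            return 0;
    }

The same quantity then sets the floor for v_free_min (-vmmeter_neg_slop_cnt * ncpus * 10) so that the aggregate per-cpu slop cannot eat into the global free-page minimums.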