From 78831f77f06ae028d9024642403b6f8055df8af3 Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Mon, 20 May 2019 09:37:12 -0700
Subject: [PATCH] kernel - VM rework part 16 - Optimization & cleanup pass

* Adjust __exclusive_cache_line to use 128-byte alignment as per
  suggestion by mjg.  Use this for the global vmstats.

* Add the vmmeter_neg_slop_cnt global, which is a more generous dynamic
  calculation versus -VMMETER_SLOP_COUNT.  The idea is to reduce how
  often vm_page_alloc() synchronizes its per-cpu statistics with the
  global vmstats.
---
 sys/sys/cdefs.h     |  2 +-
 sys/vm/vm_meter.c   |  2 +-
 sys/vm/vm_page.c    |  9 ++++++++-
 sys/vm/vm_page.h    |  1 +
 sys/vm/vm_pageout.c | 23 ++++++++++++++---------
 5 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/sys/sys/cdefs.h b/sys/sys/cdefs.h
index 2c1bff2cd8..6ac4f90917 100644
--- a/sys/sys/cdefs.h
+++ b/sys/sys/cdefs.h
@@ -193,7 +193,7 @@
 #define __section(x)	__attribute__((__section__(x)))
 #define __read_mostly		__section(".data.read_mostly")
 #define __read_frequently	__section(".data.read_frequently")
-#define __exclusive_cache_line	__aligned(__VM_CACHELINE_SIZE)	\
+#define __exclusive_cache_line	__aligned(__VM_CACHELINE_SIZE*2)	\
 				__section(".data.exclusive_cache_line")
 
 #else
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
index 031b63e957..1fe96ef5c4 100644
--- a/sys/vm/vm_meter.c
+++ b/sys/vm/vm_meter.c
@@ -63,7 +63,7 @@
  *
  * Other consumers should not expect perfect values.
  */
-__cachealign struct vmstats vmstats;
+__exclusive_cache_line struct vmstats vmstats;
 
 static int maxslp = MAXSLP;
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 01723c8ddb..d0921d94f0 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -127,6 +127,7 @@ static void vm_numa_add_topology_mem(cpu_node_t *cpup, int physid, long bytes);
  * Array of tailq lists
  */
 struct vpgqueues vm_page_queues[PQ_COUNT];
+__read_mostly long vmmeter_neg_slop_cnt = -VMMETER_SLOP_COUNT;
 
 static volatile int vm_pages_waiting;
 static struct alist vm_contig_alist;
@@ -1007,10 +1008,16 @@ _vm_page_rem_queue_spinlocked(vm_page_t m)
	 * The idea here is to reduce unnecessary SMP cache
	 * mastership changes in the global vmstats, which can be
	 * particularly bad in multi-socket systems.
+	 *
+	 * NOTE: The double *cnt test tries to avoid a global memory
+	 *	 read.  vmmeter_neg_slop_cnt is more generous than
+	 *	 the baseline define; we want to try to avoid atomic
+	 *	 ops on the global 'vmstats' structure as much as
+	 *	 possible.
	 */
	cnt = (long *)((char *)&mycpu->gd_vmstats_adj + pq->cnt_offset);
	atomic_add_long(cnt, -1);
-	if (*cnt < -VMMETER_SLOP_COUNT) {
+	if (*cnt < -VMMETER_SLOP_COUNT && *cnt < vmmeter_neg_slop_cnt) {
		u_long copy = atomic_swap_long(cnt, 0);
		cnt = (long *)((char *)&vmstats + pq->cnt_offset);
		atomic_add_long(cnt, copy);
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index d30751d841..8634dc88c7 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -241,6 +241,7 @@ struct vpgqueues {
 } __aligned(64);
 
 extern struct vpgqueues vm_page_queues[PQ_COUNT];
+extern long vmmeter_neg_slop_cnt;
 
 /*
  * These are the flags defined for vm_page.
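
The vm_page.c hunk above is the heart of the change: each cpu accumulates
page-count deltas in its private gd_vmstats_adj copy and only rolls them
into the shared vmstats once enough negative slop has built up, with a
cheap constant compare done before the dynamic threshold is even read.
As an illustrative aside (not part of the patch), a minimal userland C11
sketch of that pattern might look like the following; every name, size,
and threshold below is a stand-in chosen for the example rather than the
kernel's real definitions.

/*
 * Minimal userland sketch of the per-cpu "negative slop" pattern:
 * accumulate adjustments locally, fold them into the shared counter
 * only when the local slop exceeds a threshold, so the global cache
 * line is touched rarely.
 */
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS		4
#define BASELINE_SLOP	(-64)	/* plays the role of -VMMETER_SLOP_COUNT */

static long dynamic_slop = -512;	/* plays the role of vmmeter_neg_slop_cnt */

static _Atomic long global_free_count = 100000;	/* global vmstats analogue  */
static _Atomic long percpu_adj[NCPUS];		/* gd_vmstats_adj analogue  */

static void
page_removed(int cpu)
{
	/* Accumulate the -1 in the per-cpu counter first. */
	long v = atomic_fetch_add(&percpu_adj[cpu], -1) - 1;

	/*
	 * Cheap constant test first; only if it passes do we read the
	 * (dynamic, usually more generous) threshold and touch the
	 * global counter.
	 */
	if (v < BASELINE_SLOP && v < dynamic_slop) {
		long delta = atomic_exchange(&percpu_adj[cpu], 0);
		atomic_fetch_add(&global_free_count, delta);
	}
}

int
main(void)
{
	for (int i = 0; i < 5000; i++)
		page_removed(i % NCPUS);
	printf("global (lazily synced) = %ld\n",
	    (long)atomic_load(&global_free_count));
	return 0;
}

Run standalone, the printed global count lags the true total by whatever
slop is still parked in the per-cpu counters, which is exactly the "do
not expect perfect values" caveat noted in vm_meter.c.
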
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 8c6de80aad..d501963cc5 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -104,7 +104,7 @@ static int vm_pageout_page(vm_page_t m, long *max_launderp,
			   long *vnodes_skippedp, struct vnode **vpfailedp,
			   int pass, int vmflush_flags);
 static int vm_pageout_clean_helper (vm_page_t, int);
-static int vm_pageout_free_page_calc (vm_size_t count);
+static void vm_pageout_free_page_calc (vm_size_t count);
 static void vm_pageout_page_free(vm_page_t m) ;
 struct thread *emergpager;
 struct thread *pagethread;
@@ -1956,11 +1956,9 @@ next:
	vm_page_queues_spin_unlock(PQ_ACTIVE + q);
 }
 
-static int
+static void
 vm_pageout_free_page_calc(vm_size_t count)
 {
-	if (count < vmstats.v_page_count)
-		return 0;
	/*
	 * v_free_min		normal allocations
	 * v_free_reserved	system allocations
@@ -1972,16 +1970,25 @@ vm_pageout_free_page_calc(vm_size_t count)
	else
		vmstats.v_free_min = 64;
 
+	/*
+	 * vmmeter_neg_slop_cnt controls when the per-cpu page stats are
+	 * synchronized with the global stats (incurring serious cache
+	 * contention).
+	 */
+	vmmeter_neg_slop_cnt = -vmstats.v_page_count / ncpus / 128;
+	if (vmmeter_neg_slop_cnt > -VMMETER_SLOP_COUNT)
+		vmmeter_neg_slop_cnt = -VMMETER_SLOP_COUNT;
+
	/*
	 * Make sure the vmmeter slop can't blow out our global minimums.
	 *
	 * However, to accomodate weird configurations (vkernels with many
	 * cpus and little memory, or artifically reduced hw.physmem), do
	 * not allow v_free_min to exceed 1/20 of ram or the pageout demon
-	 * will go out of control.
+	 * might go out of control.
	 */
-	if (vmstats.v_free_min < VMMETER_SLOP_COUNT * ncpus * 10)
-		vmstats.v_free_min = VMMETER_SLOP_COUNT * ncpus * 10;
+	if (vmstats.v_free_min < -vmmeter_neg_slop_cnt * ncpus * 10)
+		vmstats.v_free_min = -vmmeter_neg_slop_cnt * ncpus * 10;
	if (vmstats.v_free_min > vmstats.v_page_count / 20)
		vmstats.v_free_min = vmstats.v_page_count / 20;
 
@@ -1989,8 +1996,6 @@ vm_pageout_free_page_calc(vm_size_t count)
	vmstats.v_free_severe = vmstats.v_free_min * 4 / 8 + 0;
	vmstats.v_pageout_free_min = vmstats.v_free_min * 2 / 8 + 7;
	vmstats.v_interrupt_free_min = vmstats.v_free_min * 1 / 8 + 7;
-
-	return 1;
 }
-- 
2.41.0
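
As a further illustrative aside, the threshold arithmetic that
vm_pageout_free_page_calc() now performs can be exercised in isolation.
The sketch below assumes a VMMETER_SLOP_COUNT of 128 and an example
machine size purely for demonstration; the real code operates on the
kernel's vmstats and ncpus globals.

/*
 * Standalone illustration of the dynamic slop calculation: the
 * per-cpu slop scales with memory per cpu but is never allowed to be
 * less generous than the static baseline.
 */
#include <stdio.h>

#define VMMETER_SLOP_COUNT	128	/* assumed value for illustration */

static long
calc_neg_slop(long v_page_count, int ncpus)
{
	/* ~1/128th of each cpu's share of physical pages, negated. */
	long slop = -v_page_count / ncpus / 128;

	/* Clamp: never less generous than the static baseline. */
	if (slop > -VMMETER_SLOP_COUNT)
		slop = -VMMETER_SLOP_COUNT;
	return slop;
}

int
main(void)
{
	long pages = 2097152;	/* 8 GB worth of 4 KB pages */

	printf("vmmeter_neg_slop_cnt ~ %ld\n", calc_neg_slop(pages, 16));
	return 0;
}

With these example numbers the dynamic value works out to -1024, i.e.
each cpu may accumulate roughly eight times more slop than the static
baseline before it has to touch the global vmstats; the same quantity
then feeds the v_free_min floor (-vmmeter_neg_slop_cnt * ncpus * 10),
which the hunk above caps at 1/20 of RAM.
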