kernel - Implement CPU localization hinting for low-level page allocations
author    Matthew Dillon <dillon@apollo.backplane.com>
Sat, 7 Jan 2017 02:06:14 +0000 (18:06 -0800)
committer Matthew Dillon <dillon@apollo.backplane.com>
Sat, 7 Jan 2017 02:13:55 +0000 (18:13 -0800)
* By default, vm_page_alloc() and kmem_alloc*() localize allocations to the
  calling cpu.

* A cpu override may be passed in the flags to make these functions localize
  to a different cpu instead (see the usage sketch below).

* Currently implemented only as a test, covering the per-cpu globaldata,
  the idle thread, and stacks for kernel threads targeted to specific cpus.

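As a usage sketch (hypothetical caller: kmem_alloc3(), KM_CPU() and the
VM_SUBSYS_* ids come from this commit's tree, while 'buf', 'bufsize' and
'n' are made-up placeholders):

	/*
	 * Back 'buf' with pages localized to cpu 'n' rather than the
	 * calling cpu.  Omitting KM_CPU() keeps the new default, which
	 * localizes to the cpu performing the allocation.
	 */
	buf = (void *)kmem_alloc3(&kernel_map, bufsize, VM_SUBSYS_GD,
				  KM_CPU(n));
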
sys/kern/lwkt_thread.c
sys/platform/pc64/x86_64/mp_machdep.c
sys/vm/vm_extern.h
sys/vm/vm_kern.c
sys/vm/vm_kern.h
sys/vm/vm_page.c
sys/vm/vm_page.h
sys/vm/vm_zeroidle.c

diff --git a/sys/kern/lwkt_thread.c b/sys/kern/lwkt_thread.c
index d68adc8..267a911 100644
@@ -370,7 +370,11 @@ lwkt_alloc_thread(struct thread *td, int stksize, int cpu, int flags)
        }
     }
     if (stack == NULL) {
-       stack = (void *)kmem_alloc_stack(&kernel_map, stksize);
+       if (cpu < 0)
+               stack = (void *)kmem_alloc_stack(&kernel_map, stksize, 0);
+       else
+               stack = (void *)kmem_alloc_stack(&kernel_map, stksize,
+                                                KM_CPU(cpu));
        flags |= TDF_ALLOCATED_STACK;
     }
     if (cpu < 0) {
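
The effect in this path: a thread created for a specific cpu (cpu >= 0)
now gets its kernel stack backed by pages localized to that cpu, while
cpu < 0 keeps the old unlocalized behavior.  A sketch of the distinction
(call shape taken from the function header above; LWKT_THREAD_STACK is
assumed to be the tree's default stack-size constant):

	td = lwkt_alloc_thread(NULL, LWKT_THREAD_STACK, 3, 0);	/* near cpu 3 */
	td = lwkt_alloc_thread(NULL, LWKT_THREAD_STACK, -1, 0);	/* no hint */
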
diff --git a/sys/platform/pc64/x86_64/mp_machdep.c b/sys/platform/pc64/x86_64/mp_machdep.c
index 7bb55a8..200a65e 100644
@@ -449,7 +449,8 @@ start_all_aps(u_int boot_addr)
                /* This is a bit verbose, it will go away soon.  */
 
                pssize = sizeof(struct privatespace);
-               ps = (void *)kmem_alloc(&kernel_map, pssize, VM_SUBSYS_GD);
+               ps = (void *)kmem_alloc3(&kernel_map, pssize, VM_SUBSYS_GD,
+                                        KM_CPU(x));
                CPU_prvspace[x] = ps;
 #if 0
                kprintf("ps %d %p %d\n", x, ps, pssize);
@@ -462,8 +463,9 @@ start_all_aps(u_int boot_addr)
                mi_gdinit(&gd->mi, x);
                cpu_gdinit(gd, x);
                ipiq_size = sizeof(struct lwkt_ipiq) * (naps + 1);
-               gd->mi.gd_ipiq = (void *)kmem_alloc(&kernel_map, ipiq_size,
-                                                   VM_SUBSYS_IPIQ);
+               gd->mi.gd_ipiq = (void *)kmem_alloc3(&kernel_map, ipiq_size,
+                                                   VM_SUBSYS_IPIQ,
+                                                   KM_CPU(x));
                bzero(gd->mi.gd_ipiq, ipiq_size);
 
                gd->gd_acpi_id = CPUID_TO_ACPIID(gd->mi.gd_cpuid);
diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h
index 7417054..461c161 100644
@@ -135,9 +135,9 @@ kmem_alloc (vm_map_t map, vm_size_t size, vm_subsys_t id)
 
 static __inline
 vm_offset_t
-kmem_alloc_stack (vm_map_t map, vm_size_t size)
+kmem_alloc_stack (vm_map_t map, vm_size_t size, int kmflags)
 {
-       return(kmem_alloc3(map, size, VM_SUBSYS_STACK, KM_STACK));
+       return(kmem_alloc3(map, size, VM_SUBSYS_STACK, kmflags|KM_STACK));
 }
 
 #endif                         /* _KERNEL */
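
kmem_alloc_stack() now forwards caller-supplied flags, so a localized
stack request composes as follows (expansion per the inline above):

	kmem_alloc_stack(&kernel_map, stksize, KM_CPU(cpu));
	/* is equivalent to */
	kmem_alloc3(&kernel_map, stksize, VM_SUBSYS_STACK,
		    KM_CPU(cpu) | KM_STACK);
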
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index 21282cf..f94a0a6 100644
@@ -88,6 +88,15 @@ struct vm_map kernel_map;
 struct vm_map clean_map;
 struct vm_map buffer_map;
 
+static __inline
+int
+KMVMCPU(int kmflags)
+{
+       if ((kmflags & KM_CPU_SPEC) == 0)
+               return 0;
+       return VM_ALLOC_CPU(KM_GETCPU(kmflags));
+}
+
 /*
  * Allocate pageable swap-backed anonymous memory
  */
@@ -273,7 +282,7 @@ kmem_alloc3(vm_map_t map, vm_size_t size, vm_subsys_t id, int kmflags)
 
                mem = vm_page_grab(&kernel_object, OFF_TO_IDX(addr + i),
                                   VM_ALLOC_FORCE_ZERO | VM_ALLOC_NORMAL |
-                                  VM_ALLOC_RETRY);
+                                  VM_ALLOC_RETRY | KMVMCPU(kmflags));
                vm_page_unqueue_nowakeup(mem);
                vm_page_wakeup(mem);
        }
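
KMVMCPU() bridges the two flag namespaces: when the caller packed a cpu
into kmflags with KM_CPU(), the same cpu is repacked into the VM_ALLOC_*
form understood by vm_page_grab(); flags without KM_CPU_SPEC translate
to 0 (no hint).  A round-trip sketch using only macros from this commit:

	int kmflags = KM_CPU(7);	/* (7 << 16) | KM_CPU_SPEC */
	int vmflags = KMVMCPU(kmflags);	/* (7 << 16) | VM_ALLOC_CPU_SPEC */

	KKASSERT(KM_GETCPU(kmflags) == 7);
	KKASSERT(VM_ALLOC_GETCPU(vmflags) == 7);
	KKASSERT(KMVMCPU(KM_STACK) == 0);	/* no cpu was requested */
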
diff --git a/sys/vm/vm_kern.h b/sys/vm/vm_kern.h
index be51efe..ce54255 100644
@@ -88,10 +88,15 @@ typedef struct kmem_anon_desc kmem_anon_desc_t;
 /*
  * kmem_alloc3() flags
  */
-#define KM_PAGEABLE    0x0001
-#define KM_KRESERVE    0x0002
-#define KM_STACK       0x0004
-#define KM_NOTLBSYNC   0x0008
+#define KM_PAGEABLE    0x00000001
+#define KM_KRESERVE    0x00000002
+#define KM_STACK       0x00000004
+#define KM_NOTLBSYNC   0x00000008
+#define KM_CPU_SPEC    0x00000010
+
+#define KM_CPU_SHIFT   16
+#define KM_CPU(n)      (((n) << KM_CPU_SHIFT) | KM_CPU_SPEC)
+#define KM_GETCPU(flags) ((flags) >> KM_CPU_SHIFT)
 
 /* Kernel memory management definitions. */
 extern struct vm_map buffer_map;
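
The encoding splits the flags word: option bits stay in the low 16 bits
and the cpu id occupies the upper bits, with KM_CPU_SPEC disambiguating
"cpu 0 requested" from "no cpu requested".  Concretely:

	KM_CPU(0)		/* == 0x00000010, cpu 0 + KM_CPU_SPEC */
	KM_CPU(5)		/* == 0x00050010, (5 << 16) | KM_CPU_SPEC */
	KM_GETCPU(KM_CPU(5))	/* == 5, upper bits shifted back down */
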
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 006f913..29b1b9b 100644
@@ -864,15 +864,15 @@ vm_page_sleep_busy(vm_page_t m, int also_m_busy, const char *msg)
  * WARNING! The caller must mask the returned value with PQ_L2_MASK.
  */
 u_short
-vm_get_pg_color(globaldata_t gd, vm_object_t object, vm_pindex_t pindex)
+vm_get_pg_color(int cpuid, vm_object_t object, vm_pindex_t pindex)
 {
        u_short pg_color;
        int phys_id;
        int core_id;
        int object_pg_color;
 
-       phys_id = get_cpu_phys_id(gd->gd_cpuid);
-       core_id = get_cpu_core_id(gd->gd_cpuid);
+       phys_id = get_cpu_phys_id(cpuid);
+       core_id = get_cpu_core_id(cpuid);
        object_pg_color = object ? object->pg_color : 0;
 
        if (cpu_topology_phys_ids && cpu_topology_core_ids) {
@@ -906,7 +906,7 @@ vm_get_pg_color(globaldata_t gd, vm_object_t object, vm_pindex_t pindex)
                /*
                 * Unknown topology, distribute things evenly.
                 */
-               pg_color = gd->gd_cpuid * PQ_L2_SIZE / ncpus;
+               pg_color = cpuid * PQ_L2_SIZE / ncpus;
                pg_color += pindex + object_pg_color;
        }
        return pg_color;
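
Taking a cpuid instead of a globaldata pointer matters because the BSP
allocates on behalf of APs whose globaldata does not exist yet (see the
mp_machdep.c hunk above).  Any cpu's starting color can now be computed,
e.g. (sketch; 'target_cpu' is a placeholder):

	base = vm_get_pg_color(target_cpu, NULL, 0) & PQ_L2_MASK;
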
@@ -1726,6 +1726,8 @@ vm_page_select_free(u_short pg_color, boolean_t prefer_zero)
  *                             (see vm_page_grab())
  *     VM_ALLOC_USE_GD         ok to use per-gd cache
  *
+ *     VM_ALLOC_CPU(n)         allocate using specified cpu localization
+ *
  * The object must be held if not NULL
  * This routine may not block
  *
@@ -1736,10 +1738,10 @@ vm_page_select_free(u_short pg_color, boolean_t prefer_zero)
 vm_page_t
 vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int page_req)
 {
-       globaldata_t gd = mycpu;
        vm_object_t obj;
        vm_page_t m;
        u_short pg_color;
+       int cpuid_local;
 
 #if 0
        /*
@@ -1768,8 +1770,16 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int page_req)
         * This is nowhere near perfect, for example the last pindex in a
         * subgroup will overflow into the next cpu or package.  But this
         * should get us good page reuse locality in heavy mixed loads.
+        *
+        * (may be executed before the APs are started, so other GDs might
+        *  not exist!)
         */
-       pg_color = vm_get_pg_color(gd, object, pindex);
+       if (page_req & VM_ALLOC_CPU_SPEC)
+               cpuid_local = VM_ALLOC_GETCPU(page_req);
+       else
+               cpuid_local = mycpu->gd_cpuid;
+
+       pg_color = vm_get_pg_color(cpuid_local, object, pindex);
 
        KKASSERT(page_req & 
                (VM_ALLOC_NORMAL|VM_ALLOC_QUICK|
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 06905ce..62c1e66 100644
@@ -370,6 +370,12 @@ extern long first_page;                    /* first physical page number */
 #define VM_ALLOC_NULL_OK       0x0040  /* ok to return NULL on collision */
 #define        VM_ALLOC_RETRY          0x0080  /* indefinite block (vm_page_grab()) */
 #define VM_ALLOC_USE_GD                0x0100  /* use per-gd cache */
+#define VM_ALLOC_CPU_SPEC      0x0200
+
+#define VM_ALLOC_CPU_SHIFT     16
+#define VM_ALLOC_CPU(n)                (((n) << VM_ALLOC_CPU_SHIFT) | \
+                                VM_ALLOC_CPU_SPEC)
+#define VM_ALLOC_GETCPU(flags) ((flags) >> VM_ALLOC_CPU_SHIFT)
 
 void vm_page_queue_spin_lock(vm_page_t);
 void vm_page_queues_spin_lock(u_short);
@@ -446,8 +452,7 @@ void VM_PAGE_DEBUG_EXT(vm_page_busy_wait)(vm_page_t m,
                        int also_m_busy, const char *wmsg VM_PAGE_DEBUG_ARGS);
 int VM_PAGE_DEBUG_EXT(vm_page_busy_try)(vm_page_t m,
                        int also_m_busy VM_PAGE_DEBUG_ARGS);
-u_short vm_get_pg_color(globaldata_t gd, vm_object_t object,
-                       vm_pindex_t pindex);
+u_short vm_get_pg_color(int cpuid, vm_object_t object, vm_pindex_t pindex);
 
 #ifdef VM_PAGE_DEBUG
 
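With VM_ALLOC_CPU() in place, an individual page allocation can be
steered toward another cpu's locality domain, e.g. (sketch; 'obj' and
'pindex' are placeholders):

	m = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL | VM_ALLOC_CPU(2));
	/* without VM_ALLOC_CPU() the page is colored for the calling cpu */
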
diff --git a/sys/vm/vm_zeroidle.c b/sys/vm/vm_zeroidle.c
index da07165..c137cf8 100644
@@ -126,7 +126,7 @@ vm_page_zero_check(int *zero_countp, int *zero_statep)
        if (idlezero_enable == 0)
                return (0);
 
-       base = vm_get_pg_color(mycpu, NULL, 0) & PQ_L2_MASK;
+       base = vm_get_pg_color(mycpu->gd_cpuid, NULL, 0) & PQ_L2_MASK;
        count = 16;
        while (count < PQ_L2_SIZE / ncpus)
                count <<= 1;