From b8d5441dd100292b031428ff862426356755d8ea Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sat, 12 Jul 2014 17:31:48 -0700 Subject: [PATCH] kernel - Add two features to improve qemu emulation (64-bit only) * Implement a tunable for machdep.cpu_idle_hlt, allowing it to be set in /boot/loader.conf. For qemu the admin might want to set the value to 4 (always use HLT) instead of the default 2. * Implement a tunable and new sysctl, machdep.pmap_fast_kernel_cpusync, which defaults to disabled (0). Setting this to 1 in /boot/loader.conf or at any time via sysctl tells the kernel to use a one-stage pmap invalidation for kernel_pmap updates. User pmaps are not affected and will still use two-stage invalidations. One-stage pmap invalidations only have to spin on the originating cpu, but the other cpus will not be quiesced when updating a kernel_map pmap entry. This is untested as there might be situations where the kernel pmap is updated without an interlock (though most should be interlocked already). This second sysctl/tunable, if enabled, greatly improves qemu performance, particularly when the number of qemu cpus is greater than the number of real cpus. It probably improves real hardware system performance as well, but is not recommended for production at this time. 
--- sys/platform/pc64/include/pmap.h | 1 + sys/platform/pc64/include/pmap_inval.h | 1 + sys/platform/pc64/x86_64/machdep.c | 1 + sys/platform/pc64/x86_64/pmap.c | 8 ++++++- sys/platform/pc64/x86_64/pmap_inval.c | 31 ++++++++++++++++++++++---- 5 files changed, 37 insertions(+), 5 deletions(-) diff --git a/sys/platform/pc64/include/pmap.h b/sys/platform/pc64/include/pmap.h index ba276660cd..84b94ebe01 100644 --- a/sys/platform/pc64/include/pmap.h +++ b/sys/platform/pc64/include/pmap.h @@ -183,6 +183,7 @@ #define PML4pml4e ((pd_entry_t *)(addr_PML4pml4e)) extern u_int64_t KPML4phys; /* physical address of kernel level 4 */ +extern int pmap_fast_kernel_cpusync; #endif #ifdef _KERNEL diff --git a/sys/platform/pc64/include/pmap_inval.h b/sys/platform/pc64/include/pmap_inval.h index 5dcbc8bda0..1a89d61069 100644 --- a/sys/platform/pc64/include/pmap_inval.h +++ b/sys/platform/pc64/include/pmap_inval.h @@ -52,6 +52,7 @@ typedef pmap_inval_info *pmap_inval_info_t; #define PIRF_INVLTLB 0x0001 /* request invalidation of whole table */ #define PIRF_INVL1PG 0x0002 /* else request invalidation of one page */ #define PIRF_CPUSYNC 0x0004 /* cpusync is currently active */ +#define PIRF_QUICK 0x0008 /* quick (deinterlock only) */ #ifdef _KERNEL diff --git a/sys/platform/pc64/x86_64/machdep.c b/sys/platform/pc64/x86_64/machdep.c index 630e6e79a4..8e37b39459 100644 --- a/sys/platform/pc64/x86_64/machdep.c +++ b/sys/platform/pc64/x86_64/machdep.c @@ -2078,6 +2078,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) TUNABLE_INT_FETCH("hw.apic_io_enable", &ioapic_enable); /* for compat */ TUNABLE_INT_FETCH("hw.ioapic_enable", &ioapic_enable); TUNABLE_INT_FETCH("hw.lapic_enable", &lapic_enable); + TUNABLE_INT_FETCH("machdep.cpu_idle_hlt", &cpu_idle_hlt); /* * Some of the virtual machines do not work w/ I/O APIC diff --git a/sys/platform/pc64/x86_64/pmap.c b/sys/platform/pc64/x86_64/pmap.c index 2dc7aac4e2..28a6deda56 100644 --- a/sys/platform/pc64/x86_64/pmap.c +++ 
b/sys/platform/pc64/x86_64/pmap.c @@ -241,6 +241,9 @@ SYSCTL_INT(_machdep, OID_AUTO, pmap_yield_count, CTLFLAG_RW, static int pmap_mmu_optimize = 0; SYSCTL_INT(_machdep, OID_AUTO, pmap_mmu_optimize, CTLFLAG_RW, &pmap_mmu_optimize, 0, "Share page table pages when possible"); +int pmap_fast_kernel_cpusync = 0; +SYSCTL_INT(_machdep, OID_AUTO, pmap_fast_kernel_cpusync, CTLFLAG_RW, + &pmap_fast_kernel_cpusync, 0, "Use a one-stage pmap invalidation for kernel_pmap updates"); #define DISABLE_PSE @@ -987,8 +990,11 @@ pmap_bootstrap(vm_paddr_t *firstaddr) /* Initialize the PAT MSR */ pmap_init_pat(); - pmap_pinit_defaults(&kernel_pmap); + + TUNABLE_INT_FETCH("machdep.pmap_fast_kernel_cpusync", + &pmap_fast_kernel_cpusync); + } /* diff --git a/sys/platform/pc64/x86_64/pmap_inval.c b/sys/platform/pc64/x86_64/pmap_inval.c index 75fe327230..77e33f66a3 100644 --- a/sys/platform/pc64/x86_64/pmap_inval.c +++ b/sys/platform/pc64/x86_64/pmap_inval.c @@ -85,6 +85,13 @@ pmap_inval_init(pmap_inval_info_t info) * CPULOCK_EXCL is used to interlock thread switchins, otherwise another * cpu can switch in a pmap that we are unaware of and interfere with our * pte operation. + * + * NOTE! If pmap_fast_kernel_cpusync is enabled, interlocks on kernel_pmap + * are effectively NOPs and will not quiesce target cpus. The + * deinterlock will then issue the IPI and wait for completion, + * which avoids spinning all AP cpus at once. + * + * This needs testing before it can be enabled by default. 
*/ void pmap_inval_interlock(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) @@ -111,8 +118,12 @@ pmap_inval_interlock(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) info->pir_flags = PIRF_CPUSYNC; lwkt_cpusync_init(&info->pir_cpusync, pmap->pm_active, pmap_inval_callback, info); - lwkt_cpusync_interlock(&info->pir_cpusync); - atomic_add_acq_long(&pmap->pm_invgen, 1); + if (pmap == &kernel_pmap && pmap_fast_kernel_cpusync) { + info->pir_flags |= PIRF_QUICK; + } else { + lwkt_cpusync_interlock(&info->pir_cpusync); + atomic_add_acq_long(&pmap->pm_invgen, 1); + } } void @@ -121,12 +132,25 @@ pmap_inval_invltlb(pmap_inval_info_t info) info->pir_va = (vm_offset_t)-1; } +/* + * Deinterlock a pmap after making a change to a PTE. + * + * WARNING! When PIRF_QUICK is set we do not use a fully synchronous + * cpusync for kernel_pmap adjustments. We assume that all use cases + * for accesses via the kernel map are locally interlocked, so instead + * we bump pm_invgen and call lwkt_cpusync_quick() here. + */ void pmap_inval_deinterlock(pmap_inval_info_t info, pmap_t pmap) { KKASSERT(info->pir_flags & PIRF_CPUSYNC); atomic_clear_int(&pmap->pm_active_lock, CPULOCK_EXCL); - lwkt_cpusync_deinterlock(&info->pir_cpusync); + if (info->pir_flags & PIRF_QUICK) { + atomic_add_acq_long(&pmap->pm_invgen, 1); + lwkt_cpusync_quick(&info->pir_cpusync); + } else { + lwkt_cpusync_deinterlock(&info->pir_cpusync); + } info->pir_flags = 0; } @@ -147,4 +171,3 @@ pmap_inval_done(pmap_inval_info_t info) KKASSERT((info->pir_flags & PIRF_CPUSYNC) == 0); crit_exit_id("inval"); } - -- 2.41.0