From: Alex Hornung Date: Sat, 29 Dec 2012 23:52:54 +0000 (+0000) Subject: x86_64 - support for AVX instructions X-Git-Tag: v3.4.0rc~584 X-Git-Url: https://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/5cf56a8d44d83fcd14e7af7e5eaf6af2356c3d7a x86_64 - support for AVX instructions * CPU will be checked for XSAVE and AVX support on boot. If both are found, they will be enabled. * If enabled, the kernel will use the XSAVE and XRSTOR instructions to save and restore FPU, SSE and AVX registers. Originally-Submitted-by: Adam Sakareassen (with modifications) --- diff --git a/sys/config/LINT64 b/sys/config/LINT64 index 9f3b07ae07..198d967792 100644 --- a/sys/config/LINT64 +++ b/sys/config/LINT64 @@ -117,7 +117,10 @@ cpu HAMMER_CPU # CPU_ENABLE_EST enables support for Enhanced SpeedStep technology # found in Pentium(tm) M processors. # +# CPU_DISABLE_AVX disables AVX instruction set. +# #options CPU_DISABLE_SSE +options CPU_DISABLE_AVX options CPU_ENABLE_EST ##################################################################### diff --git a/sys/config/X86_64_GENERIC b/sys/config/X86_64_GENERIC index 4b554c3b9c..ca54d7a070 100644 --- a/sys/config/X86_64_GENERIC +++ b/sys/config/X86_64_GENERIC @@ -10,6 +10,7 @@ machine_arch x86_64 cpu HAMMER_CPU ident X86_64_GENERIC maxusers 0 +#options CPU_DISABLE_AVX makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols diff --git a/sys/cpu/x86_64/include/cpufunc.h b/sys/cpu/x86_64/include/cpufunc.h index 3cbb3cfcfc..cd79b0d02e 100644 --- a/sys/cpu/x86_64/include/cpufunc.h +++ b/sys/cpu/x86_64/include/cpufunc.h @@ -590,6 +590,14 @@ wrmsr(u_int msr, u_int64_t newval) __asm __volatile("wrmsr" : : "a" (low), "d" (high), "c" (msr)); } +static __inline void +xsetbv(u_int ecx, u_int eax, u_int edx) +{ + __asm __volatile(".byte 0x0f,0x01,0xd1" + : + : "a" (eax), "c" (ecx), "d" (edx)); +} + static __inline void load_cr0(u_long data) { diff --git a/sys/cpu/x86_64/include/npx.h b/sys/cpu/x86_64/include/npx.h index 2c36be7994..fd0e961280 
100644 --- a/sys/cpu/x86_64/include/npx.h +++ b/sys/cpu/x86_64/include/npx.h @@ -46,6 +46,8 @@ #ifndef _CPU_NPX_H_ #define _CPU_NPX_H_ +#include "opt_cpu.h" + #ifndef _SYS_TYPES_H_ #include #endif @@ -124,9 +126,19 @@ struct savexmm { u_char sv_pad[220]; } __attribute__((aligned(16))); +#ifndef CPU_DISABLE_AVX +struct saveymm { + u_char xsavedata[832]; +} __attribute__((aligned(64))); + +#endif + union savefpu { struct save87 sv_87; struct savexmm sv_xmm; +#ifndef CPU_DISABLE_AVX + struct saveymm sv_ymm; +#endif }; /* diff --git a/sys/cpu/x86_64/include/specialreg.h b/sys/cpu/x86_64/include/specialreg.h index b039cc9305..d5321f4e29 100644 --- a/sys/cpu/x86_64/include/specialreg.h +++ b/sys/cpu/x86_64/include/specialreg.h @@ -41,40 +41,40 @@ #define CR0_MP 0x00000002 /* "Math" (fpu) Present */ #define CR0_EM 0x00000004 /* EMulate FPU instructions. (trap ESC only) */ #define CR0_TS 0x00000008 /* Task Switched (if MP, trap ESC and WAIT) */ -#define CR0_PG 0x80000000 /* PaGing enable */ +#define CR0_PG 0x80000000 /* Paging enable */ /* * Bits in 486 special registers: */ #define CR0_NE 0x00000020 /* Numeric Error enable (EX16 vs IRQ13) */ -#define CR0_WP 0x00010000 /* Write Protect (honor page protect in - all modes) */ +#define CR0_WP 0x00010000 /* Write Protect (honor page protect in all modes) */ #define CR0_AM 0x00040000 /* Alignment Mask (set to enable AC flag) */ #define CR0_NW 0x20000000 /* Not Write-through */ #define CR0_CD 0x40000000 /* Cache Disable */ /* - * Bits in PPro special registers + * Bits in CR4 special register */ -#define CR4_VME 0x00000001 /* Virtual 8086 mode extensions */ -#define CR4_PVI 0x00000002 /* Protected-mode virtual interrupts */ -#define CR4_TSD 0x00000004 /* Time stamp disable */ -#define CR4_DE 0x00000008 /* Debugging extensions */ -#define CR4_PSE 0x00000010 /* Page size extensions */ -#define CR4_PAE 0x00000020 /* Physical address extension */ -#define CR4_MCE 0x00000040 /* Machine check enable */ -#define CR4_PGE 0x00000080 /* 
Page global enable */ -#define CR4_PCE 0x00000100 /* Performance monitoring counter enable */ -#define CR4_FXSR 0x00000200 /* Fast FPU save/restore used by OS */ -#define CR4_XMM 0x00000400 /* enable SIMD/MMX2 to use except 16 */ +#define CR4_VME 0x00000001 /* Virtual 8086 mode extensions */ +#define CR4_PVI 0x00000002 /* Protected-mode virtual interrupts */ +#define CR4_TSD 0x00000004 /* Time stamp disable */ +#define CR4_DE 0x00000008 /* Debugging extensions */ +#define CR4_PSE 0x00000010 /* Page size extensions */ +#define CR4_PAE 0x00000020 /* Physical address extension */ +#define CR4_MCE 0x00000040 /* Machine check enable */ +#define CR4_PGE 0x00000080 /* Page global enable */ +#define CR4_PCE 0x00000100 /* Performance monitoring counter enable */ +#define CR4_FXSR 0x00000200 /* Fast FPU save/restore used by OS */ +#define CR4_XMM 0x00000400 /* Enable SIMD/MMX2 to use except 16 */ +#define CR4_XSAVE 0x00040000 /* Enable XSave (for AVX Instructions)*/ /* * Bits in x86_64 special registers. EFER is 64 bits wide. 
*/ -#define EFER_SCE 0x000000001 /* System Call Extensions (R/W) */ -#define EFER_LME 0x000000100 /* Long mode enable (R/W) */ -#define EFER_LMA 0x000000400 /* Long mode active (R) */ -#define EFER_NXE 0x000000800 /* PTE No-Execute bit enable (R/W) */ +#define EFER_SCE 0x000000001 /* System Call Extensions (R/W) */ +#define EFER_LME 0x000000100 /* Long mode enable (R/W) */ +#define EFER_LMA 0x000000400 /* Long mode active (R) */ +#define EFER_NXE 0x000000800 /* PTE No-Execute bit enable (R/W) */ /* * CPUID instruction features register @@ -132,10 +132,19 @@ #define CPUID2_SSE42 0x00100000 #define CPUID2_X2APIC 0x00200000 #define CPUID2_POPCNT 0x00800000 -#define CPUID2_AESNI 0x02000000 -#define CPUID2_RDRAND 0x40000000 +#define CPUID2_AESNI 0x02000000 /* AES Instruction Set */ +#define CPUID2_XSAVE 0x04000000 /* XSave supported by CPU */ +#define CPUID2_OSXSAVE 0x08000000 /* XSave enabled by OS (CR4.OSXSAVE set) */ +#define CPUID2_AVX 0x10000000 /* AVX instruction set support */ +#define CPUID2_F16C 0x20000000 /* CVT16 instruction set support */ +#define CPUID2_RDRAND 0x40000000 /* RdRand. 
On chip random numbers */ #define CPUID2_VMM 0x80000000 /* AMD 25481 2.34 page 11 */ +/*Bits related to the XFEATURE_ENABLED_MASK control register*/ +#define CPU_XFEATURE_X87 0x00000001 +#define CPU_XFEATURE_SSE 0x00000002 +#define CPU_XFEATURE_YMM 0x00000004 + /* * Important bits in the AMD extended cpuid flags */ diff --git a/sys/platform/pc64/conf/options b/sys/platform/pc64/conf/options index b149a4e0af..60b8039e08 100644 --- a/sys/platform/pc64/conf/options +++ b/sys/platform/pc64/conf/options @@ -18,6 +18,7 @@ NDISAPI opt_dontuse.h # x86_64 SMP options CPU_ENABLE_EST opt_cpu.h +CPU_DISABLE_AVX opt_cpu.h # The cpu type # diff --git a/sys/platform/pc64/include/md_var.h b/sys/platform/pc64/include/md_var.h index f929264455..8a98f537d8 100644 --- a/sys/platform/pc64/include/md_var.h +++ b/sys/platform/pc64/include/md_var.h @@ -51,6 +51,7 @@ extern u_int amd_feature; extern u_int amd_feature2; extern u_int cpu_clflush_line_size; extern u_int cpu_fxsr; +extern u_int cpu_xsave; extern u_int cpu_high; extern u_int cpu_id; extern u_int cpu_procinfo; diff --git a/sys/platform/pc64/x86_64/initcpu.c b/sys/platform/pc64/x86_64/initcpu.c index 7a49bdd370..44eb5f1f5d 100644 --- a/sys/platform/pc64/x86_64/initcpu.c +++ b/sys/platform/pc64/x86_64/initcpu.c @@ -62,6 +62,7 @@ u_int cpu_procinfo2; /* Multicore info */ char cpu_vendor[20]; /* CPU Origin code */ u_int cpu_vendor_id; /* CPU vendor ID */ u_int cpu_fxsr; /* SSE enabled */ +u_int cpu_xsave; /* AVX enabled by OS*/ u_int cpu_mxcsr_mask; /* Valid bits in mxcsr */ u_int cpu_clflush_line_size = 32; /* Default CLFLUSH line size */ @@ -152,11 +153,24 @@ initializecpu(void) { uint64_t msr; + /*Check for FXSR and SSE support and enable if available.*/ if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) { load_cr4(rcr4() | CR4_FXSR | CR4_XMM); cpu_fxsr = hw_instruction_sse = 1; } +#if !defined(CPU_DISABLE_AVX) + /*Check for XSAVE and AVX support and enable if available.*/ + if ((cpu_feature2 & CPUID2_AVX) && 
(cpu_feature2 & CPUID2_XSAVE) + && (cpu_feature & CPUID_SSE)) { + load_cr4(rcr4() | CR4_XSAVE); + + /* Adjust size of savefpu in npx.h before adding to mask.*/ + xsetbv(0, CPU_XFEATURE_X87 | CPU_XFEATURE_SSE | CPU_XFEATURE_YMM, 0); + cpu_xsave = 1; + } +#endif + if (cpu_vendor_id == CPU_VENDOR_AMD) { switch((cpu_id & 0xFF0000)) { case 0x100000: diff --git a/sys/platform/pc64/x86_64/npx.c b/sys/platform/pc64/x86_64/npx.c index 20e4bafb07..e1133a6f9e 100644 --- a/sys/platform/pc64/x86_64/npx.c +++ b/sys/platform/pc64/x86_64/npx.c @@ -37,6 +37,7 @@ */ #include "opt_debug_npx.h" +#include "opt_cpu.h" #include #include @@ -76,6 +77,10 @@ #define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr))) #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) #endif +#ifndef CPU_DISABLE_AVX +#define xrstor(eax,edx,addr) __asm __volatile(".byte 0x0f,0xae,0x2f" : : "D" (addr), "a" (eax), "d" (edx)) +#define xsave(eax,edx,addr) __asm __volatile(".byte 0x0f,0xae,0x27" : : "D" (addr), "a" (eax), "d" (edx) : "memory") +#endif #define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \ : : "n" (CR0_TS) : "ax") #define stop_emulating() __asm("clts") @@ -96,7 +101,8 @@ static void fpurstor (union savefpu *); void npxinit(u_short control) { - static union savefpu dummy __aligned(16); + /*64-Byte alignment required for xsave*/ + static union savefpu dummy __aligned(64); /* * fninit has the same h/w bugs as fnsave. 
Use the detoxified @@ -405,6 +411,11 @@ npxsave(union savefpu *addr) static void fpusave(union savefpu *addr) { +#ifndef CPU_DISABLE_AVX + if (cpu_xsave) + xsave(CPU_XFEATURE_X87 | CPU_XFEATURE_SSE | CPU_XFEATURE_YMM, 0, addr); + else +#endif #ifndef CPU_DISABLE_SSE if (cpu_fxsr) fxsave(addr); @@ -545,6 +556,11 @@ fpu_clean_state(void) static void fpurstor(union savefpu *addr) { +#ifndef CPU_DISABLE_AVX + if (cpu_xsave) + xrstor(CPU_XFEATURE_X87 | CPU_XFEATURE_SSE | CPU_XFEATURE_YMM, 0, addr); + else +#endif #ifndef CPU_DISABLE_SSE if (cpu_fxsr) { fpu_clean_state();