kernel - Flesh out Spectre mitigation support
author Matthew Dillon <dillon@apollo.backplane.com>
Tue, 1 May 2018 03:50:13 +0000 (20:50 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Tue, 8 May 2018 17:01:20 +0000 (10:01 -0700)
* Add handling for all modes for AMD CPUs, including support for
  IBRS_AUTO and STIBP_AUTO (always-on) bits which will be added
  to future CPUs.

* Add STIBP handling to Intel CPUs.  I can't find definitions for
  AUTO (always-on) modes for Intel, so those are still not supported
  (no current CPU has AUTO support yet anyway).

* Current DragonFlyBSD defaults: Will enable IBRS_AUTO and STIBP_AUTO
  (always-on) support by default if the CPU has it.  Will NOT enable IBRS
  or STIBP (non-auto) toggling by default.  Will not enable IBPB by default.

  IBPB is left disabled because its overhead is enormous (~2us).  We
  follow Linux in this regard.

* Change the machdep.spectre_mitigation sysctl to take a string of
  features to enable.  Change machdep.spectre_support to display a
  string of features supported.  Possible features are:

  IBRS          Indirect Branch Restricted Speculation (U->K and K->U)
  STIBP         Single Thread Indirect Branch Predictors (U->K and K->U)
  IBPB          Indirect Branch Prediction Barrier (U->K)
  IBRS_AUTO     IBRS always-on (set once and forget)
  STIBP_AUTO    STIBP always-on (set once and forget)

  The machdep.spectre_support sysctl tells you what's available.

* Refactor tr_pcb_gflags into tr_pcb_spec_ctrl[2] to make it easier
  for the assembly code to program the SPEC_CTRL MSR.

* Note that some of the above bits will never be supported by current
  hardware and exist to allow future hardware to support these features
  in a less expensive manner.

* Also note that for Meltdown, AMD is immune and the meltdown mitigation
  will not be enabled.  Intel is vulnerable and the mitigation will be
  enabled by default.  See sysctl machdep.meltdown_mitigation.

sys/cpu/x86_64/include/asmacros.h
sys/cpu/x86_64/include/frame.h
sys/cpu/x86_64/include/specialreg.h
sys/platform/pc64/include/pcb.h
sys/platform/pc64/x86_64/genassym.c
sys/platform/pc64/x86_64/machdep.c
sys/platform/pc64/x86_64/vm_machdep.c

index d46fe35..ba7f468 100644 (file)
  *               and IBRS2.
  */
 #define KMMUENTER_CORE                                                 \
-       testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
+       testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
        je      40f ;                                                   \
        movq    PCPU(trampoline)+TR_PCB_CR3,%rcx ;                      \
        movq    %rcx,%cr3 ;                                             \
-40:    testq   $PCB_IBRS1|PCB_IBRS2|PCB_IBPB,PCPU(trampoline)+TR_PCB_GFLAGS ;\
+40:    movl    PCPU(trampoline)+TR_PCB_SPEC_CTRL,%edx ;                \
+       testq   %rdx, %rdx ;                                            \
        je      43f ;                                                   \
        movq    %rax, PCPU(trampoline)+TR_RAX ;                         \
-       testq   $PCB_IBRS1|PCB_IBRS2,PCPU(trampoline)+TR_PCB_GFLAGS ;   \
+       testq   $SPEC_CTRL_DUMMY_ENABLE,%rdx ;                          \
        je      41f ;                                                   \
-       movl    $MSR_SPEC_CTRL,%ecx ;                                   \
-       movl    $MSR_IBRS_ENABLE,%eax ;                                 \
+       movq    %rdx, %rax ;                                            \
+       andq    $SPEC_CTRL_IBRS|SPEC_CTRL_STIBP, %rax ;                 \
+       movq    $MSR_SPEC_CTRL,%rcx ;                                   \
        xorl    %edx,%edx ;                                             \
        wrmsr ;                                                         \
-41:    testq   $PCB_IBPB,PCPU(trampoline)+TR_PCB_GFLAGS ;              \
+       movl    PCPU(trampoline)+TR_PCB_SPEC_CTRL,%edx ;                \
+41:    testq   $SPEC_CTRL_DUMMY_IBPB,%rdx ;                            \
        je      42f ;                                                   \
        movl    $MSR_PRED_CMD,%ecx ;                                    \
-       movl    $MSR_IBPB_BARRIER,%eax ;                                \
+       movl    $1,%eax ;                                               \
        xorl    %edx,%edx ;                                             \
        wrmsr ;                                                         \
 42:    movq    PCPU(trampoline)+TR_RAX, %rax ;                         \
 43:                                                                    \
 
-
 /*
  * Enter with trampoline, hardware pushed up to %rip
  */
        movq    PCPU(trampoline)+TR_RCX, %rcx                           \
 
 /*
- * KMMUEXIT_CORE handles IBRS and IBPB, but not ISOMMU
+ * KMMUEXIT_CORE handles IBRS and STIBP, but not ISOMMU
  *
  * We don't re-execute the IBPB barrier on exit atm.
  */
 #define KMMUEXIT_CORE                                                  \
-       testq   $PCB_IBRS1,PCPU(trampoline)+TR_PCB_GFLAGS ;             \
+       testq   $SPEC_CTRL_DUMMY_ENABLE,PCPU(trampoline)+TR_PCB_SPEC_CTRL+4 ; \
        je      41f ;                                                   \
        movq    %rax, PCPU(trampoline)+TR_RAX ;                         \
        movq    %rcx, PCPU(trampoline)+TR_RCX ;                         \
        movq    %rdx, PCPU(trampoline)+TR_RDX ;                         \
-       movl    $MSR_SPEC_CTRL,%ecx ;                                   \
-       movl    $MSR_IBRS_DISABLE,%eax ;                                \
+       movl    PCPU(trampoline)+TR_PCB_SPEC_CTRL+4, %eax ;             \
+       andq    $SPEC_CTRL_IBRS|SPEC_CTRL_STIBP, %rax ;                 \
+       movq    $MSR_SPEC_CTRL,%rcx ;                                   \
        xorl    %edx,%edx ;                                             \
        wrmsr ;                                                         \
        movq    PCPU(trampoline)+TR_RDX, %rdx ;                         \
 #define KMMUEXIT                                                       \
        addq    $TF_RIP,%rsp ;                                          \
        KMMUEXIT_CORE ;                                                 \
-       testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
+       testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
        je      40f ;                                                   \
        movq    %rcx, PCPU(trampoline)+TR_ERR ; /* save in TR_ERR */    \
        popq    %rcx ;                          /* copy %rip */         \
  */
 #define KMMUEXIT_SYSCALL                                               \
        KMMUEXIT_CORE ;                                                 \
-       testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
+       testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
        je      40f ;                                                   \
        movq    %rcx, PCPU(trampoline)+TR_RCX ;                         \
        movq    PCPU(trampoline)+TR_PCB_CR3_ISO,%rcx ;                  \
index e485326..8429a08 100644 (file)
@@ -144,7 +144,7 @@ struct trampframe {
        register_t      tr_pcb_flags;   /* copy of pcb control flags */
        register_t      tr_pcb_cr3_iso; /* copy of isolated pml4e */
        register_t      tr_pcb_cr3;     /* copy of primary pml4e */
-       register_t      tr_pcb_gflags;  /* global flags (IBRS support) */
+       uint32_t        tr_pcb_spec_ctrl[2];/* SPEC_CTRL + ficticious flags */
        register_t      tr_pcb_gs_kernel; /* (used by nmi, dbg) */
        register_t      tr_pcb_gs_saved;  /* (used by nmi) */
        register_t      tr_pcb_cr3_saved; /* (used by nmi) */
index 9ef1213..d13a867 100644 (file)
 /*
  * IBRS and IBPB Spectre mitigation
  *
- * NOTE: Either CPUID_80000008_I1_IBPB_SUPPORT or CPUID_7_0_I3_SPEC_CTRL
- *      indicates IBPB support.  However, note that MSR_PRED_CMD is
- *      a command register that may only be written, not read.
+ * Intel: Either CPUID_80000008_I1_IBPB_SUPPORT or CPUID_7_0_I3_SPEC_CTRL
+ *       indicates IBPB support.  However, note that MSR_PRED_CMD is
+ *       a command register that may only be written, not read.
  *
- *      MSR_IBPB_BARRIER is written to MSR_PRED_CMD unconditionally.
- *      Writing 0 has no effect.
+ * IBPB: (barrier)
+ *       $1 is written to MSR_PRED_CMD unconditionally, writing 0
+ *       has no effect.
+ *
+ * IBRS and STIBP
+ *       Serves as barrier and mode, set on entry to kernel and clear
+ *       on exit.  Be sure to clear before going idle (else hyperthread
+ *       performance will drop).
  */
-#define MSR_IBRS_DISABLE               0       /* MSR_SPEC_CTRL (bit 0) */
-#define MSR_IBRS_ENABLE                        1
-#define MSR_IBPB_BARRIER               1       /* MSR_PRED_CMD */
 
 #define CPUID_7_0_I3_SPEC_CTRL         0x04000000      /* in EDX (index 3) */
 #define CPUID_7_0_I3_STIBP             0x08000000      /* in EDX (index 3) */
-#define CPUID_80000008_I1_IBPB_SUPPORT 0x00001000      /* in EBX (index 1) */
+
+#define SPEC_CTRL_IBRS                 0x00000001
+#define SPEC_CTRL_STIBP                        0x00000002
+#define SPEC_CTRL_DUMMY1               0x00010000      /* ficticious */
+#define SPEC_CTRL_DUMMY2               0x00020000      /* ficticious */
+#define SPEC_CTRL_DUMMY3               0x00040000      /* ficticious */
+#define SPEC_CTRL_DUMMY4               0x00080000      /* ficticious */
+#define SPEC_CTRL_DUMMY5               0x00100000      /* ficticious */
+#define SPEC_CTRL_DUMMY6               0x00200000      /* ficticious */
+
+/*
+ * In EBX (index 1)
+ */
+#define CPUID_INTEL_80000008_I1_IBPB_SUPPORT   0x00001000
+
+#define CPUID_AMD_80000008_I1_IBPB_SUPPORT     0x00001000
+#define CPUID_AMD_80000008_I1_IBRS_SUPPORT     0x00004000
+#define CPUID_AMD_80000008_I1_STIBP_SUPPORT    0x00008000
+
+#define CPUID_AMD_80000008_I1_IBRS_AUTO                0x00010000
+#define CPUID_AMD_80000008_I1_STIBP_AUTO       0x00020000
+#define CPUID_AMD_80000008_I1_IBRS_REQUESTED   0x00040000
 
 /*
  * PAT modes.
index 123ee41..ce0c172 100644 (file)
@@ -86,13 +86,14 @@ struct pcb {
 
 #define        PCB_DBREGS      0x00000002      /* process using debug registers */
 #define        PCB_FPUINITDONE 0x00000008      /* fpu state is initialized */
-#define PCB_ISOMMU     0x00000010      /* isolated mmu context active */
-#define PCB_IBRS1      0x00000020      /* IBRS mode 1 (kernel only) */
-#define PCB_IBRS2      0x00000040      /* IBRS mode 2 (always) */
-#define PCB_IBPB       0x00000080      /* IBPB barrier user->kernel */
+#define PCB_ISOMMU     0x00000010      /* MMU isolation */
+
 #define FP_SOFTFP       0x01           /* process using soft flt emulator */
 #define        FP_VIRTFP       0x04            /* vkernel wants exception */
 
+#define SPEC_CTRL_DUMMY_IBPB   SPEC_CTRL_DUMMY1
+#define SPEC_CTRL_DUMMY_ENABLE SPEC_CTRL_DUMMY2
+
 #ifdef _KERNEL
 void   savectx(struct pcb *);
 #endif
index 1ae1e1e..fbdae28 100644 (file)
@@ -135,9 +135,13 @@ ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
 
 ASSYM(PCB_DBREGS, PCB_DBREGS);
 ASSYM(PCB_ISOMMU, PCB_ISOMMU);
-ASSYM(PCB_IBRS1, PCB_IBRS1);
-ASSYM(PCB_IBRS2, PCB_IBRS2);
-ASSYM(PCB_IBPB, PCB_IBPB);
+
+#if 0 /* we get this from specialreg.h */
+ASSYM(SPEC_CTRL_IBRS, SPEC_CTRL_IBRS);
+ASSYM(SPEC_CTRL_STIBP, SPEC_CTRL_STIBP);
+#endif
+ASSYM(SPEC_CTRL_DUMMY_IBPB, SPEC_CTRL_DUMMY_IBPB);
+ASSYM(SPEC_CTRL_DUMMY_ENABLE, SPEC_CTRL_DUMMY_ENABLE);
 
 ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
 ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
@@ -227,7 +231,7 @@ ASSYM(TR_PCB_RSP, offsetof(struct trampframe, tr_pcb_rsp));
 ASSYM(TR_PCB_FLAGS, offsetof(struct trampframe, tr_pcb_flags));
 ASSYM(TR_PCB_CR3_ISO, offsetof(struct trampframe, tr_pcb_cr3_iso));
 ASSYM(TR_PCB_CR3, offsetof(struct trampframe, tr_pcb_cr3));
-ASSYM(TR_PCB_GFLAGS, offsetof(struct trampframe, tr_pcb_gflags));
+ASSYM(TR_PCB_SPEC_CTRL, offsetof(struct trampframe, tr_pcb_spec_ctrl[0]));
 ASSYM(TR_PCB_GS_KERNEL, offsetof(struct trampframe, tr_pcb_gs_kernel));
 ASSYM(TR_PCB_GS_SAVED, offsetof(struct trampframe, tr_pcb_gs_saved));
 ASSYM(TR_PCB_CR3_SAVED, offsetof(struct trampframe, tr_pcb_cr3_saved));
index b61761f..5100f24 100644 (file)
@@ -1217,7 +1217,8 @@ cpu_idle(void)
                 * General idle thread halt code
                 *
                 * IBRS NOTES - IBRS is a SPECTRE mitigation.  When going
-                *              idle, IBRS
+                *              idle, disable IBRS to reduce hyperthread
+                *              overhead.
                 */
                ++gd->gd_idle_repeat;
 
@@ -1256,15 +1257,18 @@ do_spin:
                                        goto do_spin;
                                crit_enter_gd(gd);
                                ATOMIC_CPUMASK_ORBIT(smp_idleinvl_mask, gd->gd_cpuid);
-                               if (pscpu->trampoline.tr_pcb_gflags &
-                                   (PCB_IBRS1 | PCB_IBRS2)) {
-                                       wrmsr(0x48, 0); /* IBRS (spectre) */
+                               /*
+                                * IBRS/STIBP
+                                */
+                               if (pscpu->trampoline.tr_pcb_spec_ctrl[1] &
+                                   SPEC_CTRL_DUMMY_ENABLE) {
+                                       wrmsr(MSR_SPEC_CTRL, pscpu->trampoline.tr_pcb_spec_ctrl[1] & (SPEC_CTRL_IBRS|SPEC_CTRL_STIBP));
                                }
                                cpu_mmw_pause_int(&gd->gd_reqflags, reqflags,
                                                  cpu_mwait_cx_hint(stat), 0);
-                               if (pscpu->trampoline.tr_pcb_gflags &
-                                   (PCB_IBRS1 | PCB_IBRS2)) {
-                                       wrmsr(0x48, 1); /* IBRS (spectre) */
+                               if (pscpu->trampoline.tr_pcb_spec_ctrl[0] &
+                                   SPEC_CTRL_DUMMY_ENABLE) {
+                                       wrmsr(MSR_SPEC_CTRL, pscpu->trampoline.tr_pcb_spec_ctrl[0] & (SPEC_CTRL_IBRS|SPEC_CTRL_STIBP));
                                }
                                stat->halt++;
                                ATOMIC_CPUMASK_NANDBIT(smp_idleinvl_mask, gd->gd_cpuid);
@@ -1287,16 +1291,14 @@ do_spin:
                        if ((gd->gd_reqflags & RQF_IDLECHECK_WK_MASK) == 0) {
                                ATOMIC_CPUMASK_ORBIT(smp_idleinvl_mask,
                                                     gd->gd_cpuid);
-                               if (pscpu->trampoline.tr_pcb_gflags &
-                                   (PCB_IBRS1 | PCB_IBRS2)) {
-                                       /* IBRS (spectre) */
-                                       wrmsr(0x48, 0);
+                               if (pscpu->trampoline.tr_pcb_spec_ctrl[1] &
+                                   SPEC_CTRL_DUMMY_ENABLE) {
+                                       wrmsr(MSR_SPEC_CTRL, pscpu->trampoline.tr_pcb_spec_ctrl[1] & (SPEC_CTRL_IBRS|SPEC_CTRL_STIBP));
                                }
                                cpu_idle_default_hook();
-                               if (pscpu->trampoline.tr_pcb_gflags &
-                                   (PCB_IBRS1 | PCB_IBRS2)) {
-                                       /* IBRS (spectre) */
-                                       wrmsr(0x48, 1);
+                               if (pscpu->trampoline.tr_pcb_spec_ctrl[0] &
+                                   SPEC_CTRL_DUMMY_ENABLE) {
+                                       wrmsr(MSR_SPEC_CTRL, pscpu->trampoline.tr_pcb_spec_ctrl[0] & (SPEC_CTRL_IBRS|SPEC_CTRL_STIBP));
                                }
                                ATOMIC_CPUMASK_NANDBIT(smp_idleinvl_mask,
                                                       gd->gd_cpuid);
@@ -1322,14 +1324,14 @@ do_acpi:
                        if ((gd->gd_reqflags & RQF_IDLECHECK_WK_MASK) == 0) {
                                ATOMIC_CPUMASK_ORBIT(smp_idleinvl_mask,
                                                     gd->gd_cpuid);
-                               if (pscpu->trampoline.tr_pcb_gflags &
-                                   (PCB_IBRS1 | PCB_IBRS2)) {
-                                       wrmsr(0x48, 0);
+                               if (pscpu->trampoline.tr_pcb_spec_ctrl[1] &
+                                   SPEC_CTRL_DUMMY_ENABLE) {
+                                       wrmsr(MSR_SPEC_CTRL, pscpu->trampoline.tr_pcb_spec_ctrl[1] & (SPEC_CTRL_IBRS|SPEC_CTRL_STIBP));
                                }
                                cpu_idle_hook();
-                               if (pscpu->trampoline.tr_pcb_gflags &
-                                   (PCB_IBRS1 | PCB_IBRS2)) {
-                                       wrmsr(0x48, 1);
+                               if (pscpu->trampoline.tr_pcb_spec_ctrl[0] &
+                                   SPEC_CTRL_DUMMY_ENABLE) {
+                                       wrmsr(MSR_SPEC_CTRL, pscpu->trampoline.tr_pcb_spec_ctrl[0] & (SPEC_CTRL_IBRS|SPEC_CTRL_STIBP));
                                }
                                ATOMIC_CPUMASK_NANDBIT(smp_idleinvl_mask,
                                                       gd->gd_cpuid);
index efc6894..6031888 100644 (file)
 
 static void    cpu_reset_real (void);
 
-int spectre_mitigation = -1;
-
-static int spectre_ibrs_mode = 0;
-SYSCTL_INT(_machdep, OID_AUTO, spectre_ibrs_mode, CTLFLAG_RD,
-       &spectre_ibrs_mode, 0, "current IBRS mode");
-static int spectre_ibpb_mode = 0;
-SYSCTL_INT(_machdep, OID_AUTO, spectre_ibpb_mode, CTLFLAG_RD,
-       &spectre_ibpb_mode, 0, "current IBPB mode");
-static int spectre_ibrs_supported = 0;
-SYSCTL_INT(_machdep, OID_AUTO, spectre_ibrs_supported, CTLFLAG_RD,
-       &spectre_ibrs_supported, 0, "IBRS mode supported");
-static int spectre_ibpb_supported = 0;
-SYSCTL_INT(_machdep, OID_AUTO, spectre_ibpb_supported, CTLFLAG_RD,
-       &spectre_ibpb_supported, 0, "IBPB mode supported");
+static int spectre_mitigation = -1;
+static int spectre_support = 0;
+
+static int spectre_mode = 0;
+SYSCTL_INT(_machdep, OID_AUTO, spectre_mode, CTLFLAG_RD,
+       &spectre_mode, 0, "current Spectre enablements");
 
 /*
  * Finish a fork operation, with lwp lp2 nearly set up.
@@ -425,9 +417,15 @@ void spectre_vm_setup(void *arg);
 /*
  * Check for IBPB and IBRS support
  *
- * Returns a mask:     0x1     IBRS supported
- *                     0x2     IBPB supported
+ * These bits also specify desired modes in the spectre_mitigation sysctl.
  */
+#define IBRS_SUPPORTED         0x0001
+#define STIBP_SUPPORTED                0x0002
+#define IBPB_SUPPORTED         0x0004
+#define IBRS_AUTO_SUPPORTED    0x0008
+#define STIBP_AUTO_SUPPORTED   0x0010
+#define IBRS_PREFERRED_REQUEST 0x0020
+
 static
 int
 spectre_check_support(void)
@@ -436,33 +434,72 @@ spectre_check_support(void)
        int rv = 0;
 
        /*
-        * SPEC_CTRL (bit 26) and STIBP support (bit 27)
+        * Spectre mitigation hw bits
         *
-        * XXX Not sure what the STIBP flag is meant to be used for.
+        * IBRS         Indirect Branch Restricted Speculation   (isolation)
+        * STIBP        Single Thread Indirect Branch Prediction (isolation)
+        * IBPB         Branch Prediction Barrier                (barrier)
         *
-        * SPEC_CTRL indicates IBRS and IBPB support.
-        */
-       p[0] = 0;
-       p[1] = 0;
-       p[2] = 0;
-       p[3] = 0;
-       cpuid_count(7, 0, p);
-       if (p[3] & CPUID_7_0_I3_SPEC_CTRL)
-               rv |= 3;
-
-       /*
-        * 0x80000008 p[1] bit 12 indicates IBPB support
+        * IBRS and STIBP must be toggled (enabled on entry to kernel,
+        * disabled on exit, as well as disabled during any MWAIT/HLT).
+        * When *_AUTO bits are available, IBRS and STIBP may be left
+        * turned on and do not have to be toggled on kernel entry/exit.
         *
-        * This bit might be set even though SPEC_CTRL is not set.
+        * All this shit has enormous overhead.  IBPB in particular, and
+        * non-auto modes are disabled by default.
         */
        if (cpu_vendor_id == CPU_VENDOR_INTEL) {
                p[0] = 0;
                p[1] = 0;
                p[2] = 0;
                p[3] = 0;
+               cpuid_count(7, 0, p);
+               if (p[3] & CPUID_7_0_I3_SPEC_CTRL)
+                       rv |= IBRS_SUPPORTED | IBPB_SUPPORTED;
+               if (p[3] & CPUID_7_0_I3_STIBP)
+                       rv |= STIBP_SUPPORTED;
+
+               /*
+                * 0x80000008 p[1] bit 12 indicates IBPB support
+                *
+                * This bit might be set even though SPEC_CTRL is not set.
+                */
+               p[0] = 0;
+               p[1] = 0;
+               p[2] = 0;
+               p[3] = 0;
+               do_cpuid(0x80000008U, p);
+               if (p[1] & CPUID_INTEL_80000008_I1_IBPB_SUPPORT)
+                       rv |= IBPB_SUPPORTED;
+       } else if (cpu_vendor_id == CPU_VENDOR_AMD) {
+               /*
+                * 0x80000008 p[1] bit 12 indicates IBPB support
+                *            p[1] bit 14 indicates IBRS support
+                *            p[1] bit 15 indicates STIBP support
+                *
+                *            p[1] bit 16 indicates IBRS auto support
+                *            p[1] bit 17 indicates STIBP auto support
+                *            p[1] bit 18 indicates processor prefers using
+                *              IBRS instead of retpoline.
+                */
+               p[0] = 0;
+               p[1] = 0;
+               p[2] = 0;
+               p[3] = 0;
                do_cpuid(0x80000008U, p);
-               if (p[1] & CPUID_80000008_I1_IBPB_SUPPORT)
-                       rv |= 2;
+               if (p[1] & CPUID_AMD_80000008_I1_IBPB_SUPPORT)
+                       rv |= IBPB_SUPPORTED;
+               if (p[1] & CPUID_AMD_80000008_I1_IBRS_SUPPORT)
+                       rv |= IBRS_SUPPORTED;
+               if (p[1] & CPUID_AMD_80000008_I1_STIBP_SUPPORT)
+                       rv |= STIBP_SUPPORTED;
+
+               if (p[1] & CPUID_AMD_80000008_I1_IBRS_AUTO)
+                       rv |= IBRS_AUTO_SUPPORTED;
+               if (p[1] & CPUID_AMD_80000008_I1_STIBP_AUTO)
+                       rv |= STIBP_AUTO_SUPPORTED;
+               if (p[1] & CPUID_AMD_80000008_I1_IBRS_REQUESTED)
+                       rv |= IBRS_PREFERRED_REQUEST;
        }
 
        return rv;
@@ -472,54 +509,22 @@ spectre_check_support(void)
  * Iterate CPUs and adjust MSR for global operations, since
  * the KMMU* code won't do it if spectre_mitigation is 0 or 2.
  */
+#define CHECK(flag)    (spectre_mitigation & spectre_support & (flag))
+
 static
 void
 spectre_sysctl_changed(void)
 {
        globaldata_t save_gd;
        struct trampframe *tr;
+       int spec_ctrl;
+       int mode;
        int n;
 
-       /*
-        * Console message on mitigation mode change
-        */
-       kprintf("machdep.spectre_mitigation=%d: ", spectre_mitigation);
-
-       if (spectre_ibrs_supported == 0) {
-               kprintf("IBRS=NOSUPPORT, ");
-       } else {
-               switch(spectre_mitigation & 3) {
-               case 0:
-                       kprintf("IBRS=0 (disabled), ");
-                       break;
-               case 1:
-                       kprintf("IBRS=1 (kern-only), ");
-                       break;
-               case 2:
-                       kprintf("IBRS=2 (always-on), ");
-                       break;
-               case 3:
-                       kprintf("IBRS=?, ");
-                       break;
-               }
-       }
-
-       if (spectre_ibpb_supported == 0) {
-               kprintf("IBPB=NOSUPPORT\n");
-       } else {
-               switch(spectre_mitigation & 4) {
-               case 0:
-                       kprintf("IBPB=0 (disabled)\n");
-                       break;
-               case 4:
-                       kprintf("IBPB=1 (enabled)\n");
-                       break;
-               }
-       }
-
        /*
         * Fixup state
         */
+       mode = 0;
        save_gd = mycpu;
        for (n = 0; n < ncpus; ++n) {
                lwkt_setcpu_self(globaldata_find(n));
@@ -532,9 +537,8 @@ spectre_sysctl_changed(void)
                 * XXX cleanup, reusing globals inside the loop (they get
                 * set to the same thing each loop)
                 */
-               tr->tr_pcb_gflags &= ~(PCB_IBRS1 | PCB_IBRS2 | PCB_IBPB);
-               spectre_ibrs_mode = 0;
-               spectre_ibpb_mode = 0;
+               tr->tr_pcb_spec_ctrl[0] = 0;    /* kernel entry (idle exit) */
+               tr->tr_pcb_spec_ctrl[1] = 0;    /* kernel exit  (idle entry) */
 
                /*
                 * Don't try to parse if not available
@@ -543,53 +547,108 @@ spectre_sysctl_changed(void)
                        continue;
 
                /*
-                * IBRS mode
+                * IBRS mode.  Auto overrides toggling.
+                *
+                * Only set the ENABLE flag if we have to toggle something
+                * on entry and exit.
                 */
-               switch(spectre_mitigation & 3) {
-               case 0:
-                       /*
-                        * Disable IBRS
-                        *
-                        * Make sure IBRS is turned off in case we were in
-                        * a global mode before.
-                        */
-                       if (spectre_ibrs_supported)
-                               wrmsr(MSR_SPEC_CTRL, 0);
-                       break;
-               case 1:
-                       /*
-                        * IBRS in kernel
-                        */
-                       if (spectre_ibrs_supported) {
-                               tr->tr_pcb_gflags |= PCB_IBRS1;
-                               wrmsr(MSR_SPEC_CTRL, 1);
-                               spectre_ibrs_mode = 1;
-                       }
-                       break;
-               case 2:
-                       /*
-                        * IBRS at all times
-                        */
-                       if (spectre_ibrs_supported) {
-                               tr->tr_pcb_gflags |= PCB_IBRS2;
-                               wrmsr(MSR_SPEC_CTRL, 1);
-                               spectre_ibrs_mode = 2;
-                       }
-                       break;
+               spec_ctrl = 0;
+               if (CHECK(IBRS_AUTO_SUPPORTED)) {
+                       spec_ctrl |= SPEC_CTRL_IBRS;
+                       mode |= IBRS_AUTO_SUPPORTED;
+               } else if (CHECK(IBRS_SUPPORTED)) {
+                       spec_ctrl |= SPEC_CTRL_IBRS | SPEC_CTRL_DUMMY_ENABLE;
+                       mode |= IBRS_SUPPORTED;
+               }
+               if (CHECK(STIBP_AUTO_SUPPORTED)) {
+                       spec_ctrl |= SPEC_CTRL_STIBP;
+                       mode |= STIBP_AUTO_SUPPORTED;
+               } else if (CHECK(STIBP_SUPPORTED)) {
+                       spec_ctrl |= SPEC_CTRL_STIBP | SPEC_CTRL_DUMMY_ENABLE;
+                       mode |= STIBP_SUPPORTED;
                }
 
                /*
-                * IBPB mode
+                * IBPB requested and supported.
                 */
-               if (spectre_mitigation & 4) {
-                       if (spectre_ibpb_supported) {
-                               tr->tr_pcb_gflags |= PCB_IBPB;
-                               spectre_ibpb_mode = 1;
-                       }
+               if (CHECK(IBPB_SUPPORTED)) {
+                       spec_ctrl |= SPEC_CTRL_DUMMY_IBPB;
+                       mode |= IBPB_SUPPORTED;
+               }
+
+               /*
+                * Update the MSR if the cpu supports the modes to ensure
+                * proper disablement if the user disabled the mode.
+                */
+               if (spectre_support & (IBRS_SUPPORTED | IBRS_AUTO_SUPPORTED |
+                                   STIBP_SUPPORTED | STIBP_AUTO_SUPPORTED)) {
+                       wrmsr(MSR_SPEC_CTRL,
+                             spec_ctrl & (SPEC_CTRL_IBRS|SPEC_CTRL_STIBP));
                }
+
+               /*
+                * Update spec_ctrl fields in the trampoline.
+                *
+                * [0] on-kernel-entry (on-idle-exit)
+                * [1] on-kernel-exit  (on-idle-entry)
+                *
+                * When auto mode is supported we leave the bit set, otherwise
+                * we clear the bits.
+                */
+               tr->tr_pcb_spec_ctrl[0] = spec_ctrl;
+               if (CHECK(IBRS_AUTO_SUPPORTED) == 0)
+                       spec_ctrl &= ~SPEC_CTRL_IBRS;
+               if (CHECK(STIBP_AUTO_SUPPORTED) == 0)
+                       spec_ctrl &= ~SPEC_CTRL_STIBP;
+               tr->tr_pcb_spec_ctrl[1] = spec_ctrl;
+
+               /*
+                * Make sure we set this on the first loop.  It will be
+                * the same value on remaining loops.
+                */
+               spectre_mode = mode;
        }
        lwkt_setcpu_self(save_gd);
        cpu_ccfence();
+
+       /*
+        * Console message on mitigation mode change
+        */
+       kprintf("Spectre: support=(");
+       if (spectre_support == 0) {
+               kprintf(" none");
+       } else {
+               if (spectre_support & IBRS_SUPPORTED)
+                       kprintf(" IBRS");
+               if (spectre_support & STIBP_SUPPORTED)
+                       kprintf(" STIBP");
+               if (spectre_support & IBPB_SUPPORTED)
+                       kprintf(" IBPB");
+               if (spectre_support & IBRS_AUTO_SUPPORTED)
+                       kprintf(" IBRS_AUTO");
+               if (spectre_support & STIBP_AUTO_SUPPORTED)
+                       kprintf(" STIBP_AUTO");
+               if (spectre_support & IBRS_PREFERRED_REQUEST)
+                       kprintf(" IBRS_REQUESTED");
+       }
+       kprintf(" ) req=%04x operating=(", (uint16_t)spectre_mitigation);
+       if (spectre_mode == 0) {
+               kprintf(" none");
+       } else {
+               if (spectre_mode & IBRS_SUPPORTED)
+                       kprintf(" IBRS");
+               if (spectre_mode & STIBP_SUPPORTED)
+                       kprintf(" STIBP");
+               if (spectre_mode & IBPB_SUPPORTED)
+                       kprintf(" IBPB");
+               if (spectre_mode & IBRS_AUTO_SUPPORTED)
+                       kprintf(" IBRS_AUTO");
+               if (spectre_mode & STIBP_AUTO_SUPPORTED)
+                       kprintf(" STIBP_AUTO");
+               if (spectre_mode & IBRS_PREFERRED_REQUEST)
+                       kprintf(" IBRS_REQUESTED");
+       }
+       kprintf(" )\n");
 }
 
 /*
@@ -598,21 +657,109 @@ spectre_sysctl_changed(void)
 static int
 sysctl_spectre_mitigation(SYSCTL_HANDLER_ARGS)
 {
+       char buf[128];          /* NUL-terminated copy of the new value */
+       char *ptr;              /* token returned by strsep() */
+       char *iter;             /* strsep() cursor into buf */
+       size_t len;             /* unread bytes of the new value */
        int spectre;
-       int error;
+       int error = 0;
+       int loop = 0;           /* passes made through the output loop */
+
+       /*
+        * Return current operating mode (RW node) or support (RD node).
+        */
+       if (oidp->oid_kind & CTLFLAG_WR)
+               spectre = spectre_mode;
+       else
+               spectre = spectre_support;
+
+       spectre &= (IBRS_SUPPORTED | IBRS_AUTO_SUPPORTED |      /* named bits only */
+                   STIBP_SUPPORTED | STIBP_AUTO_SUPPORTED |
+                   IBPB_SUPPORTED);
+       while (spectre) {       /* emit one name per pass, clearing its bit */
+               if (error)
+                       break;
+               if (loop++) {   /* space separator before all but the first name */
+                       error = SYSCTL_OUT(req, " ", 1);
+                       if (error)
+                               break;
+               }
+               if (spectre & IBRS_SUPPORTED) {
+                       spectre &= ~IBRS_SUPPORTED;
+                       error = SYSCTL_OUT(req, "IBRS", 4);
+               } else
+               if (spectre & IBRS_AUTO_SUPPORTED) {
+                       spectre &= ~IBRS_AUTO_SUPPORTED;
+                       error = SYSCTL_OUT(req, "IBRS_AUTO", 9);
+               } else
+               if (spectre & STIBP_SUPPORTED) {
+                       spectre &= ~STIBP_SUPPORTED;
+                       error = SYSCTL_OUT(req, "STIBP", 5);
+               } else
+               if (spectre & STIBP_AUTO_SUPPORTED) {
+                       spectre &= ~STIBP_AUTO_SUPPORTED;
+                       error = SYSCTL_OUT(req, "STIBP_AUTO", 10);
+               } else
+               if (spectre & IBPB_SUPPORTED) {
+                       spectre &= ~IBPB_SUPPORTED;
+                       error = SYSCTL_OUT(req, "IBPB", 4);
+               }
+       }
+       if (loop == 0) {        /* no feature bit was set: report "NONE" */
+               error = SYSCTL_OUT(req, "NONE", 4);
+       }
 
-       spectre = spectre_mitigation;
-       error = sysctl_handle_int(oidp, &spectre, 0, req);
        if (error || req->newptr == NULL)
                return error;
-       spectre_mitigation = spectre;
-       spectre_sysctl_changed();
+       if ((oidp->oid_kind & CTLFLAG_WR) == 0) /* RD node: nothing to set */
+               return error;
 
-       return 0;
+       /*
+        * Change current operating mode from a list of feature names.
+        */
+       len = req->newlen - req->newidx;
+       if (len >= sizeof(buf)) {       /* must leave room for the terminator */
+               error = EINVAL;
+               len = 0;                /* keeps buf[len] below in bounds */
+       } else {
+               error = SYSCTL_IN(req, buf, len);
+       }
+       buf[len] = 0;
+       iter = &buf[0];
+       spectre = 0;            /* rebuilt from scratch out of the tokens */
+
+       while (error == 0 && iter) {    /* stops on error or when iter is NULL */
+               ptr = strsep(&iter, " ,\t\r\n");
+               if (*ptr == 0)          /* empty token, e.g. adjacent delimiters */
+                       continue;
+               if (strcasecmp(ptr, "NONE") == 0)
+                       spectre |= 0;   /* accepted, contributes no bits */
+               else if (strcasecmp(ptr, "IBRS") == 0)
+                       spectre |= IBRS_SUPPORTED;
+               else if (strcasecmp(ptr, "IBRS_AUTO") == 0)
+                       spectre |= IBRS_AUTO_SUPPORTED;
+               else if (strcasecmp(ptr, "STIBP") == 0)
+                       spectre |= STIBP_SUPPORTED;
+               else if (strcasecmp(ptr, "STIBP_AUTO") == 0)
+                       spectre |= STIBP_AUTO_SUPPORTED;
+               else if (strcasecmp(ptr, "IBPB") == 0)
+                       spectre |= IBPB_SUPPORTED;
+               else
+                       error = ENOENT; /* unrecognized feature name */
+       }
+       if (error == 0) {       /* commit only if every token was accepted */
+               spectre_mitigation = spectre;
+               spectre_sysctl_changed();
+       }
+       return error;
 }
 
-SYSCTL_PROC(_machdep, OID_AUTO, spectre_mitigation, CTLTYPE_INT | CTLFLAG_RW,
-       0, 0, sysctl_spectre_mitigation, "I", "Spectre exploit mitigation");
+SYSCTL_PROC(_machdep, OID_AUTO, spectre_mitigation,
+       CTLTYPE_STRING | CTLFLAG_RW,    /* writable: handler reports spectre_mode and accepts a new feature list */
+       0, 0, sysctl_spectre_mitigation, "A", "Spectre exploit mitigation");
+SYSCTL_PROC(_machdep, OID_AUTO, spectre_support,
+       CTLTYPE_STRING | CTLFLAG_RD,    /* read-only: same handler reports spectre_support */
+       0, 0, sysctl_spectre_mitigation, "A", "Spectre supported features");
 
 /*
  * NOTE: Called at SI_BOOT2_MACHDEP and also when the microcode is
@@ -668,26 +815,14 @@ spectre_vm_setup(void *arg)
         */
        if (inconsistent) {
                spectre_mitigation = -1;
-               spectre_ibrs_supported = 0;
-               spectre_ibpb_supported = 0;
+               spectre_support = 0;
                return;
        }
 
        /*
         * IBRS support
         */
-       if (supmask & 1)
-               spectre_ibrs_supported = 1;
-       else
-               spectre_ibrs_supported = 0;
-
-       /*
-        * IBPB support.
-        */
-       if (supmask & 2)
-               spectre_ibpb_supported = 1;
-       else
-               spectre_ibpb_supported = 0;
+       spectre_support = supmask;
 
        /*
         * Enable spectre_mitigation, set defaults if -1, adjust
@@ -696,20 +831,35 @@ spectre_vm_setup(void *arg)
         * NOTE!  We do not enable IBPB for user->kernel transitions
         *        by default, so this code is commented out for now.
         */
-       if (spectre_ibrs_supported || spectre_ibpb_supported) {
+       if (spectre_support) {
                if (spectre_mitigation < 0) {
                        spectre_mitigation = 0;
-                       if (spectre_ibrs_supported)
-                               spectre_mitigation |= 1;
-#if 0
-                       if (spectre_ibpb_supported)
-                               spectre_mitigation |= 4;
-#endif
+
+                       /*
+                        * IBRS toggling not currently recommended as a
+                        * default.
+                        */
+                       if (spectre_support & IBRS_AUTO_SUPPORTED)
+                               spectre_mitigation |= IBRS_AUTO_SUPPORTED;
+                       else if (spectre_support & IBRS_SUPPORTED)
+                               spectre_mitigation |= 0;
+
+                       /*
+                        * STIBP toggling not currently recommended as a
+                        * default.
+                        */
+                       if (spectre_support & STIBP_AUTO_SUPPORTED)
+                               spectre_mitigation |= STIBP_AUTO_SUPPORTED;
+                       else if (spectre_support & STIBP_SUPPORTED)
+                               spectre_mitigation |= 0;
+
+                       /*
+                        * IBPB adds enormous (~2uS) overhead to system
+                        * calls etc, we do not enable it by default.
+                        */
+                       if (spectre_support & IBPB_SUPPORTED)
+                               spectre_mitigation |= 0;
                }
-               if (spectre_ibrs_supported == 0)
-                       spectre_mitigation &= ~3;
-               if (spectre_ibpb_supported == 0)
-                       spectre_mitigation &= ~4;
        } else {
                spectre_mitigation = -1;
        }