kernel - Add MDS mitigation support for Intel side-channel attack
authorMatthew Dillon <dillon@apollo.backplane.com>
Wed, 15 May 2019 00:33:39 +0000 (17:33 -0700)
committerMatthew Dillon <dillon@apollo.backplane.com>
Wed, 15 May 2019 00:46:34 +0000 (17:46 -0700)
* Add MDS (Microarchitectural Data Sampling) attack mitigation to
  the kernel.  This is an attack against Intel CPUs made from 2011
  to date.  The attack is not currently known to work against AMD CPUs.

  With an Intel microcode update, the mitigation can be enabled with

  sysctl machdep.mds_mitigation=MD_CLEAR

* Without the Intel microcode update, only disabling hyper-threading
  gives you any protection.  Older architectures might not get
  support.  If sysctl machdep.mds_support does not show support,
  then the currently loaded microcode does not have support for the
  feature.

* DragonFlyBSD only supports the MD_CLEAR mode, and it will only
  be available with a microcode update from Intel.

  Updating the microcode alone does not protect against the attack.
  The microcode must be updated AND the mode must be turned on in
  DragonFlyBSD to protect against the attack.

  This mitigation burns around 250ns of additional latency on kernel->user
  transitions (system calls and interrupts primarily).  The additional
  latency will not be present if the microcode has support but it is disabled
  in the kernel, so you should be able to safely update your microcode
  even if you do not intend to use the mitigation.

* It is unclear whether the microcode + mitigation completely protects
  the machine.  The attack is supposedly a sibling hyper-thread
  attack and it may be that the only way to completely protect your
  machine is to disable hyper-threading entirely.  Or buy AMD.

Templated-from: NetBSD

sys/cpu/x86_64/include/asmacros.h
sys/cpu/x86_64/include/specialreg.h
sys/dev/misc/cpuctl/cpuctl.c
sys/platform/pc64/include/pcb.h
sys/platform/pc64/x86_64/genassym.c
sys/platform/pc64/x86_64/vm_machdep.c

index ef8c8ab..00c65f5 100644 (file)
        movq    PCPU(trampoline)+TR_RCX, %rcx                           \
 
 /*
- * KMMUEXIT_CORE handles IBRS and STIBP, but not ISOMMU
+ * KMMUEXIT_CORE handles IBRS, STIBP, and MDS, but not ISOMMU
  *
  * We don't re-execute the IBPB barrier on exit atm.
+ *
+ * The MDS barrier (Microarchitectural Data Sampling) should be executed
+ * prior to any return to user-mode, if supported and enabled.  This is
+ * Intel-only.
+ *
+ * WARNING! %rsp may not be usable (it could be pointing to the user
+ *         stack at this point).  And we must save/restore any registers
+ *         we use.
  */
 #define KMMUEXIT_CORE                                                  \
-       testq   $SPEC_CTRL_DUMMY_ENABLE,PCPU(trampoline)+TR_PCB_SPEC_CTRL+4 ; \
-       je      41f ;                                                   \
+       testl   $SPEC_CTRL_DUMMY_ENABLE|SPEC_CTRL_MDS_ENABLE, PCPU(trampoline)+TR_PCB_SPEC_CTRL+4 ; \
+       je      43f ;                                                   \
        movq    %rax, PCPU(trampoline)+TR_RAX ;                         \
+       movl    PCPU(trampoline)+TR_PCB_SPEC_CTRL+4, %eax ;             \
+       testq   $SPEC_CTRL_MDS_ENABLE, %rax ;                           \
+       je      41f ;                                                   \
+       movq    $GSEL(GDATA_SEL, SEL_KPL), PCPU(trampoline)+TR_RCX ;    \
+       verw    PCPU(trampoline)+TR_RCX ;                               \
+41:    testq   $SPEC_CTRL_DUMMY_ENABLE, %rax ;                         \
+       je      42f ;                                                   \
        movq    %rcx, PCPU(trampoline)+TR_RCX ;                         \
        movq    %rdx, PCPU(trampoline)+TR_RDX ;                         \
-       movl    PCPU(trampoline)+TR_PCB_SPEC_CTRL+4, %eax ;             \
        andq    $SPEC_CTRL_IBRS|SPEC_CTRL_STIBP, %rax ;                 \
        movq    $MSR_SPEC_CTRL,%rcx ;                                   \
        xorl    %edx,%edx ;                                             \
        wrmsr ;                                                         \
        movq    PCPU(trampoline)+TR_RDX, %rdx ;                         \
        movq    PCPU(trampoline)+TR_RCX, %rcx ;                         \
-       movq    PCPU(trampoline)+TR_RAX, %rax ;                         \
-41:
+42:    movq    PCPU(trampoline)+TR_RAX, %rax ;                         \
+43:
 
 /*
  * We are positioned at the base of the trapframe.  Advance the trapframe
        addq    $TF_RIP,%rsp ;                                          \
        KMMUEXIT_CORE ;                                                 \
        testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
-       je      40f ;                                                   \
+       je      50f ;                                                   \
        movq    %rcx, PCPU(trampoline)+TR_ERR ; /* save in TR_ERR */    \
        popq    %rcx ;                          /* copy %rip */         \
        movq    %rcx, PCPU(trampoline)+TR_RIP ;                         \
        movq    PCPU(trampoline)+TR_PCB_CR3_ISO,%rcx ;                  \
        movq    %rcx,%cr3 ;                                             \
        popq    %rcx ;          /* positioned at TR_RIP after this */   \
-40:                                                                    \
+50:                                                                    \
 
 /*
  * Warning: user stack pointer already loaded into %rsp at this
 #define KMMUEXIT_SYSCALL                                               \
        KMMUEXIT_CORE ;                                                 \
        testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
-       je      40f ;                                                   \
+       je      50f ;                                                   \
        movq    %rcx, PCPU(trampoline)+TR_RCX ;                         \
        movq    PCPU(trampoline)+TR_PCB_CR3_ISO,%rcx ;                  \
        movq    %rcx,%cr3 ;                                             \
        movq    PCPU(trampoline)+TR_RCX, %rcx ;                         \
-40:                                                                    \
+50:                                                                    \
 
 /*
  * Macros to create and destroy a trap frame.  rsp has already been shifted
index 5b6f898..5f03bbf 100644 (file)
 #define        MSR_PERFCTR1            0x0c2
 #define        MSR_IA32_EXT_CONFIG     0x0ee   /* Undocumented. Core Solo/Duo only */
 #define        MSR_MTRRcap             0x0fe
+#define MSR_IA32_ARCH_CAPABILITIES 0x10a
 #define        MSR_BBL_CR_ADDR         0x116
 #define        MSR_BBL_CR_DECC         0x118
 #define        MSR_BBL_CR_CTL          0x119
 #define CPUID_AMD_80000008_I1_STIBP_AUTO       0x00020000
 #define CPUID_AMD_80000008_I1_IBRS_REQUESTED   0x00040000
 
+/*
+ * MDS mitigation in microcode (Intel only) in EDX (index 3)
+ */
+#define CPUID_SEF_AVX512_4VNNIW                0x00000004
+#define CPUID_SEF_AVX512_4FMAPS                0x00000008
+#define CPUID_SEF_MD_CLEAR             0x00000400
+#define CPUID_SEF_TSX_FORCE_ABORT      0x00002000
+
+#define CPUID_SEF_ARCH_CAP             0x20000000
+
+/*
+ * MSR_IA32_ARCH_CAPABILITIES
+ */
+#define IA32_ARCH_SSB_NO               0x10
+#define IA32_ARCH_MDS_NO               0x20
+
 /*
  * PAT modes.
  */
index 27a8dd3..dbd3418 100644 (file)
@@ -216,7 +216,7 @@ cpuctl_do_msr(int cpu, cpuctl_msr_args_t *data, u_long cmd)
 /*
  * Actually perform microcode update.
  */
-extern void spectre_vm_setup(void *arg);
+extern void mitigation_vm_setup(void *arg);
 
 static int
 cpuctl_do_update(int cpu, cpuctl_update_args_t *data)
@@ -246,7 +246,7 @@ cpuctl_do_update(int cpu, cpuctl_update_args_t *data)
                ret = ENXIO;
 
        if (ret == 0)
-               spectre_vm_setup((void *)(intptr_t)1);
+               mitigation_vm_setup((void *)(intptr_t)1);
 
        return (ret);
 }
index ce0c172..01a4d4d 100644 (file)
@@ -93,6 +93,7 @@ struct pcb {
 
 #define SPEC_CTRL_DUMMY_IBPB   SPEC_CTRL_DUMMY1
 #define SPEC_CTRL_DUMMY_ENABLE SPEC_CTRL_DUMMY2
+#define SPEC_CTRL_MDS_ENABLE   SPEC_CTRL_DUMMY3
 
 #ifdef _KERNEL
 void   savectx(struct pcb *);
index f9a1248..68efc7f 100644 (file)
@@ -139,6 +139,7 @@ ASSYM(SPEC_CTRL_STIBP, SPEC_CTRL_STIBP);
 #endif
 ASSYM(SPEC_CTRL_DUMMY_IBPB, SPEC_CTRL_DUMMY_IBPB);
 ASSYM(SPEC_CTRL_DUMMY_ENABLE, SPEC_CTRL_DUMMY_ENABLE);
+ASSYM(SPEC_CTRL_MDS_ENABLE, SPEC_CTRL_MDS_ENABLE);
 
 ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
 ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
index 6031888..3be09c7 100644 (file)
@@ -83,11 +83,16 @@ static void cpu_reset_real (void);
 
 static int spectre_mitigation = -1;
 static int spectre_support = 0;
-
 static int spectre_mode = 0;
 SYSCTL_INT(_machdep, OID_AUTO, spectre_mode, CTLFLAG_RD,
        &spectre_mode, 0, "current Spectre enablements");
 
+static int mds_mitigation = -1;
+static int mds_support = 0;
+static int mds_mode = 0;
+SYSCTL_INT(_machdep, OID_AUTO, mds_mode, CTLFLAG_RD,
+       &mds_mode, 0, "current MDS enablements");
+
 /*
  * Finish a fork operation, with lwp lp2 nearly set up.
  * Copy and update the pcb, set up the stack so that the child
@@ -412,7 +417,7 @@ SYSINIT(swi_vm_setup, SI_BOOT2_MACHDEP, SI_ORDER_ANY, swi_vm_setup, NULL);
  * NOTE: This routine is also called after a successful microcode
  *      reload on cpu 0.
  */
-void spectre_vm_setup(void *arg);
+void mitigation_vm_setup(void *arg);
 
 /*
  * Check for IBPB and IBRS support
@@ -518,9 +523,14 @@ spectre_sysctl_changed(void)
        globaldata_t save_gd;
        struct trampframe *tr;
        int spec_ctrl;
+       int spec_mask;
        int mode;
        int n;
 
+
+       spec_mask = SPEC_CTRL_IBRS | SPEC_CTRL_STIBP |
+                   SPEC_CTRL_DUMMY_ENABLE | SPEC_CTRL_DUMMY_IBPB;
+
        /*
         * Fixup state
         */
@@ -536,9 +546,12 @@ spectre_sysctl_changed(void)
                 *
                 * XXX cleanup, reusing globals inside the loop (they get
                 * set to the same thing each loop)
+                *
+                * [0] kernel entry (idle exit)
+                * [1] kernel exit  (idle entry)
                 */
-               tr->tr_pcb_spec_ctrl[0] = 0;    /* kernel entry (idle exit) */
-               tr->tr_pcb_spec_ctrl[1] = 0;    /* kernel exit  (idle entry) */
+               tr->tr_pcb_spec_ctrl[0] &= ~spec_mask;
+               tr->tr_pcb_spec_ctrl[1] &= ~spec_mask;
 
                /*
                 * Don't try to parse if not available
@@ -595,12 +608,12 @@ spectre_sysctl_changed(void)
                 * When auto mode is supported we leave the bit set, otherwise
                 * we clear the bits.
                 */
-               tr->tr_pcb_spec_ctrl[0] = spec_ctrl;
+               tr->tr_pcb_spec_ctrl[0] |= spec_ctrl;
                if (CHECK(IBRS_AUTO_SUPPORTED) == 0)
                        spec_ctrl &= ~SPEC_CTRL_IBRS;
                if (CHECK(STIBP_AUTO_SUPPORTED) == 0)
                        spec_ctrl &= ~SPEC_CTRL_STIBP;
-               tr->tr_pcb_spec_ctrl[1] = spec_ctrl;
+               tr->tr_pcb_spec_ctrl[1] |= spec_ctrl;
 
                /*
                 * Make sure we set this on the first loop.  It will be
@@ -651,6 +664,8 @@ spectre_sysctl_changed(void)
        kprintf(" )\n");
 }
 
+#undef CHECK
+
 /*
  * User changes sysctl value
  */
@@ -766,7 +781,7 @@ SYSCTL_PROC(_machdep, OID_AUTO, spectre_support,
  *      updated.  Microcode updates must be applied to all cpus
  *      for support to be recognized.
  */
-void
+static void
 spectre_vm_setup(void *arg)
 {
        int inconsistent = 0;
@@ -876,8 +891,382 @@ spectre_vm_setup(void *arg)
        spectre_sysctl_changed();
 }
 
-SYSINIT(spectre_vm_setup, SI_BOOT2_MACHDEP, SI_ORDER_ANY,
-       spectre_vm_setup, NULL);
+#define MDS_AVX512_4VNNIW_SUPPORTED    0x0001
+#define MDS_AVX512_4FMAPS_SUPPORTED    0x0002
+#define MDS_MD_CLEAR_SUPPORTED         0x0004
+#define MDS_TSX_FORCE_ABORT_SUPPORTED  0x0008
+#define MDS_NOT_REQUIRED               0x8000
+
+static
+int
+mds_check_support(void)
+{
+       uint64_t msr;
+       uint32_t p[4];
+       int rv = 0;
+
+       /*
+        * MDS mitigation hw bits
+        *
+        * MD_CLEAR     Use microcode-supported verf insn.  This is the
+        *              only mode we really support.
+        */
+       if (cpu_vendor_id == CPU_VENDOR_INTEL) {
+               p[0] = 0;
+               p[1] = 0;
+               p[2] = 0;
+               p[3] = 0;
+               cpuid_count(7, 0, p);
+               if (p[3] & CPUID_SEF_ARCH_CAP) {
+                       msr = rdmsr(MSR_IA32_ARCH_CAPABILITIES);
+                       if (msr & IA32_ARCH_MDS_NO)
+                               rv = MDS_NOT_REQUIRED;
+               }
+               if (p[3] & CPUID_SEF_AVX512_4VNNIW)
+                       rv |= MDS_AVX512_4VNNIW_SUPPORTED;
+               if (p[3] & CPUID_SEF_AVX512_4FMAPS)
+                       rv |= MDS_AVX512_4FMAPS_SUPPORTED;
+               if (p[3] & CPUID_SEF_MD_CLEAR)
+                       rv |= MDS_MD_CLEAR_SUPPORTED;
+               if (p[3] & CPUID_SEF_TSX_FORCE_ABORT)
+                       rv |= MDS_TSX_FORCE_ABORT_SUPPORTED;
+       } else {
+               rv = MDS_NOT_REQUIRED;
+       }
+
+       return rv;
+}
+
+/*
+ * Iterate CPUs and adjust MSR for global operations, since
+ * the KMMU* code won't do it if spectre_mitigation is 0 or 2.
+ */
+#define CHECK(flag)    (mds_mitigation & mds_support & (flag))
+
+static
+void
+mds_sysctl_changed(void)
+{
+       globaldata_t save_gd;
+       struct trampframe *tr;
+       int spec_ctrl;
+       int spec_mask;
+       int mode;
+       int n;
+
+       spec_mask = SPEC_CTRL_MDS_ENABLE;
+
+       /*
+        * Fixup state
+        */
+       mode = 0;
+       save_gd = mycpu;
+       for (n = 0; n < ncpus; ++n) {
+               lwkt_setcpu_self(globaldata_find(n));
+               cpu_ccfence();
+               tr = &pscpu->trampoline;
+
+               /*
+                * Make sure we are cleaned out.
+                *
+                * XXX cleanup, reusing globals inside the loop (they get
+                * set to the same thing each loop)
+                *
+                * [0] kernel entry (idle exit)
+                * [1] kernel exit  (idle entry)
+                */
+               tr->tr_pcb_spec_ctrl[0] &= ~spec_mask;
+               tr->tr_pcb_spec_ctrl[1] &= ~spec_mask;
+
+               /*
+                * Don't try to parse if not available
+                */
+               if (mds_mitigation < 0)
+                       continue;
+
+               spec_ctrl = 0;
+               if (CHECK(MDS_MD_CLEAR_SUPPORTED)) {
+                       spec_ctrl |= SPEC_CTRL_MDS_ENABLE;
+                       mode |= MDS_MD_CLEAR_SUPPORTED;
+               }
+
+               /*
+                * Update spec_ctrl fields in the trampoline.
+                *
+                * [0] on-kernel-entry (on-idle-exit)
+                * [1] on-kernel-exit  (on-idle-entry)
+                *
+                * The MDS stuff is only needed on kernel-exit or idle-entry
+                */
+               /* tr->tr_pcb_spec_ctrl[0] |= spec_ctrl; */
+               tr->tr_pcb_spec_ctrl[1] |= spec_ctrl;
+
+               /*
+                * Make sure we set this on the first loop.  It will be
+                * the same value on remaining loops.
+                */
+               mds_mode = mode;
+       }
+       lwkt_setcpu_self(save_gd);
+       cpu_ccfence();
+
+       /*
+        * Console message on mitigation mode change
+        */
+       kprintf("MDS: support=(");
+       if (mds_support == 0) {
+               kprintf(" none");
+       } else {
+               if (mds_support & MDS_AVX512_4VNNIW_SUPPORTED)
+                       kprintf(" AVX512_4VNNIW");
+               if (mds_support & MDS_AVX512_4FMAPS_SUPPORTED)
+                       kprintf(" AVX512_4FMAPS");
+               if (mds_support & MDS_MD_CLEAR_SUPPORTED)
+                       kprintf(" MD_CLEAR");
+               if (mds_support & MDS_TSX_FORCE_ABORT_SUPPORTED)
+                       kprintf(" TSX_FORCE_ABORT");
+               if (mds_support & MDS_NOT_REQUIRED)
+                       kprintf(" MDS_NOT_REQUIRED");
+       }
+       kprintf(" ) req=%04x operating=(", (uint16_t)mds_mitigation);
+       if (mds_mode == 0) {
+               kprintf(" none");
+       } else {
+               if (mds_mode & MDS_AVX512_4VNNIW_SUPPORTED)
+                       kprintf(" AVX512_4VNNIW");
+               if (mds_mode & MDS_AVX512_4FMAPS_SUPPORTED)
+                       kprintf(" AVX512_4FMAPS");
+               if (mds_mode & MDS_MD_CLEAR_SUPPORTED)
+                       kprintf(" MD_CLEAR");
+               if (mds_mode & MDS_TSX_FORCE_ABORT_SUPPORTED)
+                       kprintf(" TSX_FORCE_ABORT");
+               if (mds_mode & MDS_NOT_REQUIRED)
+                       kprintf(" MDS_NOT_REQUIRED");
+       }
+       kprintf(" )\n");
+}
+
+#undef CHECK
+
+/*
+ * User changes sysctl value
+ */
+static int
+sysctl_mds_mitigation(SYSCTL_HANDLER_ARGS)
+{
+       char buf[128];
+       char *ptr;
+       char *iter;
+       size_t len;
+       int mds;
+       int error = 0;
+       int loop = 0;
+
+       /*
+        * Return current operating mode or support.
+        */
+       if (oidp->oid_kind & CTLFLAG_WR)
+               mds = mds_mode;
+       else
+               mds = mds_support;
+
+       mds &= MDS_AVX512_4VNNIW_SUPPORTED |
+              MDS_AVX512_4FMAPS_SUPPORTED |
+              MDS_MD_CLEAR_SUPPORTED |
+              MDS_TSX_FORCE_ABORT_SUPPORTED |
+              MDS_NOT_REQUIRED;
+
+       while (mds) {
+               if (error)
+                       break;
+               if (loop++) {
+                       error = SYSCTL_OUT(req, " ", 1);
+                       if (error)
+                               break;
+               }
+               if (mds & MDS_AVX512_4VNNIW_SUPPORTED) {
+                       mds &= ~MDS_AVX512_4VNNIW_SUPPORTED;
+                       error = SYSCTL_OUT(req, "AVX512_4VNNIW", 13);
+               } else
+               if (mds & MDS_AVX512_4FMAPS_SUPPORTED) {
+                       mds &= ~MDS_AVX512_4FMAPS_SUPPORTED;
+                       error = SYSCTL_OUT(req, "AVX512_4FMAPS", 13);
+               } else
+               if (mds & MDS_MD_CLEAR_SUPPORTED) {
+                       mds &= ~MDS_MD_CLEAR_SUPPORTED;
+                       error = SYSCTL_OUT(req, "MD_CLEAR", 8);
+               } else
+               if (mds & MDS_TSX_FORCE_ABORT_SUPPORTED) {
+                       mds &= ~MDS_TSX_FORCE_ABORT_SUPPORTED;
+                       error = SYSCTL_OUT(req, "TSX_FORCE_ABORT", 15);
+               } else
+               if (mds & MDS_NOT_REQUIRED) {
+                       mds &= ~MDS_NOT_REQUIRED;
+                       error = SYSCTL_OUT(req, "MDS_NOT_REQUIRED", 16);
+               }
+       }
+       if (loop == 0) {
+               error = SYSCTL_OUT(req, "NONE", 4);
+       }
+
+       if (error || req->newptr == NULL)
+               return error;
+       if ((oidp->oid_kind & CTLFLAG_WR) == 0)
+               return error;
+
+       /*
+        * Change current operating mode
+        */
+       len = req->newlen - req->newidx;
+       if (len >= sizeof(buf)) {
+               error = EINVAL;
+               len = 0;
+       } else {
+               error = SYSCTL_IN(req, buf, len);
+       }
+       buf[len] = 0;
+       iter = &buf[0];
+       mds = 0;
+
+       while (error == 0 && iter) {
+               ptr = strsep(&iter, " ,\t\r\n");
+               if (*ptr == 0)
+                       continue;
+               if (strcasecmp(ptr, "NONE") == 0)
+                       mds |= 0;
+               else if (strcasecmp(ptr, "AVX512_4VNNIW") == 0)
+                       mds |= MDS_AVX512_4VNNIW_SUPPORTED;
+               else if (strcasecmp(ptr, "AVX512_4FMAPS") == 0)
+                       mds |= MDS_AVX512_4FMAPS_SUPPORTED;
+               else if (strcasecmp(ptr, "MD_CLEAR") == 0)
+                       mds |= MDS_MD_CLEAR_SUPPORTED;
+               else if (strcasecmp(ptr, "TSX_FORCE_ABORT") == 0)
+                       mds |= MDS_TSX_FORCE_ABORT_SUPPORTED;
+               else if (strcasecmp(ptr, "MDS_NOT_REQUIRED") == 0)
+                       mds |= MDS_NOT_REQUIRED;
+               else
+                       error = ENOENT;
+       }
+       if (error == 0) {
+               mds_mitigation = mds;
+               mds_sysctl_changed();
+       }
+       return error;
+}
+
+SYSCTL_PROC(_machdep, OID_AUTO, mds_mitigation,
+       CTLTYPE_STRING | CTLFLAG_RW,
+       0, 0, sysctl_mds_mitigation, "A", "MDS exploit mitigation");
+SYSCTL_PROC(_machdep, OID_AUTO, mds_support,
+       CTLTYPE_STRING | CTLFLAG_RD,
+       0, 0, sysctl_mds_mitigation, "A", "MDS supported features");
+
+/*
+ * NOTE: Called at SI_BOOT2_MACHDEP and also when the microcode is
+ *      updated.  Microcode updates must be applied to all cpus
+ *      for support to be recognized.
+ */
+static void
+mds_vm_setup(void *arg)
+{
+       int inconsistent = 0;
+       int supmask;
+
+       /*
+        * Fetch tunable in auto mode
+        */
+       if (mds_mitigation < 0) {
+               TUNABLE_INT_FETCH("machdep.mds_mitigation", &mds_mitigation);
+       }
+
+       if ((supmask = mds_check_support()) != 0) {
+               /*
+                * Must be supported on all cpus before we
+                * can enable it.  Returns silently if it
+                * isn't.
+                *
+                * NOTE! arg != NULL indicates we were called
+                *       from cpuctl after a successful microcode
+                *       update.
+                */
+               if (arg != NULL) {
+                       globaldata_t save_gd;
+                       int n;
+
+                       save_gd = mycpu;
+                       for (n = 0; n < ncpus; ++n) {
+                               lwkt_setcpu_self(globaldata_find(n));
+                               cpu_ccfence();
+                               if (mds_check_support() != supmask) {
+                                       inconsistent = 1;
+                                       break;
+                               }
+                       }
+                       lwkt_setcpu_self(save_gd);
+                       cpu_ccfence();
+               }
+       }
+
+       /*
+        * Be silent while microcode is being loaded on various CPUs,
+        * until all done.
+        */
+       if (inconsistent) {
+               mds_mitigation = -1;
+               mds_support = 0;
+               return;
+       }
+
+       /*
+        * IBRS support
+        */
+       mds_support = supmask;
+
+       /*
+        * Enable mds_mitigation, set defaults if -1, adjust
+        * tuned value according to support if not.
+        *
+        * NOTE!  MDS is not enabled by default.
+        */
+       if (mds_support) {
+               if (mds_mitigation < 0) {
+                       mds_mitigation = 0;
+
+                       if ((mds_support & MDS_NOT_REQUIRED) == 0 &&
+                           (mds_support & MDS_MD_CLEAR_SUPPORTED)) {
+                               /* mds_mitigation |= MDS_MD_CLEAR_SUPPORTED; */
+                       }
+               }
+       } else {
+               mds_mitigation = -1;
+       }
+
+       /*
+        * Disallow sysctl changes when there is no support (otherwise
+        * the wrmsr will cause a protection fault).
+        */
+       if (mds_mitigation < 0)
+               sysctl___machdep_mds_mitigation.oid_kind &= ~CTLFLAG_WR;
+       else
+               sysctl___machdep_mds_mitigation.oid_kind |= CTLFLAG_WR;
+
+       mds_sysctl_changed();
+}
+
+/*
+ * NOTE: Called at SI_BOOT2_MACHDEP and also when the microcode is
+ *      updated.  Microcode updates must be applied to all cpus
+ *      for support to be recognized.
+ */
+void
+mitigation_vm_setup(void *arg)
+{
+       spectre_vm_setup(arg);
+       mds_vm_setup(arg);
+}
+
+SYSINIT(mitigation_vm_setup, SI_BOOT2_MACHDEP, SI_ORDER_ANY,
+       mitigation_vm_setup, NULL);
 
 /*
  * platform-specific vmspace initialization (nothing for x86_64)