kernel - Implement spectre mitigations part 1
author	Matthew Dillon <dillon@apollo.backplane.com>
Wed, 10 Jan 2018 05:36:18 +0000 (21:36 -0800)
committer	Matthew Dillon <dillon@apollo.backplane.com>
Wed, 10 Jan 2018 05:36:18 +0000 (21:36 -0800)
* Implement machdep.spectre_mitigation.  This can be set as a tunable
  or sysctl'd later.  The tunable is only applicable if the BIOS has
  the appropriate microcode, otherwise you have to update the microcode
  first and then use sysctl to set the mode.

  This works similarly to Linux's IBRS support.

  mode 0 - Spectre IBRS MSRs disabled

  mode 1 - Sets IBRS MSR on USER->KERN transition and clears it
   on KERN->USER.

  mode 2 - Leave IBRS set globally.  Do not toggle on USER->KERN or
   KERN->USER transitions.

* Retest spectre microcode MSRs on microcode update.

* Spectre mode 1 is enabled by default if the microcode supports it.
  (we might change this to disabled by default, I'm still mulling it
  over).

* General performance effects (not counting the MMU separation mode,
  which is machdep.meltdown_mitigation and adds another 3% in overhead):

  Skylake loses around 5% for mode 1 and 12% for mode 2, versus mode 0.
  Haswell loses around 12% for mode 1 and 53% for mode 2, versus mode 0.

  Add another 3% if MMU separation is also turned on (aka
  machdep.meltdown_mitigation).

* General system call overhead effects on Skylake:

  machdep.meltdown_mitigation=0, machdep.spectre_mitigation=0 103ns
  machdep.meltdown_mitigation=1, machdep.spectre_mitigation=0 360ns
  machdep.meltdown_mitigation=1, machdep.spectre_mitigation=1 848ns
  machdep.meltdown_mitigation=1, machdep.spectre_mitigation=2 404ns

  Note that mode 1 has better overall performance for mixed user+kernel
  workloads despite having a much higher system call overhead, whereas
  mode 2 has lower system call overhead but generally lower overall
  performance because IBRS is left enabled in usermode.

sys/cpu/x86_64/include/asmacros.h
sys/cpu/x86_64/include/frame.h
sys/dev/misc/cpuctl/cpuctl.c
sys/platform/pc64/include/pcb.h
sys/platform/pc64/x86_64/exception.S
sys/platform/pc64/x86_64/genassym.c
sys/platform/pc64/x86_64/machdep.c
sys/platform/pc64/x86_64/vm_machdep.c

index 6156ccb..53cd2d0 100644 (file)
  */
 #define KMMUENTER_TFRIP                                                        \
        subq    $TR_RIP, %rsp ;                                         \
-       movq    %r10, TR_R10(%rsp) ;                                    \
-       movq    %r11, TR_R11(%rsp) ;                                    \
+       movq    %rcx, TR_RCX(%rsp) ;                                    \
+       movq    %rdx, TR_RDX(%rsp) ;                                    \
        testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
        je      40f ;                                                   \
-       movq    PCPU(trampoline)+TR_PCB_CR3,%r10 ;                      \
-       movq    %r10,%cr3 ;                                             \
-40:                                                                    \
-       movq    %rsp, %r10 ;            /* trampoline rsp */            \
+       movq    PCPU(trampoline)+TR_PCB_CR3,%rcx ;                      \
+       movq    %rcx,%cr3 ;                                             \
+40:    testq   $PCB_IBRS1,PCPU(trampoline)+TR_PCB_GFLAGS ;     \
+       je      41f ;                                                   \
+       movq    %rax, TR_RAX(%rsp) ;                                    \
+       movl    $0x48,%ecx ;                                            \
+       movl    $1,%eax ;                                               \
+       xorl    %edx,%edx ;                                             \
+       wrmsr ;                                                         \
+       movq    TR_RAX(%rsp), %rax ;                                    \
+41:                                                                    \
+       movq    %rsp, %rcx ;            /* trampoline rsp */            \
        movq    PCPU(trampoline)+TR_PCB_RSP,%rsp ; /* kstack rsp */     \
-       movq    TR_SS(%r10), %r11 ;                                     \
-       pushq   %r11 ;                                                  \
-       movq    TR_RSP(%r10), %r11 ;                                    \
-       pushq   %r11 ;                                                  \
-       movq    TR_RFLAGS(%r10), %r11 ;                                 \
-       pushq   %r11 ;                                                  \
-       movq    TR_CS(%r10), %r11 ;                                     \
-       pushq   %r11 ;                                                  \
-       movq    TR_RIP(%r10), %r11 ;                                    \
-       pushq   %r11 ;                                                  \
-       movq    TR_R11(%r10), %r11 ;                                    \
-       movq    TR_R10(%r10), %r10                                      \
+       movq    TR_SS(%rcx), %rdx ;                                     \
+       pushq   %rdx ;                                                  \
+       movq    TR_RSP(%rcx), %rdx ;                                    \
+       pushq   %rdx ;                                                  \
+       movq    TR_RFLAGS(%rcx), %rdx ;                                 \
+       pushq   %rdx ;                                                  \
+       movq    TR_CS(%rcx), %rdx ;                                     \
+       pushq   %rdx ;                                                  \
+       movq    TR_RIP(%rcx), %rdx ;                                    \
+       pushq   %rdx ;                                                  \
+       movq    TR_RDX(%rcx), %rdx ;                                    \
+       movq    TR_RCX(%rcx), %rcx                                      \
 
 #define KMMUENTER_TFERR                                                        \
        subq    $TR_ERR, %rsp ;                                         \
-       movq    %r10, TR_R10(%rsp) ;                                    \
-       movq    %r11, TR_R11(%rsp) ;                                    \
+       movq    %rcx, TR_RCX(%rsp) ;                                    \
+       movq    %rdx, TR_RDX(%rsp) ;                                    \
        testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
        je      40f ;                                                   \
-       movq    PCPU(trampoline)+TR_PCB_CR3,%r10 ;                      \
-       movq    %r10,%cr3 ;                                             \
-40:                                                                    \
-       movq    %rsp, %r10 ;            /* trampoline rsp */            \
+       movq    PCPU(trampoline)+TR_PCB_CR3,%rcx ;                      \
+       movq    %rcx,%cr3 ;                                             \
+40:    testq   $PCB_IBRS1,PCPU(trampoline)+TR_PCB_GFLAGS ;     \
+       je      41f ;                                                   \
+       movq    %rax, TR_RAX(%rsp) ;                                    \
+       movl    $0x48,%ecx ;                                            \
+       movl    $1,%eax ;                                               \
+       xorl    %edx,%edx ;                                             \
+       wrmsr ;                                                         \
+       movq    TR_RAX(%rsp), %rax ;                                    \
+41:                                                                    \
+       movq    %rsp, %rcx ;            /* trampoline rsp */            \
        movq    PCPU(trampoline)+TR_PCB_RSP,%rsp ; /* kstack rsp */     \
-       movq    TR_SS(%r10), %r11 ;                                     \
-       pushq   %r11 ;                                                  \
-       movq    TR_RSP(%r10), %r11 ;                                    \
-       pushq   %r11 ;                                                  \
-       movq    TR_RFLAGS(%r10), %r11 ;                                 \
-       pushq   %r11 ;                                                  \
-       movq    TR_CS(%r10), %r11 ;                                     \
-       pushq   %r11 ;                                                  \
-       movq    TR_RIP(%r10), %r11 ;                                    \
-       pushq   %r11 ;                                                  \
-       movq    TR_ERR(%r10), %r11 ;                                    \
-       pushq   %r11 ;                                                  \
-       movq    TR_R11(%r10), %r11 ;                                    \
-       movq    TR_R10(%r10), %r10                                      \
+       movq    TR_SS(%rcx), %rdx ;                                     \
+       pushq   %rdx ;                                                  \
+       movq    TR_RSP(%rcx), %rdx ;                                    \
+       pushq   %rdx ;                                                  \
+       movq    TR_RFLAGS(%rcx), %rdx ;                                 \
+       pushq   %rdx ;                                                  \
+       movq    TR_CS(%rcx), %rdx ;                                     \
+       pushq   %rdx ;                                                  \
+       movq    TR_RIP(%rcx), %rdx ;                                    \
+       pushq   %rdx ;                                                  \
+       movq    TR_ERR(%rcx), %rdx ;                                    \
+       pushq   %rdx ;                                                  \
+       movq    TR_RDX(%rcx), %rdx ;                                    \
+       movq    TR_RCX(%rcx), %rcx                                      \
 
 #define KMMUENTER_TFERR_SAVECR2                                                \
        subq    $TR_ERR, %rsp ;                                         \
-       movq    %r10, TR_R10(%rsp) ;                                    \
-       movq    %r11, TR_R11(%rsp) ;                                    \
-       movq    %cr2, %r10 ;                                            \
-       movq    %r10, PCPU(trampoline)+TR_CR2 ;                         \
+       movq    %rcx, TR_RCX(%rsp) ;                                    \
+       movq    %rdx, TR_RDX(%rsp) ;                                    \
+       movq    %cr2, %rcx ;                                            \
+       movq    %rcx, PCPU(trampoline)+TR_CR2 ;                         \
        testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
        je      40f ;                                                   \
-       movq    PCPU(trampoline)+TR_PCB_CR3,%r10 ;                      \
-       movq    %r10,%cr3 ;                                             \
-40:                                                                    \
-       movq    %rsp, %r10 ;            /* trampoline rsp */            \
+       movq    PCPU(trampoline)+TR_PCB_CR3,%rcx ;                      \
+       movq    %rcx,%cr3 ;                                             \
+40:    testq   $PCB_IBRS1,PCPU(trampoline)+TR_PCB_GFLAGS ;     \
+       je      41f ;                                                   \
+       movq    %rax, TR_RAX(%rsp) ;                                    \
+       movl    $0x48,%ecx ;                                            \
+       movl    $1,%eax ;                                               \
+       xorl    %edx,%edx ;                                             \
+       wrmsr ;                                                         \
+       movq    TR_RAX(%rsp), %rax ;                                    \
+41:                                                                    \
+       movq    %rsp, %rcx ;            /* trampoline rsp */            \
        movq    PCPU(trampoline)+TR_PCB_RSP,%rsp ; /* kstack rsp */     \
-       movq    TR_SS(%r10), %r11 ;                                     \
-       pushq   %r11 ;                                                  \
-       movq    TR_RSP(%r10), %r11 ;                                    \
-       pushq   %r11 ;                                                  \
-       movq    TR_RFLAGS(%r10), %r11 ;                                 \
-       pushq   %r11 ;                                                  \
-       movq    TR_CS(%r10), %r11 ;                                     \
-       pushq   %r11 ;                                                  \
-       movq    TR_RIP(%r10), %r11 ;                                    \
-       pushq   %r11 ;                                                  \
-       movq    TR_ERR(%r10), %r11 ;                                    \
-       pushq   %r11 ;                                                  \
-       movq    TR_R11(%r10), %r11 ;                                    \
-       movq    TR_R10(%r10), %r10                                      \
+       movq    TR_SS(%rcx), %rdx ;                                     \
+       pushq   %rdx ;                                                  \
+       movq    TR_RSP(%rcx), %rdx ;                                    \
+       pushq   %rdx ;                                                  \
+       movq    TR_RFLAGS(%rcx), %rdx ;                                 \
+       pushq   %rdx ;                                                  \
+       movq    TR_CS(%rcx), %rdx ;                                     \
+       pushq   %rdx ;                                                  \
+       movq    TR_RIP(%rcx), %rdx ;                                    \
+       pushq   %rdx ;                                                  \
+       movq    TR_ERR(%rcx), %rdx ;                                    \
+       pushq   %rdx ;                                                  \
+       movq    TR_RDX(%rcx), %rdx ;                                    \
+       movq    TR_RCX(%rcx), %rcx                                      \
 
 /*
  * Set %cr3 if necessary on syscall entry.  No registers may be
  * disturbed.
+ *
+ * NOTE: TR_RCX is used by the caller, we cannot use it here
  */
 #define KMMUENTER_SYSCALL                                              \
        testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
        je      40f ;                                                   \
-       pushq   %r10 ;                                                  \
-       movq    PCPU(trampoline)+TR_PCB_CR3,%r10 ;                      \
-       movq    %r10,%cr3 ;                                             \
-       popq    %r10 ;                                                  \
-40:                                                                    \
+       pushq   %rcx ;                                                  \
+       movq    PCPU(trampoline)+TR_PCB_CR3,%rcx ;                      \
+       movq    %rcx,%cr3 ;                                             \
+       popq    %rcx ;                                                  \
+40:    testq   $PCB_IBRS1,PCPU(trampoline)+TR_PCB_GFLAGS ;     \
+       je      41f ;                                                   \
+       pushq   %rax ;                                                  \
+       pushq   %rcx ;                                                  \
+       pushq   %rdx ;                                                  \
+       movl    $0x48,%ecx ;                                            \
+       movl    $1,%eax ;                                               \
+       xorl    %edx,%edx ;                                             \
+       wrmsr ;                                                         \
+       popq    %rdx ;                                                  \
+       popq    %rcx ;                                                  \
+       popq    %rax ;                                                  \
+41:                                                                    \
 
 /*
  * We are positioned at the base of the trapframe.  Advance the trapframe
  */
 #define KMMUEXIT                                                       \
        addq    $TF_RIP,%rsp ;                                          \
-       testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
+       testq   $PCB_IBRS1,PCPU(trampoline)+TR_PCB_GFLAGS ;             \
+       je      41f ;                                                   \
+       movq    %rax, PCPU(trampoline)+TR_RAX ;                         \
+       movq    %rcx, PCPU(trampoline)+TR_RCX ;                         \
+       movq    %rdx, PCPU(trampoline)+TR_RDX ;                         \
+       movl    $0x48,%ecx ;                                            \
+       movl    $0,%eax ;                                               \
+       xorl    %edx,%edx ;                                             \
+       wrmsr ;                                                         \
+       movq    PCPU(trampoline)+TR_RDX, %rdx ;                         \
+       movq    PCPU(trampoline)+TR_RCX, %rcx ;                         \
+       movq    PCPU(trampoline)+TR_RAX, %rax ;                         \
+41:    testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
        je      40f ;                                                   \
-       movq    %r11, PCPU(trampoline)+TR_ERR ; /* save in TR_ERR */    \
-       popq    %r11 ;                          /* copy %rip */         \
-       movq    %r11, PCPU(trampoline)+TR_RIP ;                         \
-       popq    %r11 ;                          /* copy %cs */          \
-       movq    %r11, PCPU(trampoline)+TR_CS ;                          \
-       popq    %r11 ;                          /* copy %rflags */      \
-       movq    %r11, PCPU(trampoline)+TR_RFLAGS ;                      \
-       popq    %r11 ;                          /* copy %rsp */         \
-       movq    %r11, PCPU(trampoline)+TR_RSP ;                         \
-       popq    %r11 ;                          /* copy %ss */          \
-       movq    %r11, PCPU(trampoline)+TR_SS ;                          \
-       movq    %gs:0,%r11 ;                                            \
-       addq    $GD_TRAMPOLINE+TR_ERR,%r11 ;                            \
-       movq    %r11,%rsp ;                                             \
-       movq    PCPU(trampoline)+TR_PCB_CR3_ISO,%r11 ;                  \
-       movq    %r11,%cr3 ;                                             \
-       popq    %r11 ;          /* positioned at TR_RIP after this */   \
+       movq    %rcx, PCPU(trampoline)+TR_ERR ; /* save in TR_ERR */    \
+       popq    %rcx ;                          /* copy %rip */         \
+       movq    %rcx, PCPU(trampoline)+TR_RIP ;                         \
+       popq    %rcx ;                          /* copy %cs */          \
+       movq    %rcx, PCPU(trampoline)+TR_CS ;                          \
+       popq    %rcx ;                          /* copy %rflags */      \
+       movq    %rcx, PCPU(trampoline)+TR_RFLAGS ;                      \
+       popq    %rcx ;                          /* copy %rsp */         \
+       movq    %rcx, PCPU(trampoline)+TR_RSP ;                         \
+       popq    %rcx ;                          /* copy %ss */          \
+       movq    %rcx, PCPU(trampoline)+TR_SS ;                          \
+       movq    %gs:0,%rcx ;                                            \
+       addq    $GD_TRAMPOLINE+TR_ERR,%rcx ;                            \
+       movq    %rcx,%rsp ;                                             \
+       movq    PCPU(trampoline)+TR_PCB_CR3_ISO,%rcx ;                  \
+       movq    %rcx,%cr3 ;                                             \
+       popq    %rcx ;          /* positioned at TR_RIP after this */   \
 40:                                                                    \
 
 /*
  * point.  We still have the kernel %gs.
  */
 #define KMMUEXIT_SYSCALL                                               \
-       testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
+       testq   $PCB_IBRS1,PCPU(trampoline)+TR_PCB_GFLAGS ;             \
+       je      41f ;                                                   \
+       movq    %rax, PCPU(trampoline)+TR_RAX ;                         \
+       movq    %rcx, PCPU(trampoline)+TR_RCX ;                         \
+       movq    %rdx, PCPU(trampoline)+TR_RDX ;                         \
+       movl    $0x48,%ecx ;                                            \
+       movl    $0,%eax ;                                               \
+       xorl    %edx,%edx ;                                             \
+       wrmsr ;                                                         \
+       movq    PCPU(trampoline)+TR_RDX, %rdx ;                         \
+       movq    PCPU(trampoline)+TR_RCX, %rcx ;                         \
+       movq    PCPU(trampoline)+TR_RAX, %rax ;                         \
+41:    testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
        je      40f ;                                                   \
-       movq    %r10, PCPU(trampoline)+TR_R10 ;                         \
-       movq    PCPU(trampoline)+TR_PCB_CR3_ISO,%r10 ;                  \
-       movq    %r10,%cr3 ;                                             \
-       movq    PCPU(trampoline)+TR_R10, %r10 ;                         \
+       movq    %rcx, PCPU(trampoline)+TR_RCX ;                         \
+       movq    PCPU(trampoline)+TR_PCB_CR3_ISO,%rcx ;                  \
+       movq    %rcx,%cr3 ;                                             \
+       movq    PCPU(trampoline)+TR_RCX, %rcx ;                         \
 40:                                                                    \
 
 /*
index 1b15f50..49a753d 100644 (file)
@@ -124,10 +124,10 @@ struct intrframe {
  * of the isolated user pmap.
  */
 struct trampframe {
-       register_t      tr_unused01;
        register_t      tr_cr2;
-       register_t      tr_r10;
-       register_t      tr_r11;
+       register_t      tr_rax;
+       register_t      tr_rcx;
+       register_t      tr_rdx;
        register_t      tr_err;
        register_t      tr_rip;
        register_t      tr_cs;
@@ -144,6 +144,8 @@ struct trampframe {
        register_t      tr_pcb_flags;   /* copy of pcb control flags */
        register_t      tr_pcb_cr3_iso; /* copy of isolated pml4e */
        register_t      tr_pcb_cr3;     /* copy of primary pml4e */
+       register_t      tr_pcb_gflags;  /* global flags (IBRS support) */
+       register_t      tr_pcb_unused01;
 };
 
 int    kdb_trap(int, int, struct trapframe *);
index 5760df3..27a8dd3 100644 (file)
@@ -216,6 +216,8 @@ cpuctl_do_msr(int cpu, cpuctl_msr_args_t *data, u_long cmd)
 /*
  * Actually perform microcode update.
  */
+extern void spectre_vm_setup(void *arg);
+
 static int
 cpuctl_do_update(int cpu, cpuctl_update_args_t *data)
 {
@@ -242,6 +244,10 @@ cpuctl_do_update(int cpu, cpuctl_update_args_t *data)
                ret = update_via(cpu, data);
        else
                ret = ENXIO;
+
+       if (ret == 0)
+               spectre_vm_setup((void *)(intptr_t)1);
+
        return (ret);
 }
 
index 6264e32..104aa8c 100644 (file)
@@ -87,6 +87,8 @@ struct pcb {
 #define        PCB_DBREGS      0x00000002      /* process using debug registers */
 #define        PCB_FPUINITDONE 0x00000008      /* fpu state is initialized */
 #define PCB_ISOMMU     0x00000010      /* isolated mmu context active */
+#define PCB_IBRS1      0x00000020      /* IBRS mode 1 (kernel only) */
+#define PCB_IBRS2      0x00000040      /* IBRS mode 2 (always) */
 #define FP_SOFTFP       0x01           /* process using soft flt emulator */
 #define        FP_VIRTFP       0x04            /* vkernel wants exception */
 
index 51ba1c3..8a9376c 100644 (file)
@@ -241,11 +241,11 @@ prot_normal:
  * pointer.  We have to juggle a few things around to find our stack etc.
  * swapgs gives us access to our PCPU space only.
  *
- * We use GD_TRAMPOLINE+TR_R10
+ * We use GD_TRAMPOLINE+TR_RCX
  */
 IDTVEC(fast_syscall)
        swapgs                                  /* get kernel %gs */
-       movq    %rsp,PCPU(trampoline)+TR_R10    /* save user %rsp */
+       movq    %rsp,PCPU(trampoline)+TR_RCX    /* save user %rsp */
        movq    PCPU(common_tss)+TSS_RSP0,%rsp
        KMMUENTER_SYSCALL
        movq    PCPU(trampoline)+TR_PCB_RSP,%rsp
@@ -255,7 +255,7 @@ IDTVEC(fast_syscall)
        /* defer TF_RSP till we have a spare register */
        movq    %r11,TF_RFLAGS(%rsp)
        movq    %rcx,TF_RIP(%rsp)       /* %rcx original value is in %r10 */
-       movq    PCPU(trampoline)+TR_R10,%r11    /* %r11 already saved */
+       movq    PCPU(trampoline)+TR_RCX,%r11    /* %r11 already saved */
        movq    %r11,TF_RSP(%rsp)       /* user stack pointer */
        orl     $RQF_QUICKRET,PCPU(reqflags)
        movq    $KUDSEL,TF_SS(%rsp)
index 8f29ec5..bed0c41 100644 (file)
@@ -137,6 +137,8 @@ ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
 
 ASSYM(PCB_DBREGS, PCB_DBREGS);
 ASSYM(PCB_ISOMMU, PCB_ISOMMU);
+ASSYM(PCB_IBRS1, PCB_IBRS1);
+ASSYM(PCB_IBRS2, PCB_IBRS2);
 
 ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
 ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
@@ -211,8 +213,9 @@ ASSYM(GD_USER_GS, offsetof(struct mdglobaldata, gd_user_gs));
 ASSYM(GD_INTR_NESTING_LEVEL, offsetof(struct mdglobaldata, mi.gd_intr_nesting_level));
 
 ASSYM(TR_CR2, offsetof(struct trampframe, tr_cr2));
-ASSYM(TR_R10, offsetof(struct trampframe, tr_r10));
-ASSYM(TR_R11, offsetof(struct trampframe, tr_r11));
+ASSYM(TR_RAX, offsetof(struct trampframe, tr_rax));
+ASSYM(TR_RCX, offsetof(struct trampframe, tr_rcx));
+ASSYM(TR_RDX, offsetof(struct trampframe, tr_rdx));
 ASSYM(TR_ERR, offsetof(struct trampframe, tr_err));
 ASSYM(TR_RIP, offsetof(struct trampframe, tr_rip));
 ASSYM(TR_CS, offsetof(struct trampframe, tr_cs));
@@ -223,6 +226,7 @@ ASSYM(TR_PCB_RSP, offsetof(struct trampframe, tr_pcb_rsp));
 ASSYM(TR_PCB_FLAGS, offsetof(struct trampframe, tr_pcb_flags));
 ASSYM(TR_PCB_CR3_ISO, offsetof(struct trampframe, tr_pcb_cr3_iso));
 ASSYM(TR_PCB_CR3, offsetof(struct trampframe, tr_pcb_cr3));
+ASSYM(TR_PCB_GFLAGS, offsetof(struct trampframe, tr_pcb_gflags));
 
 ASSYM(GD_IPENDING, offsetof(struct mdglobaldata, gd_ipending));
 ASSYM(GD_SPENDING, offsetof(struct mdglobaldata, gd_spending));
index c9c500b..c5ad4b2 100644 (file)
@@ -177,8 +177,8 @@ SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count,
 #endif
 SYSCTL_INT(_hw, OID_AUTO, cpu_mwait_halt,
        CTLFLAG_RD, &cpu_mwait_halt_global, 0, "");
-SYSCTL_INT(_hw, OID_AUTO, cpu_mwait_spin, CTLFLAG_RD, &cpu_mwait_spin, 0,
-    "monitor/mwait target state");
+SYSCTL_INT(_hw, OID_AUTO, cpu_mwait_spin,
+       CTLFLAG_RD, &cpu_mwait_spin, 0, "monitor/mwait target state");
 
 #define CPU_MWAIT_HAS_CX       \
        ((cpu_feature2 & CPUID2_MON) && \
@@ -1217,19 +1217,37 @@ cpu_idle(void)
                }
                ++stat->repeat_last;
 
+               /*
+                * General idle thread halt code
+                *
+                * IBRS NOTES - IBRS is a SPECTRE mitigation.  When going
+                *              idle, IBRS
+                */
                ++gd->gd_idle_repeat;
                reqflags = gd->gd_reqflags;
                quick = (cpu_idle_hlt == 1) ||
                        (cpu_idle_hlt == 2 &&
                         gd->gd_idle_repeat < cpu_idle_repeat);
 
+
                if (quick && (cpu_mi_feature & CPU_MI_MONITOR) &&
                    (reqflags & RQF_IDLECHECK_WK_MASK) == 0) {
+                       /*
+                        * MWAIT halt
+                        */
                        splz(); /* XXX */
                        crit_enter_gd(gd);
                        ATOMIC_CPUMASK_ORBIT(smp_idleinvl_mask, gd->gd_cpuid);
+                       if (pscpu->trampoline.tr_pcb_gflags &
+                           (PCB_IBRS1 | PCB_IBRS2)) {
+                               wrmsr(0x48, 0); /* IBRS (spectre) */
+                       }
                        cpu_mmw_pause_int(&gd->gd_reqflags, reqflags,
-                           cpu_mwait_cx_hint(stat), 0);
+                                         cpu_mwait_cx_hint(stat), 0);
+                       if (pscpu->trampoline.tr_pcb_gflags &
+                           (PCB_IBRS1 | PCB_IBRS2)) {
+                               wrmsr(0x48, 1); /* IBRS (spectre) */
+                       }
                        stat->halt++;
                        ATOMIC_CPUMASK_NANDBIT(smp_idleinvl_mask, gd->gd_cpuid);
                        if (ATOMIC_CPUMASK_TESTANDCLR(smp_idleinvl_reqs,
@@ -1239,6 +1257,9 @@ cpu_idle(void)
                        }
                        crit_exit_gd(gd);
                } else if (cpu_idle_hlt) {
+                       /*
+                        * Idle halt
+                        */
                        __asm __volatile("cli");
                        splz();
                        crit_enter_gd(gd);
@@ -1247,9 +1268,27 @@ cpu_idle(void)
                                if (cpu_idle_hlt == 5) {
                                        __asm __volatile("sti");
                                } else if (quick || cpu_idle_hlt == 4) {
+                                       if (pscpu->trampoline.tr_pcb_gflags &
+                                           (PCB_IBRS1 | PCB_IBRS2)) {
+                                               /* IBRS (spectre) */
+                                               wrmsr(0x48, 0);
+                                       }
                                        cpu_idle_default_hook();
+                                       if (pscpu->trampoline.tr_pcb_gflags &
+                                           (PCB_IBRS1 | PCB_IBRS2)) {
+                                               /* IBRS (spectre) */
+                                               wrmsr(0x48, 1);
+                                       }
                                } else {
+                                       if (pscpu->trampoline.tr_pcb_gflags &
+                                           (PCB_IBRS1 | PCB_IBRS2)) {
+                                               wrmsr(0x48, 0);
+                                       }
                                        cpu_idle_hook();
+                                       if (pscpu->trampoline.tr_pcb_gflags &
+                                           (PCB_IBRS1 | PCB_IBRS2)) {
+                                               wrmsr(0x48, 1);
+                                       }
                                }
                        }
                        __asm __volatile("sti");
index 0b65759..1e4e557 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (c) 1982, 1986 The Regents of the University of California.
  * Copyright (c) 1989, 1990 William Jolitz
  * Copyright (c) 1994 John Dyson
- * Copyright (c) 2008 The DragonFly Project.
+ * Copyright (c) 2008-2018 The DragonFly Project.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
@@ -79,6 +79,9 @@
 #include <bus/isa/isa.h>
 
 static void    cpu_reset_real (void);
+
+int spectre_mitigation = -1;
+
 /*
  * Finish a fork operation, with lwp lp2 nearly set up.
  * Copy and update the pcb, set up the stack so that the child
@@ -159,6 +162,24 @@ cpu_fork(struct lwp *lp1, struct lwp *lp2, int flags)
                pcb2->pcb_flags &= ~PCB_ISOMMU;
                pcb2->pcb_cr3_iso = 0;
        }
+
+#if 0
+       /*
+        * Per-process spectre mitigation (future)
+        */
+       pcb2->pcb_flags &= ~(PCB_IBRS1 | PCB_IBRS2);
+       switch (spectre_mitigation) {
+       case 1:
+               pcb2->pcb_flags |= PCB_IBRS1;
+               break;
+       case 2:
+               pcb2->pcb_flags |= PCB_IBRS2;
+               break;
+       default:
+               break;
+       }
+#endif
+
        pcb2->pcb_rbx = (unsigned long)fork_return;     /* fork_trampoline argument */
        pcb2->pcb_rbp = 0;
        pcb2->pcb_rsp = (unsigned long)lp2->lwp_md.md_regs - sizeof(void *);
@@ -379,7 +400,193 @@ swi_vm_setup(void *arg)
        register_swi_mp(SWI_VM, swi_vm, NULL, "swi_vm", NULL, 0);
 }
 
-SYSINIT(vm_setup, SI_BOOT2_MACHDEP, SI_ORDER_ANY, swi_vm_setup, NULL);
+SYSINIT(swi_vm_setup, SI_BOOT2_MACHDEP, SI_ORDER_ANY, swi_vm_setup, NULL);
+
+/*
+ * NOTE: This routine is also called after a successful microcode
+ *      reload on cpu 0.
+ */
+void spectre_vm_setup(void *arg);
+
+/*
+ * Check for IBRS support
+ *
+ * Probes the cpu this is called on (microcode, and therefore support,
+ * can differ per-cpu until all cpus have been updated).  Returns 1 if
+ * CPUID.7.0:EDX advertises both SPEC_CTRL (bit 26) and STIBP (bit 27)
+ * and the 0x80000008 leaf advertises IBPB, else 0.
+ */
+static
+int
+spectre_check_support(void)
+{
+       uint32_t p[4];
+
+       /* CPUID leaf 7, subleaf 0: structured extended feature flags */
+       p[0] = 0;
+       p[1] = 0;
+       p[2] = 0;
+       p[3] = 0;
+       cpuid_count(7, 0, p);
+       if ((p[3] & 0x0C000000U) == 0x0C000000U) {
+
+               /*
+                * SPEC_CTRL (bit 26) and STIBP support (bit 27)
+                *
+                * 0x80000008 p[0] bit 12 indicates IBPB support
+                *
+                * NOTE(review): AMD documents the IBPB capability in
+                *       CPUID 0x80000008 EBX[12] (p[1] here); this code
+                *       tests EAX (p[0]).  Verify the intended register
+                *       index for the cpus being targeted.
+                */
+               p[0] = 0;
+               p[1] = 0;
+               p[2] = 0;
+               p[3] = 0;
+               do_cpuid(0x80000008U, p);
+               if (p[0] & 0x00001000)
+                       return 1;
+       }
+       return 0;
+}
+
+/*
+ * Iterate CPUs and adjust MSR for global operations, since
+ * the KMMU* code won't do it if spectre_mitigation is 0 or 2.
+ *
+ * Migrates the calling thread onto each cpu in turn (lwkt_setcpu_self)
+ * so that both the per-cpu trampoline gflags and the SPEC_CTRL MSR
+ * (0x48) are updated on every cpu, then migrates back to the original
+ * cpu before returning.
+ *
+ * old_value is the previous spectre_mitigation setting; old_value < 0
+ * means the MSR was never written (no microcode support detected), so
+ * mode 0 skips the wrmsr to avoid faulting on an unsupported MSR.
+ */
+static
+void
+spectre_sysctl_changed(int old_value)
+{
+       globaldata_t save_gd;
+       int n;
+
+       save_gd = mycpu;
+       for (n = 0; n < ncpus; ++n) {
+               lwkt_setcpu_self(globaldata_find(n));
+
+               /* clear both mode flags, then set per the new mode below */
+               pscpu->trampoline.tr_pcb_gflags &= ~(PCB_IBRS1 | PCB_IBRS2);
+
+               switch(spectre_mitigation) {
+               case 0:
+                       /* disabled; only touch the MSR if it was ever set */
+                       if (old_value >= 0)
+                               wrmsr(0x48, 0);
+                       break;
+               case 1:
+                       /* IBRS toggled on kernel entry/exit (trampoline) */
+                       pscpu->trampoline.tr_pcb_gflags |= PCB_IBRS1;
+                       wrmsr(0x48, 1);
+                       break;
+               case 2:
+                       /* IBRS left enabled globally, no per-syscall toggle */
+                       pscpu->trampoline.tr_pcb_gflags |= PCB_IBRS2;
+                       wrmsr(0x48, 1);
+                       break;
+               }
+       }
+       if (save_gd != mycpu)
+               lwkt_setcpu_self(save_gd);
+}
+
+/*
+ * User changes sysctl value
+ *
+ * Sysctl handler for machdep.spectre_mitigation.  Records the new mode
+ * and propagates the MSR / trampoline-flag changes to all cpus via
+ * spectre_sysctl_changed().  Returns 0 on success or the error from
+ * sysctl_handle_int().
+ */
+static int
+sysctl_spectre_mitigation(SYSCTL_HANDLER_ARGS)
+{
+       int new_spectre;
+       int old_spectre;
+       int error;
+
+       old_spectre = spectre_mitigation;
+       new_spectre = old_spectre;
+       error = sysctl_handle_int(oidp, &new_spectre, 0, req);
+       /* read-only query or copyin error: nothing to apply */
+       if (error || req->newptr == NULL)
+               return error;
+       spectre_mitigation = new_spectre;
+       spectre_sysctl_changed(old_spectre);
+
+       return 0;
+}
+
+SYSCTL_PROC(_machdep, OID_AUTO, spectre_mitigation, CTLTYPE_INT | CTLFLAG_RW,
+       0, 0, sysctl_spectre_mitigation, "I", "Spectre exploit mitigation");
+
+/*
+ * Determine spectre mitigation support, pick the default mode, gate the
+ * sysctl, and apply the chosen mode on all cpus.
+ *
+ * Called at boot via SYSINIT (arg == NULL) and again after a successful
+ * microcode reload from cpuctl (arg != NULL).
+ *
+ * spectre_mitigation values: -1 no support (sysctl made read-only),
+ * 0 disabled, 1 IBRS toggled on kernel entry/exit, 2 IBRS left on
+ * globally.
+ */
+void
+spectre_vm_setup(void *arg)
+{
+       int inconsistent = 0;
+       int old_value = spectre_mitigation;
+
+       /* honor the tunable only if the mode was never set */
+       if (spectre_mitigation < 0) {
+               TUNABLE_INT_FETCH("machdep.spectre_mitigation",
+                                 &spectre_mitigation);
+       }
+
+       if (cpu_vendor_id == CPU_VENDOR_INTEL) {
+               if (spectre_check_support()) {
+                       /*
+                        * Must be supported on all cpus before we
+                        * can enable it.  Returns silently if it
+                        * isn't.
+                        *
+                        * NOTE! arg != NULL indicates we were called
+                        *       from cpuctl after a successful microcode
+                        *       update.
+                        */
+                       if (arg != NULL) {
+                               globaldata_t save_gd;
+                               int n;
+
+                               /* re-probe every cpu; microcode loads per-cpu */
+                               save_gd = mycpu;
+                               for (n = 0; n < ncpus; ++n) {
+                                       lwkt_setcpu_self(globaldata_find(n));
+                                       if (spectre_check_support() == 0) {
+                                               inconsistent = 1;
+                                               break;
+                                       }
+                               }
+                               if (save_gd != mycpu)
+                                       lwkt_setcpu_self(save_gd);
+                       }
+                       if (inconsistent == 0) {
+                               /* default to mode 1 unless tunable chose one */
+                               if (spectre_mitigation < 0)
+                                       spectre_mitigation = 1;
+                       } else {
+                               spectre_mitigation = -1;
+                       }
+               } else {
+                       spectre_mitigation = -1;
+               }
+       } else {
+                spectre_mitigation = -1;                /* no support */
+       }
+
+       /*
+        * Be silent while microcode is being loaded on various CPUs,
+        * until all done.
+        */
+       if (inconsistent)
+               return;
+
+       /*
+        * Disallow sysctl changes when there is no support (otherwise
+        * the wrmsr will cause a protection fault).
+        */
+       switch(spectre_mitigation) {
+       case 0:
+               sysctl___machdep_spectre_mitigation.oid_kind |= CTLFLAG_WR;
+               kprintf("machdep.spectre_mitigation available but disabled\n");
+               break;
+       case 1:
+               sysctl___machdep_spectre_mitigation.oid_kind |= CTLFLAG_WR;
+               kprintf("machdep.spectre_mitigation available, system call\n"
+                       "performance and kernel operation will be impacted\n");
+               break;
+       case 2:
+               sysctl___machdep_spectre_mitigation.oid_kind |= CTLFLAG_WR;
+               kprintf("machdep.spectre_mitigation available, whole machine\n"
+                       "performance will be impacted\n");
+               break;
+       default:
+               /* no support: make the sysctl read-only */
+               sysctl___machdep_spectre_mitigation.oid_kind &= ~CTLFLAG_WR;
+               if (cpu_vendor_id == CPU_VENDOR_INTEL)
+                       kprintf("no microcode spectre mitigation available\n");
+               break;
+       }
+       /* push the final mode out to all cpus */
+       spectre_sysctl_changed(old_value);
+}
+
+SYSINIT(spectre_vm_setup, SI_BOOT2_MACHDEP, SI_ORDER_ANY,
+       spectre_vm_setup, NULL);
 
 /*
  * platform-specific vmspace initialization (nothing for x86_64)