kernel - Add MDS mitigation support for Intel side-channel attack
sys/cpu/x86_64/include/asmacros.h
/*
 * Copyright (c) 1993 The Regents of the University of California.
 * Copyright (c) 2008 The DragonFly Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/amd64/include/asmacros.h,v 1.32 2006/10/28 06:04:29 bde Exp $
 */

#ifndef _CPU_ASMACROS_H_
#define _CPU_ASMACROS_H_

#include <sys/cdefs.h>
#include <machine/specialreg.h>

/* XXX too much duplication in various asm*.h's. */

/*
 * CNAME is used to manage the relationship between symbol names in C
 * and the equivalent assembly language names.  CNAME is given a name as
 * it would be used in a C program.  It expands to the equivalent assembly
 * language name.
 */
#define CNAME(csym)             csym

#define ALIGN_DATA      .p2align 3      /* 8-byte alignment, zero filled */
#define ALIGN_TEXT      .p2align 4,0x90 /* 16-byte alignment, nop filled */
#define SUPERALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */

#define GEN_ENTRY(name)         ALIGN_TEXT; .globl CNAME(name); \
                                .type CNAME(name),@function; CNAME(name):
#define NON_GPROF_ENTRY(name)   GEN_ENTRY(name)
#define NON_GPROF_RET           .byte 0xc3      /* opcode for `ret' */

#define END(name)               .size name, . - name

/*
 * ALTENTRY() has to align because it is before a corresponding ENTRY().
 * ENTRY() has to align too, because there may be no ALTENTRY() before it.
 * If there is a previous ALTENTRY() then the alignment code for ENTRY()
 * is empty.
 */
#define ALTENTRY(name)          GEN_ENTRY(name)
#define CROSSJUMP(jtrue, label, jfalse) jtrue label
#define CROSSJUMPTARGET(label)
#define ENTRY(name)             GEN_ENTRY(name)
#define FAKE_MCOUNT(caller)
#define MCOUNT
#define MCOUNT_LABEL(name)
#define MEXITCOUNT

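/*
 * Usage sketch (illustrative annotation, not part of the original
 * header; my_func is a hypothetical symbol).  ENTRY() aligns the text,
 * exports the symbol, and types it as a function; END() records the
 * symbol size for debuggers:
 *
 *      ENTRY(my_func)
 *              ...function body...
 *              ret
 *      END(my_func)
 *
 * ALTENTRY() publishes an additional entry point immediately before a
 * corresponding ENTRY().
 */
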
#ifdef LOCORE
/*
 * Convenience macro for declaring interrupt entry points.
 */
#define IDTVEC(name)    ALIGN_TEXT; .globl __CONCAT(X,name); \
                        .type __CONCAT(X,name),@function; __CONCAT(X,name):

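/*
 * Usage sketch (illustrative annotation): IDTVEC(div) emits the global
 * function symbol Xdiv, suitable for installation in the IDT:
 *
 *      IDTVEC(div)
 *              ...vector body...
 */
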
/*
 * stack frame macro support - supports mmu isolation, swapgs, and
 * stack frame pushing and popping.
 */

/*
 * Kernel pmap isolation to work around the massive Intel mmu bug
 * that allows kernel memory to be sussed out due to speculative memory
 * reads and instruction execution creating timing differences that can
 * be detected by userland.  e.g. force a speculative read, speculatively
 * execute a cmp/branch sequence, detect the timing.  Iterate cmp $values
 * to suss out the content of speculatively read kernel memory.
 *
 * We do this by creating a trampoline area for all user->kernel and
 * kernel->user transitions.  The trampoline area allows us to limit
 * the reach into the kernel map, in the isolated version of the user
 * pmap, to JUST the trampoline area (for all cpus), the tss, and the
 * vector area.
 *
 * It is very important that these transitions not access any memory
 * outside of the trampoline page while the isolated user process pmap
 * is active in %cr3.
 *
 * The trampoline does not add much overhead when pmap isolation is
 * disabled, so we just run with it regardless.  Of course, when pmap
 * isolation is enabled, the %cr3 loads add 150-250ns to every system
 * call as well as (without PCID) smashing the TLB.
 *
 * KMMUENTER -  Executed by the trampoline when a user->kernel transition
 *              is detected.  The stack pointer points into the pcpu
 *              trampoline space and is available for register save/restore.
 *              Other registers have not yet been saved.  %gs points at
 *              the kernel pcpu structure.
 *
 *              Caller has already determined that a transition is in
 *              progress and has already issued the swapgs.  hwtf indicates
 *              how much hardware has already pushed.
 *
 * KMMUEXIT  -  Executed when a kernel->user transition is made.  The stack
 *              pointer points into the pcpu trampoline space and we are
 *              almost ready to iretq.  %gs still points at the kernel pcpu
 *              structure.
 *
 *              Caller has already determined that a transition is in
 *              progress.  hwtf indicates how much hardware has already
 *              pushed.
 */

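/*
 * Flow sketch (illustrative annotation, derived from the macros below):
 * a trap from user mode runs roughly
 *
 *      swapgs                  kernel %gs for PCPU() access
 *      KMMUENTER_TFRIP         kernel %cr3, spec-ctrl MSR pokes, copy
 *                              the hardware frame to the kernel stack
 *      ...trap processing on the kernel stack...
 *      KMMUEXIT                MDS/IBRS exit barriers, copy the hardware
 *                              frame to the trampoline, isolated %cr3
 *      swapgs ; iretq          back to userland
 */
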
/*
 * KMMUENTER_CORE - Handles ISOMMU, IBRS, and IBPB.  Caller has already
 *                  saved %rcx and %rdx.  We have to deal with %rax.
 *
 *                  XXX If IBPB is not supported, try to clear the
 *                  call return hw cache with a long chained call sequence?
 *
 * NOTE - IBRS2 - We are leaving IBRS on full-time.  However, Intel
 *                believes it is not safe unless the MSR is poked on each
 *                user->kernel transition, so poke the MSR for both IBRS1
 *                and IBRS2.
 */
#define KMMUENTER_CORE                                                  \
        testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
        je      40f ;                                                   \
        movq    PCPU(trampoline)+TR_PCB_CR3,%rcx ;                      \
        movq    %rcx,%cr3 ;                                             \
40:     movl    PCPU(trampoline)+TR_PCB_SPEC_CTRL,%edx ;                \
        testq   %rdx, %rdx ;                                            \
        je      43f ;                                                   \
        movq    %rax, PCPU(trampoline)+TR_RAX ;                         \
        testq   $SPEC_CTRL_DUMMY_ENABLE,%rdx ;                          \
        je      41f ;                                                   \
        movq    %rdx, %rax ;                                            \
        andq    $SPEC_CTRL_IBRS|SPEC_CTRL_STIBP, %rax ;                 \
        movq    $MSR_SPEC_CTRL,%rcx ;                                   \
        xorl    %edx,%edx ;                                             \
        wrmsr ;                                                         \
        movl    PCPU(trampoline)+TR_PCB_SPEC_CTRL,%edx ;                \
41:     testq   $SPEC_CTRL_DUMMY_IBPB,%rdx ;                            \
        je      42f ;                                                   \
        movl    $MSR_PRED_CMD,%ecx ;                                    \
        movl    $1,%eax ;                                               \
        xorl    %edx,%edx ;                                             \
        wrmsr ;                                                         \
42:     movq    PCPU(trampoline)+TR_RAX, %rax ;                         \
43:                                                                     \

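/*
 * Reader's note (added annotation): wrmsr writes %edx:%eax to the MSR
 * indexed by %ecx, which is why the sequences above load the MSR number
 * into %ecx/%rcx, the value into %eax, and zero %edx before the wrmsr.
 * Writing $1 to MSR_PRED_CMD sets the IBPB bit, issuing the indirect
 * branch prediction barrier.
 */
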
/*
 * Enter with trampoline, hardware pushed up to %rip
 */
#define KMMUENTER_TFRIP                                                 \
        subq    $TR_RIP, %rsp ;                                         \
        movq    %rcx, TR_RCX(%rsp) ;                                    \
        movq    %rdx, TR_RDX(%rsp) ;                                    \
        KMMUENTER_CORE ;                                                \
        movq    %rsp, %rcx ;            /* trampoline rsp */            \
        movq    PCPU(trampoline)+TR_PCB_RSP,%rsp ; /* kstack rsp */     \
        movq    TR_SS(%rcx), %rdx ;                                     \
        pushq   %rdx ;                                                  \
        movq    TR_RSP(%rcx), %rdx ;                                    \
        pushq   %rdx ;                                                  \
        movq    TR_RFLAGS(%rcx), %rdx ;                                 \
        pushq   %rdx ;                                                  \
        movq    TR_CS(%rcx), %rdx ;                                     \
        pushq   %rdx ;                                                  \
        movq    TR_RIP(%rcx), %rdx ;                                    \
        pushq   %rdx ;                                                  \
        movq    TR_RDX(%rcx), %rdx ;                                    \
        movq    TR_RCX(%rcx), %rcx                                      \

/*
 * Enter with trampoline, hardware pushed up to ERR
 */
#define KMMUENTER_TFERR                                                 \
        subq    $TR_ERR, %rsp ;                                         \
        movq    %rcx, TR_RCX(%rsp) ;                                    \
        movq    %rdx, TR_RDX(%rsp) ;                                    \
        KMMUENTER_CORE ;                                                \
        movq    %rsp, %rcx ;            /* trampoline rsp */            \
        movq    PCPU(trampoline)+TR_PCB_RSP,%rsp ; /* kstack rsp */     \
        movq    TR_SS(%rcx), %rdx ;                                     \
        pushq   %rdx ;                                                  \
        movq    TR_RSP(%rcx), %rdx ;                                    \
        pushq   %rdx ;                                                  \
        movq    TR_RFLAGS(%rcx), %rdx ;                                 \
        pushq   %rdx ;                                                  \
        movq    TR_CS(%rcx), %rdx ;                                     \
        pushq   %rdx ;                                                  \
        movq    TR_RIP(%rcx), %rdx ;                                    \
        pushq   %rdx ;                                                  \
        movq    TR_ERR(%rcx), %rdx ;                                    \
        pushq   %rdx ;                                                  \
        movq    TR_RDX(%rcx), %rdx ;                                    \
        movq    TR_RCX(%rcx), %rcx                                      \

/*
 * Enter with trampoline, hardware pushed up to ERR and
 * we need to save %cr2 early (before potentially reloading %cr3).
 */
#define KMMUENTER_TFERR_SAVECR2                                         \
        subq    $TR_ERR, %rsp ;                                         \
        movq    %rcx, TR_RCX(%rsp) ;                                    \
        movq    %rdx, TR_RDX(%rsp) ;                                    \
        movq    %cr2, %rcx ;                                            \
        movq    %rcx, PCPU(trampoline)+TR_CR2 ;                         \
        KMMUENTER_CORE ;                                                \
        movq    %rsp, %rcx ;            /* trampoline rsp */            \
        movq    PCPU(trampoline)+TR_PCB_RSP,%rsp ; /* kstack rsp */     \
        movq    TR_SS(%rcx), %rdx ;                                     \
        pushq   %rdx ;                                                  \
        movq    TR_RSP(%rcx), %rdx ;                                    \
        pushq   %rdx ;                                                  \
        movq    TR_RFLAGS(%rcx), %rdx ;                                 \
        pushq   %rdx ;                                                  \
        movq    TR_CS(%rcx), %rdx ;                                     \
        pushq   %rdx ;                                                  \
        movq    TR_RIP(%rcx), %rdx ;                                    \
        pushq   %rdx ;                                                  \
        movq    TR_ERR(%rcx), %rdx ;                                    \
        pushq   %rdx ;                                                  \
        movq    TR_RDX(%rcx), %rdx ;                                    \
        movq    TR_RCX(%rcx), %rcx                                      \

/*
 * Set %cr3 if necessary on syscall entry.  No registers may be
 * disturbed.
 *
 * NOTE: TR_CR2 is used by the caller to save %rsp; we cannot use it here.
 */
#define KMMUENTER_SYSCALL                                               \
        movq    %rcx, PCPU(trampoline)+TR_RCX ;                         \
        movq    %rdx, PCPU(trampoline)+TR_RDX ;                         \
        KMMUENTER_CORE ;                                                \
        movq    PCPU(trampoline)+TR_RDX, %rdx ;                         \
        movq    PCPU(trampoline)+TR_RCX, %rcx                           \

/*
 * KMMUEXIT_CORE handles IBRS, STIBP, and MDS, but not ISOMMU.
 *
 * We don't re-execute the IBPB barrier on exit atm.
 *
 * The MDS barrier (Microarchitectural Data Sampling) should be executed
 * prior to any return to user-mode, if supported and enabled.  This is
 * Intel-only.
 *
 * WARNING! %rsp may not be usable (it could be pointing to the user
 *          stack at this point).  And we must save/restore any registers
 *          we use.
 */
#define KMMUEXIT_CORE                                                   \
        testl   $SPEC_CTRL_DUMMY_ENABLE|SPEC_CTRL_MDS_ENABLE, PCPU(trampoline)+TR_PCB_SPEC_CTRL+4 ; \
        je      43f ;                                                   \
        movq    %rax, PCPU(trampoline)+TR_RAX ;                         \
        movl    PCPU(trampoline)+TR_PCB_SPEC_CTRL+4, %eax ;             \
        testq   $SPEC_CTRL_MDS_ENABLE, %rax ;                           \
        je      41f ;                                                   \
        movq    $GSEL(GDATA_SEL, SEL_KPL), PCPU(trampoline)+TR_RCX ;    \
        verw    PCPU(trampoline)+TR_RCX ;                               \
41:     testq   $SPEC_CTRL_DUMMY_ENABLE, %rax ;                         \
        je      42f ;                                                   \
        movq    %rcx, PCPU(trampoline)+TR_RCX ;                         \
        movq    %rdx, PCPU(trampoline)+TR_RDX ;                         \
        andq    $SPEC_CTRL_IBRS|SPEC_CTRL_STIBP, %rax ;                 \
        movq    $MSR_SPEC_CTRL,%rcx ;                                   \
        xorl    %edx,%edx ;                                             \
        wrmsr ;                                                         \
        movq    PCPU(trampoline)+TR_RDX, %rdx ;                         \
        movq    PCPU(trampoline)+TR_RCX, %rcx ;                         \
42:     movq    PCPU(trampoline)+TR_RAX, %rax ;                         \
43:

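/*
 * Reader's note (added annotation): on CPUs enumerating MD_CLEAR, the
 * verw instruction has the documented side effect of overwriting the
 * microarchitectural buffers exploited by MDS, in addition to its
 * legacy segment-verification behavior.  The sequence above stages the
 * kernel data selector in per-cpu trampoline space and verw's it there
 * because %rsp may not be usable at this point.
 */
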
/*
 * We are positioned at the base of the trapframe.  Advance the trapframe
 * and handle MMU isolation.  MMU isolation requires us to copy the
 * hardware frame to the trampoline area before setting %cr3 to the
 * isolated map.  We then set the %rsp for iretq to TR_RIP in the
 * trampoline area (after restoring the register we saved in TR_ERR).
 */
#define KMMUEXIT                                                        \
        addq    $TF_RIP,%rsp ;                                          \
        KMMUEXIT_CORE ;                                                 \
        testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
        je      50f ;                                                   \
        movq    %rcx, PCPU(trampoline)+TR_ERR ; /* save in TR_ERR */    \
        popq    %rcx ;                          /* copy %rip */         \
        movq    %rcx, PCPU(trampoline)+TR_RIP ;                         \
        popq    %rcx ;                          /* copy %cs */          \
        movq    %rcx, PCPU(trampoline)+TR_CS ;                          \
        popq    %rcx ;                          /* copy %rflags */      \
        movq    %rcx, PCPU(trampoline)+TR_RFLAGS ;                      \
        popq    %rcx ;                          /* copy %rsp */         \
        movq    %rcx, PCPU(trampoline)+TR_RSP ;                         \
        popq    %rcx ;                          /* copy %ss */          \
        movq    %rcx, PCPU(trampoline)+TR_SS ;                          \
        movq    %gs:0,%rcx ;                                            \
        addq    $GD_TRAMPOLINE+TR_ERR,%rcx ;                            \
        movq    %rcx,%rsp ;                                             \
        movq    PCPU(trampoline)+TR_PCB_CR3_ISO,%rcx ;                  \
        movq    %rcx,%cr3 ;                                             \
        popq    %rcx ;          /* positioned at TR_RIP after this */   \
50:                                                                     \

/*
 * Warning: user stack pointer already loaded into %rsp at this
 * point.  We still have the kernel %gs.
 *
 * Caller will sysexit; we do not have to copy anything to the
 * trampoline area.
 */
#define KMMUEXIT_SYSCALL                                                \
        KMMUEXIT_CORE ;                                                 \
        testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
        je      50f ;                                                   \
        movq    %rcx, PCPU(trampoline)+TR_RCX ;                         \
        movq    PCPU(trampoline)+TR_PCB_CR3_ISO,%rcx ;                  \
        movq    %rcx,%cr3 ;                                             \
        movq    PCPU(trampoline)+TR_RCX, %rcx ;                         \
50:                                                                     \

/*
 * Macros to create and destroy a trap frame.  %rsp has already been shifted
 * to the base of the trapframe in the thread structure.
 */
#define PUSH_FRAME_REGS                                                 \
        movq    %rdi,TF_RDI(%rsp) ;                                     \
        movq    %rsi,TF_RSI(%rsp) ;                                     \
        movq    %rdx,TF_RDX(%rsp) ;                                     \
        movq    %rcx,TF_RCX(%rsp) ;                                     \
        movq    %r8,TF_R8(%rsp) ;                                       \
        movq    %r9,TF_R9(%rsp) ;                                       \
        movq    %rax,TF_RAX(%rsp) ;                                     \
        movq    %rbx,TF_RBX(%rsp) ;                                     \
        movq    %rbp,TF_RBP(%rsp) ;                                     \
        movq    %r10,TF_R10(%rsp) ;                                     \
        movq    %r11,TF_R11(%rsp) ;                                     \
        movq    %r12,TF_R12(%rsp) ;                                     \
        movq    %r13,TF_R13(%rsp) ;                                     \
        movq    %r14,TF_R14(%rsp) ;                                     \
        movq    %r15,TF_R15(%rsp) ;                                     \
                                        /* SECURITY CLEAR REGS */       \
        xorq    %rax,%rax ;                                             \
        movq    %rax,%rbx ;                                             \
        movq    %rax,%rcx ;                                             \
        movq    %rax,%rdx ;                                             \
        movq    %rax,%rdi ;                                             \
        movq    %rax,%rsi ;                                             \
        movq    %rax,%rbp ;                                             \
        movq    %rax,%r8 ;                                              \
        movq    %rax,%r9 ;                                              \
        movq    %rax,%r10 ;                                             \
        movq    %rax,%r11 ;                                             \
        movq    %rax,%r12 ;                                             \
        movq    %rax,%r13 ;                                             \
        movq    %rax,%r14 ;                                             \
        movq    %rax,%r15                                               \

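/*
 * Reader's note (added annotation): the SECURITY CLEAR REGS block wipes
 * every general purpose register (other than %rsp) once the user values
 * have been saved to the trap frame, so stale user-controlled register
 * contents cannot feed speculative-execution gadgets in the kernel.
 */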

/*
 * PUSH_FRAME is the first thing executed upon interrupt entry.  We are
 * responsible for swapgs execution and the KMMUENTER dispatch.
 *
 * NOTE - PUSH_FRAME code doesn't mess with %gs or the stack, or assume it can
 *        use PCPU(trampoline), if the trap/exception is from supervisor mode.
 *        It only messes with that stuff when the trap/exception is from user
 *        mode.  Our DBG and NMI code depends on this behavior.
 */
#define PUSH_FRAME_TFRIP                                                \
        testb   $SEL_RPL_MASK,TF_CS-TF_RIP(%rsp) ; /* from userland? */ \
        jz      1f ;                                                    \
        swapgs ;                /* from userland */                     \
        KMMUENTER_TFRIP ;       /* from userland */                     \
1:                                                                      \
        subq    $TF_RIP,%rsp ;                                          \
        PUSH_FRAME_REGS                                                 \

#define PUSH_FRAME_TFERR                                                \
        testb   $SEL_RPL_MASK,TF_CS-TF_ERR(%rsp) ; /* from userland? */ \
        jz      1f ;                                                    \
        swapgs ;                /* from userland */                     \
        KMMUENTER_TFERR ;       /* from userland */                     \
1:                                                                      \
        subq    $TF_ERR,%rsp ;                                          \
        PUSH_FRAME_REGS                                                 \

#define PUSH_FRAME_TFERR_SAVECR2                                        \
        testb   $SEL_RPL_MASK,TF_CS-TF_ERR(%rsp) ;                      \
        jz      1f ;                                                    \
        swapgs ;                /* from userland */                     \
        KMMUENTER_TFERR_SAVECR2 ;/* from userland */                    \
        subq    $TF_ERR,%rsp ;                                          \
        PUSH_FRAME_REGS ;                                               \
        movq    PCPU(trampoline)+TR_CR2, %r10 ;                         \
        jmp     2f ;                                                    \
1:                                                                      \
        subq    $TF_ERR,%rsp ;                                          \
        PUSH_FRAME_REGS ;                                               \
        movq    %cr2, %r10 ;                                            \
2:                                                                      \
        movq    %r10, TF_ADDR(%rsp)

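/*
 * Reader's note (added annotation): the SAVECR2 variant exists for the
 * page fault path.  %cr2 (the faulting address) is clobbered by any
 * subsequent page fault, so it is captured into TR_CR2 before the %cr3
 * reload and then copied into tf_addr once the trap frame is built.
 */
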
/*
 * POP_FRAME is issued just prior to the iretq, or just prior to a
 * jmp doreti_iret.  The final instruction must be passed in to the macro.
 */
#define POP_FRAME(lastinsn)                                             \
        movq    TF_RDI(%rsp),%rdi ;                                     \
        movq    TF_RSI(%rsp),%rsi ;                                     \
        movq    TF_RDX(%rsp),%rdx ;                                     \
        movq    TF_RCX(%rsp),%rcx ;                                     \
        movq    TF_R8(%rsp),%r8 ;                                       \
        movq    TF_R9(%rsp),%r9 ;                                       \
        movq    TF_RAX(%rsp),%rax ;                                     \
        movq    TF_RBX(%rsp),%rbx ;                                     \
        movq    TF_RBP(%rsp),%rbp ;                                     \
        movq    TF_R10(%rsp),%r10 ;                                     \
        movq    TF_R11(%rsp),%r11 ;                                     \
        movq    TF_R12(%rsp),%r12 ;                                     \
        movq    TF_R13(%rsp),%r13 ;                                     \
        movq    TF_R14(%rsp),%r14 ;                                     \
        movq    TF_R15(%rsp),%r15 ;                                     \
        cli ;                                                           \
        testb   $SEL_RPL_MASK,TF_CS(%rsp) ; /* return to user? */       \
        jz      1f ;                                                    \
        KMMUEXIT ;              /* return to user */                    \
        swapgs ;                /* return to user */                    \
        jmp     2f ;                                                    \
1:                                                                      \
        addq    $TF_RIP,%rsp ;  /* setup for iretq */                   \
2:                                                                      \
        lastinsn

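/*
 * Usage sketch (illustrative annotation):
 *
 *      POP_FRAME(iretq)                direct return
 *      POP_FRAME(jmp doreti_iret)      via the doreti iretq fixup path
 */
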
/*
 * Access per-CPU data.
 */
#define PCPU(member)            %gs:gd_ ## member
#define PCPU_E8(member,idx)     %gs:gd_ ## member(,idx,8)
#define PCPU_ADDR(member, reg)                                  \
        movq %gs:PC_PRVSPACE, reg ;                             \
        addq $PC_ ## member, reg

#endif /* LOCORE */

#endif /* !_CPU_ASMACROS_H_ */