/*
 * Copyright (c) 1993 The Regents of the University of California.
 * Copyright (c) 2008 The DragonFly Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/amd64/include/asmacros.h,v 1.32 2006/10/28 06:04:29 bde Exp $
 */

#ifndef _CPU_ASMACROS_H_
#define _CPU_ASMACROS_H_

#include <sys/cdefs.h>
#include <machine/specialreg.h>

/* XXX too much duplication in various asm*.h's. */

/*
 * CNAME is used to manage the relationship between symbol names in C
 * and the equivalent assembly language names.  CNAME is given a name as
 * it would be used in a C program.  It expands to the equivalent assembly
 * language name.
 */
#define CNAME(csym)             csym

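/*
 * On x86_64 ELF there is no leading-underscore convention, so CNAME is
 * an identity mapping: CNAME(bcopy), to pick an illustrative symbol,
 * simply expands to bcopy.
 */
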
#define ALIGN_DATA      .p2align 3      /* 8-byte alignment, zero filled */
#define ALIGN_TEXT      .p2align 4,0x90 /* 16-byte alignment, nop filled */
#define SUPERALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */

#define GEN_ENTRY(name)         ALIGN_TEXT; .globl CNAME(name); \
                                .type CNAME(name),@function; CNAME(name):
#define NON_GPROF_ENTRY(name)   GEN_ENTRY(name)
#define NON_GPROF_RET           .byte 0xc3      /* opcode for `ret' */

#define END(name)               .size name, . - name

/*
 * ALTENTRY() has to align because it is before a corresponding ENTRY().
 * ENTRY() has to align too because there may be no ALTENTRY() before it.
 * If there is a previous ALTENTRY() then the alignment code for ENTRY()
 * is empty.
 */
#define ALTENTRY(name)          GEN_ENTRY(name)
#define CROSSJUMP(jtrue, label, jfalse) jtrue label
#define CROSSJUMPTARGET(label)
#define ENTRY(name)             GEN_ENTRY(name)
#define FAKE_MCOUNT(caller)
#define MCOUNT
#define MCOUNT_LABEL(name)
#define MEXITCOUNT

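/*
 * Illustrative sketch only (not taken from this header): in a .S file
 * a C-callable routine is typically bracketed as
 *
 *      ENTRY(sample_func)      # aligned, .globl, @function
 *              ret
 *      END(sample_func)        # sets the ELF symbol size
 *
 * where sample_func is a hypothetical name used just for this example.
 */
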
#ifdef LOCORE
/*
 * Convenience macro for declaring interrupt entry points.
 */
#define IDTVEC(name)    ALIGN_TEXT; .globl __CONCAT(X,name); \
                        .type __CONCAT(X,name),@function; __CONCAT(X,name):

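/*
 * For example, IDTVEC(div) emits an aligned, global entry point named
 * Xdiv (the X-prefixed symbol an IDT descriptor points at); "div" is
 * used here purely as an illustration.
 */
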
/*
 * stack frame macro support - supports mmu isolation, swapgs, and
 * stack frame pushing and popping.
 */

/*
 * Kernel pmap isolation to work around the massive Intel mmu bug
 * that allows kernel memory to be sussed out due to speculative memory
 * reads and instruction execution creating timing differences that can
 * be detected by userland.  e.g. force speculative read, speculatively
 * execute a cmp/branch sequence, detect timing.  Iterate cmp $values
 * to suss-out the content of speculatively read kernel memory.
 *
 * We do this by creating a trampoline area for all user->kernel and
 * kernel->user transitions.  The trampoline area allows us to limit
 * the reach of the kernel map in the isolated version of the user pmap
 * to JUST the trampoline area (for all cpus), the tss, and the vector
 * area.
 *
 * It is very important that these transitions not access any memory
 * outside of the trampoline page while the isolated user process pmap
 * is active in %cr3.
 *
 * The trampoline does not add much overhead when pmap isolation is
 * disabled, so we just run with it regardless.  Of course, when pmap
 * isolation is enabled, the %cr3 loads add 150-250ns to every system
 * call as well as (without PCID) smash the TLB.
 *
 * KMMUENTER -  Executed by the trampoline when a user->kernel transition
 *              is detected.  The stack pointer points into the pcpu
 *              trampoline space and is available for register save/restore.
 *              Other registers have not yet been saved.  %gs points at
 *              the kernel pcpu structure.
 *
 *              Caller has already determined that a transition is in
 *              progress and has already issued the swapgs.  hwtf indicates
 *              how much hardware has already pushed.
 *
 * KMMUEXIT  -  Executed when a kernel->user transition is made.  The stack
 *              pointer points into the pcpu trampoline space and we are
 *              almost ready to iretq.  %gs still points at the kernel pcpu
 *              structure.
 *
 *              Caller has already determined that a transition is in
 *              progress.  hwtf indicates how much hardware has already
 *              pushed.
 */

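/*
 * Rough flow, as a sketch summarizing the macros below:
 *
 *      user mode
 *        -> hardware pushes some or all of ss/rsp/rflags/cs/rip
 *           onto the pcpu trampoline stack
 *        -> swapgs ; KMMUENTER_* reloads %cr3 with the real kernel
 *           pmap and pokes IBRS/IBPB as configured
 *        -> the hardware frame is copied to the kernel stack and
 *           normal trap/syscall processing proceeds
 *      ...
 *      KMMUEXIT_* copies the hardware frame back to the trampoline,
 *      reloads the isolated %cr3, and the caller returns to user mode
 *      (iretq or the syscall return path).
 */
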
/*
 * KMMUENTER_CORE - Handles ISOMMU, IBRS, and IBPB.  Caller has already
 *                  saved %rcx and %rdx.  We have to deal with %rax.
 *
 *                  XXX If IBPB is not supported, try to clear the
 *                  call/return hw cache with a long chained-call sequence?
 *
 * NOTE - IBRS2 - We are leaving IBRS on full-time.  However, Intel
 *                believes it is not safe unless the MSR is poked on each
 *                user->kernel transition, so poke the MSR for both IBRS1
 *                and IBRS2.
 */
#define KMMUENTER_CORE                                                  \
        testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
        je      40f ;                                                   \
        movq    PCPU(trampoline)+TR_PCB_CR3,%rcx ;                      \
        movq    %rcx,%cr3 ;                                             \
40:     movl    PCPU(trampoline)+TR_PCB_SPEC_CTRL,%edx ;                \
        testq   %rdx, %rdx ;                                            \
        je      43f ;                                                   \
        movq    %rax, PCPU(trampoline)+TR_RAX ;                         \
        testq   $SPEC_CTRL_DUMMY_ENABLE,%rdx ;                          \
        je      41f ;                                                   \
        movq    %rdx, %rax ;                                            \
        andq    $SPEC_CTRL_IBRS|SPEC_CTRL_STIBP, %rax ;                 \
        movq    $MSR_SPEC_CTRL,%rcx ;                                   \
        xorl    %edx,%edx ;                                             \
        wrmsr ;                                                         \
        movl    PCPU(trampoline)+TR_PCB_SPEC_CTRL,%edx ;                \
41:     testq   $SPEC_CTRL_DUMMY_IBPB,%rdx ;                            \
        je      42f ;                                                   \
        movl    $MSR_PRED_CMD,%ecx ;                                    \
        movl    $1,%eax ;                                               \
        xorl    %edx,%edx ;                                             \
        wrmsr ;                                                         \
42:     movq    PCPU(trampoline)+TR_RAX, %rax ;                         \
43:                                                                     \

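/*
 * Note on the wrmsr sequences above: wrmsr writes %edx:%eax to the MSR
 * selected by %ecx, which is why %edx is zeroed and the IBRS/STIBP (or
 * IBPB) bits are staged in %eax before each write.
 */
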
/*
 * Enter with trampoline, hardware pushed up to %rip
 */
#define KMMUENTER_TFRIP                                                 \
        subq    $TR_RIP, %rsp ;                                         \
        movq    %rcx, TR_RCX(%rsp) ;                                    \
        movq    %rdx, TR_RDX(%rsp) ;                                    \
        KMMUENTER_CORE ;                                                \
        movq    %rsp, %rcx ;            /* trampoline rsp */            \
        movq    PCPU(trampoline)+TR_PCB_RSP,%rsp ; /* kstack rsp */     \
        movq    TR_SS(%rcx), %rdx ;                                     \
        pushq   %rdx ;                                                  \
        movq    TR_RSP(%rcx), %rdx ;                                    \
        pushq   %rdx ;                                                  \
        movq    TR_RFLAGS(%rcx), %rdx ;                                 \
        pushq   %rdx ;                                                  \
        movq    TR_CS(%rcx), %rdx ;                                     \
        pushq   %rdx ;                                                  \
        movq    TR_RIP(%rcx), %rdx ;                                    \
        pushq   %rdx ;                                                  \
        movq    TR_RDX(%rcx), %rdx ;                                    \
        movq    TR_RCX(%rcx), %rcx                                      \

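/*
 * In other words: the hardware-pushed words (ss/rsp/rflags/cs/rip) are
 * re-pushed from the trampoline onto the real kernel stack, after which
 * %rdx and %rcx are restored from their trampoline save slots.
 */
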
/*
 * Enter with trampoline, hardware pushed up to ERR
 */
#define KMMUENTER_TFERR                                                 \
        subq    $TR_ERR, %rsp ;                                         \
        movq    %rcx, TR_RCX(%rsp) ;                                    \
        movq    %rdx, TR_RDX(%rsp) ;                                    \
        KMMUENTER_CORE ;                                                \
        movq    %rsp, %rcx ;            /* trampoline rsp */            \
        movq    PCPU(trampoline)+TR_PCB_RSP,%rsp ; /* kstack rsp */     \
        movq    TR_SS(%rcx), %rdx ;                                     \
        pushq   %rdx ;                                                  \
        movq    TR_RSP(%rcx), %rdx ;                                    \
        pushq   %rdx ;                                                  \
        movq    TR_RFLAGS(%rcx), %rdx ;                                 \
        pushq   %rdx ;                                                  \
        movq    TR_CS(%rcx), %rdx ;                                     \
        pushq   %rdx ;                                                  \
        movq    TR_RIP(%rcx), %rdx ;                                    \
        pushq   %rdx ;                                                  \
        movq    TR_ERR(%rcx), %rdx ;                                    \
        pushq   %rdx ;                                                  \
        movq    TR_RDX(%rcx), %rdx ;                                    \
        movq    TR_RCX(%rcx), %rcx                                      \

/*
 * Enter with trampoline, hardware pushed up to ERR and
 * we need to save %cr2 early (before potentially reloading %cr3).
 */
#define KMMUENTER_TFERR_SAVECR2                                         \
        subq    $TR_ERR, %rsp ;                                         \
        movq    %rcx, TR_RCX(%rsp) ;                                    \
        movq    %rdx, TR_RDX(%rsp) ;                                    \
        movq    %cr2, %rcx ;                                            \
        movq    %rcx, PCPU(trampoline)+TR_CR2 ;                         \
        KMMUENTER_CORE ;                                                \
        movq    %rsp, %rcx ;            /* trampoline rsp */            \
        movq    PCPU(trampoline)+TR_PCB_RSP,%rsp ; /* kstack rsp */     \
        movq    TR_SS(%rcx), %rdx ;                                     \
        pushq   %rdx ;                                                  \
        movq    TR_RSP(%rcx), %rdx ;                                    \
        pushq   %rdx ;                                                  \
        movq    TR_RFLAGS(%rcx), %rdx ;                                 \
        pushq   %rdx ;                                                  \
        movq    TR_CS(%rcx), %rdx ;                                     \
        pushq   %rdx ;                                                  \
        movq    TR_RIP(%rcx), %rdx ;                                    \
        pushq   %rdx ;                                                  \
        movq    TR_ERR(%rcx), %rdx ;                                    \
        pushq   %rdx ;                                                  \
        movq    TR_RDX(%rcx), %rdx ;                                    \
        movq    TR_RCX(%rcx), %rcx                                      \

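/*
 * %cr2 (the page fault address) is snapshotted into TR_CR2 before
 * KMMUENTER_CORE switches %cr3; presumably this guards against a later
 * fault taken during the transition clobbering the original faulting
 * address before the handler reads it.
 */
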
/*
 * Set %cr3 if necessary on syscall entry.  No registers may be
 * disturbed.
 *
 * NOTE: TR_CR2 is used by the caller to save %rsp; we cannot use it here.
 */
#define KMMUENTER_SYSCALL                                               \
        movq    %rcx, PCPU(trampoline)+TR_RCX ;                         \
        movq    %rdx, PCPU(trampoline)+TR_RDX ;                         \
        KMMUENTER_CORE ;                                                \
        movq    PCPU(trampoline)+TR_RDX, %rdx ;                         \
        movq    PCPU(trampoline)+TR_RCX, %rcx                           \

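/*
 * Unlike the trap paths, syscall entry arrives with %rsp still pointing
 * at user memory and no hardware frame pushed, so the scratch registers
 * are stashed in per-cpu trampoline slots rather than on a stack.
 */
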
/*
 * KMMUEXIT_CORE handles IBRS and STIBP, but not ISOMMU.
 *
 * We don't re-execute the IBPB barrier on exit at the moment.
 */
#define KMMUEXIT_CORE                                                   \
        testq   $SPEC_CTRL_DUMMY_ENABLE,PCPU(trampoline)+TR_PCB_SPEC_CTRL+4 ; \
        je      41f ;                                                   \
        movq    %rax, PCPU(trampoline)+TR_RAX ;                         \
        movq    %rcx, PCPU(trampoline)+TR_RCX ;                         \
        movq    %rdx, PCPU(trampoline)+TR_RDX ;                         \
        movl    PCPU(trampoline)+TR_PCB_SPEC_CTRL+4, %eax ;             \
        andq    $SPEC_CTRL_IBRS|SPEC_CTRL_STIBP, %rax ;                 \
        movq    $MSR_SPEC_CTRL,%rcx ;                                   \
        xorl    %edx,%edx ;                                             \
        wrmsr ;                                                         \
        movq    PCPU(trampoline)+TR_RDX, %rdx ;                         \
        movq    PCPU(trampoline)+TR_RCX, %rcx ;                         \
        movq    PCPU(trampoline)+TR_RAX, %rax ;                         \
41:

/*
 * We are positioned at the base of the trapframe.  Advance the trapframe
 * and handle MMU isolation.  MMU isolation requires us to copy the
 * hardware frame to the trampoline area before setting %cr3 to the
 * isolated map.  We then set the %rsp for iretq to TR_RIP in the
 * trampoline area (after restoring the register we saved in TR_ERR).
 */
#define KMMUEXIT                                                        \
        addq    $TF_RIP,%rsp ;                                          \
        KMMUEXIT_CORE ;                                                 \
        testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
        je      40f ;                                                   \
        movq    %rcx, PCPU(trampoline)+TR_ERR ; /* save in TR_ERR */    \
        popq    %rcx ;                          /* copy %rip */         \
        movq    %rcx, PCPU(trampoline)+TR_RIP ;                         \
        popq    %rcx ;                          /* copy %cs */          \
        movq    %rcx, PCPU(trampoline)+TR_CS ;                          \
        popq    %rcx ;                          /* copy %rflags */      \
        movq    %rcx, PCPU(trampoline)+TR_RFLAGS ;                      \
        popq    %rcx ;                          /* copy %rsp */         \
        movq    %rcx, PCPU(trampoline)+TR_RSP ;                         \
        popq    %rcx ;                          /* copy %ss */          \
        movq    %rcx, PCPU(trampoline)+TR_SS ;                          \
        movq    %gs:0,%rcx ;                                            \
        addq    $GD_TRAMPOLINE+TR_ERR,%rcx ;                            \
        movq    %rcx,%rsp ;                                             \
        movq    PCPU(trampoline)+TR_PCB_CR3_ISO,%rcx ;                  \
        movq    %rcx,%cr3 ;                                             \
        popq    %rcx ;          /* positioned at TR_RIP after this */   \
40:                                                                     \

/*
 * Warning: user stack pointer already loaded into %rsp at this
 * point.  We still have the kernel %gs.
 *
 * Caller will sysexit; we do not have to copy anything to the
 * trampoline area.
 */
#define KMMUEXIT_SYSCALL                                                \
        KMMUEXIT_CORE ;                                                 \
        testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
        je      40f ;                                                   \
        movq    %rcx, PCPU(trampoline)+TR_RCX ;                         \
        movq    PCPU(trampoline)+TR_PCB_CR3_ISO,%rcx ;                  \
        movq    %rcx,%cr3 ;                                             \
        movq    PCPU(trampoline)+TR_RCX, %rcx ;                         \
40:                                                                     \

/*
 * Macros to create and destroy a trap frame.  %rsp has already been
 * shifted to the base of the trapframe in the thread structure.
 */
#define PUSH_FRAME_REGS                                                 \
        movq    %rdi,TF_RDI(%rsp) ;                                     \
        movq    %rsi,TF_RSI(%rsp) ;                                     \
        movq    %rdx,TF_RDX(%rsp) ;                                     \
        movq    %rcx,TF_RCX(%rsp) ;                                     \
        movq    %r8,TF_R8(%rsp) ;                                       \
        movq    %r9,TF_R9(%rsp) ;                                       \
        movq    %rax,TF_RAX(%rsp) ;                                     \
        movq    %rbx,TF_RBX(%rsp) ;                                     \
        movq    %rbp,TF_RBP(%rsp) ;                                     \
        movq    %r10,TF_R10(%rsp) ;                                     \
        movq    %r11,TF_R11(%rsp) ;                                     \
        movq    %r12,TF_R12(%rsp) ;                                     \
        movq    %r13,TF_R13(%rsp) ;                                     \
        movq    %r14,TF_R14(%rsp) ;                                     \
        movq    %r15,TF_R15(%rsp) ;                                     \
                                        /* SECURITY CLEAR REGS */       \
        xorq    %rax,%rax ;                                             \
        movq    %rax,%rbx ;                                             \
        movq    %rax,%rcx ;                                             \
        movq    %rax,%rdx ;                                             \
        movq    %rax,%rdi ;                                             \
        movq    %rax,%rsi ;                                             \
        movq    %rax,%rbp ;                                             \
        movq    %rax,%r8 ;                                              \
        movq    %rax,%r9 ;                                              \
        movq    %rax,%r10 ;                                             \
        movq    %rax,%r11 ;                                             \
        movq    %rax,%r12 ;                                             \
        movq    %rax,%r13 ;                                             \
        movq    %rax,%r14 ;                                             \
        movq    %rax,%r15                                               \

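/*
 * The "SECURITY CLEAR REGS" block zeroes every saved GP register;
 * presumably this keeps stale user-controlled values out of reach of
 * speculative-execution gadgets while we run in the kernel.
 */
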

/*
 * PUSH_FRAME is the first thing executed upon interrupt entry.  We are
 * responsible for swapgs execution and the KMMUENTER dispatch.
 *
 * NOTE - PUSH_FRAME code doesn't mess with %gs or the stack, or assume it
 *        can use PCPU(trampoline), if the trap/exception is from supervisor
 *        mode.  It only messes with that stuff when the trap/exception is
 *        from user mode.  Our DBG and NMI code depend on this behavior.
 */
#define PUSH_FRAME_TFRIP                                                \
        testb   $SEL_RPL_MASK,TF_CS-TF_RIP(%rsp) ; /* from userland? */ \
        jz      1f ;                                                    \
        swapgs ;                /* from userland */                     \
        KMMUENTER_TFRIP ;       /* from userland */                     \
1:                                                                      \
        subq    $TF_RIP,%rsp ;                                          \
        PUSH_FRAME_REGS                                                 \

#define PUSH_FRAME_TFERR                                                \
        testb   $SEL_RPL_MASK,TF_CS-TF_ERR(%rsp) ; /* from userland? */ \
        jz      1f ;                                                    \
        swapgs ;                /* from userland */                     \
        KMMUENTER_TFERR ;       /* from userland */                     \
1:                                                                      \
        subq    $TF_ERR,%rsp ;                                          \
        PUSH_FRAME_REGS                                                 \

#define PUSH_FRAME_TFERR_SAVECR2                                        \
        testb   $SEL_RPL_MASK,TF_CS-TF_ERR(%rsp) ;                      \
        jz      1f ;                                                    \
        swapgs ;                /* from userland */                     \
        KMMUENTER_TFERR_SAVECR2 ;/* from userland */                    \
        subq    $TF_ERR,%rsp ;                                          \
        PUSH_FRAME_REGS ;                                               \
        movq    PCPU(trampoline)+TR_CR2, %r10 ;                         \
        jmp 2f ;                                                        \
1:                                                                      \
        subq    $TF_ERR,%rsp ;                                          \
        PUSH_FRAME_REGS ;                                               \
        movq    %cr2, %r10 ;                                            \
2:                                                                      \
        movq    %r10, TF_ADDR(%rsp)

/*
 * POP_FRAME is issued just prior to the iretq, or just prior to a
 * jmp doreti_iret; the terminal instruction must be passed in to the
 * macro as `lastinsn'.
 */
#define POP_FRAME(lastinsn)                                             \
        movq    TF_RDI(%rsp),%rdi ;                                     \
        movq    TF_RSI(%rsp),%rsi ;                                     \
        movq    TF_RDX(%rsp),%rdx ;                                     \
        movq    TF_RCX(%rsp),%rcx ;                                     \
        movq    TF_R8(%rsp),%r8 ;                                       \
        movq    TF_R9(%rsp),%r9 ;                                       \
        movq    TF_RAX(%rsp),%rax ;                                     \
        movq    TF_RBX(%rsp),%rbx ;                                     \
        movq    TF_RBP(%rsp),%rbp ;                                     \
        movq    TF_R10(%rsp),%r10 ;                                     \
        movq    TF_R11(%rsp),%r11 ;                                     \
        movq    TF_R12(%rsp),%r12 ;                                     \
        movq    TF_R13(%rsp),%r13 ;                                     \
        movq    TF_R14(%rsp),%r14 ;                                     \
        movq    TF_R15(%rsp),%r15 ;                                     \
        cli ;                                                           \
        testb   $SEL_RPL_MASK,TF_CS(%rsp) ; /* return to user? */       \
        jz      1f ;                                                    \
        KMMUEXIT ;              /* return to user */                    \
        swapgs ;                /* return to user */                    \
        jmp     2f ;                                                    \
1:                                                                      \
        addq    $TF_RIP,%rsp ;  /* setup for iretq */                   \
2:                                                                      \
        lastinsn

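/*
 * Per the comment above, the two expected invocations are, e.g.:
 *
 *      POP_FRAME(iretq)
 *      POP_FRAME(jmp doreti_iret)
 */
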
/*
 * Access per-CPU data.
 */
#define PCPU(member)            %gs:gd_ ## member
#define PCPU_E8(member,idx)     %gs:gd_ ## member(,idx,8)
#define PCPU_ADDR(member, reg)                                  \
        movq %gs:PC_PRVSPACE, reg ;                             \
        addq $PC_ ## member, reg

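/*
 * For example, PCPU(trampoline)+TR_PCB_FLAGS, as used throughout this
 * file, expands to %gs:gd_trampoline+TR_PCB_FLAGS.
 */
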
#endif /* LOCORE */

#endif /* !_CPU_ASMACROS_H_ */