kernel - Intel user/kernel separation MMU bug fix part 5
[dragonfly.git] / sys / cpu / x86_64 / include / asmacros.h
1 /*
2  * Copyright (c) 1993 The Regents of the University of California.
3  * Copyright (c) 2008 The DragonFly Project.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. Neither the name of the University nor the names of its contributors
15  *    may be used to endorse or promote products derived from this software
16  *    without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  * $FreeBSD: src/sys/amd64/include/asmacros.h,v 1.32 2006/10/28 06:04:29 bde Exp $
31  */
32
33 #ifndef _CPU_ASMACROS_H_
34 #define _CPU_ASMACROS_H_
35
36 #include <sys/cdefs.h>
37
38 /* XXX too much duplication in various asm*.h's. */
39
40 /*
41  * CNAME is used to manage the relationship between symbol names in C
42  * and the equivalent assembly language names.  CNAME is given a name as
43  * it would be used in a C program.  It expands to the equivalent assembly
44  * language name.
 *
 * Here the mapping is the identity: no leading underscore or other
 * mangling is applied to the C symbol name.
45  */
46 #define CNAME(csym)             csym
47
48 #define ALIGN_DATA      .p2align 3      /* 8 byte alignment, zero filled */
/*
 * Both arms of the old "#ifdef GPROF" conditional defined ALIGN_TEXT
 * identically, so the redundant conditional is collapsed into a single
 * unconditional definition.
 */
52 #define ALIGN_TEXT      .p2align 4,0x90 /* 16-byte alignment, nop filled */
54 #define SUPERALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
55
/*
 * GEN_ENTRY() emits a global, aligned function entry point tagged with
 * an ELF @function type; END() closes the symbol by recording its size
 * for the linker and debuggers.  NON_GPROF_RET emits a literal `ret'
 * opcode byte, bypassing any `ret' macro redefinition (see the GUPROF
 * case below).
 */
56 #define GEN_ENTRY(name)         ALIGN_TEXT; .globl CNAME(name); \
57                                 .type CNAME(name),@function; CNAME(name):
58 #define NON_GPROF_ENTRY(name)   GEN_ENTRY(name)
59 #define NON_GPROF_RET           .byte 0xc3      /* opcode for `ret' */
60
61 #define END(name)               .size name, . - name
62
63 #ifdef GPROF
64 /*
65  * __mcount is like [.]mcount except that it doesn't require its caller to set
66  * up a frame pointer.  It must be called before pushing anything onto the
67  * stack.  gcc should eventually generate code to call __mcount in most
68  * cases.  This would make -pg in combination with -fomit-frame-pointer
69  * useful.  gcc has a configuration variable PROFILE_BEFORE_PROLOGUE to
70  * allow profiling before setting up the frame pointer, but this is
71  * inadequate for good handling of special cases, e.g., -fpic works best
72  * with profiling after the prologue.
73  *
74  * [.]mexitcount is a new function to support non-statistical profiling if an
75  * accurate clock is available.  For C sources, calls to it are generated
76  * by the FreeBSD extension `-mprofiler-epilogue' to gcc.  It is best to
77  * call [.]mexitcount at the end of a function like the MEXITCOUNT macro does,
78  * but gcc currently generates calls to it at the start of the epilogue to
79  * avoid problems with -fpic.
80  *
81  * [.]mcount and __mcount may clobber the call-used registers and %eflags.
82  * [.]mexitcount may clobber %ecx and %eflags.
83  *
84  * Cross-jumping makes non-statistical profiling timing more complicated.
85  * It is handled in many cases by calling [.]mexitcount before jumping.  It
86  * is handled for conditional jumps using CROSSJUMP() and CROSSJUMP_LABEL().
87  * It is handled for some fault-handling jumps by not sharing the exit
88  * routine.
89  *
90  * ALTENTRY() must be before a corresponding ENTRY() so that it can jump to
91  * the main entry point.  Note that alt entries are counted twice.  They
92  * have to be counted as ordinary entries for gprof to get the call times
93  * right for the ordinary entries.
94  *
95  * High local labels are used in macros to avoid clashes with local labels
96  * in functions.
97  *
98  * Ordinary `ret' is used instead of a macro `RET' because there are a lot
99  * of `ret's.  0xc3 is the opcode for `ret' (`#define ret ... ret' can't
100  * be used because this file is sometimes preprocessed in traditional mode).
101  * `ret' clobbers eflags but this doesn't matter.
102  */
103 #define ALTENTRY(name)          GEN_ENTRY(name) ; MCOUNT ; MEXITCOUNT ; jmp 9f
104 #define CROSSJUMP(jtrue, label, jfalse) \
105         jfalse 8f; MEXITCOUNT; jmp __CONCAT(to,label); 8:
106 #define CROSSJUMPTARGET(label) \
107         ALIGN_TEXT; __CONCAT(to,label): ; MCOUNT; jmp label
108 #define ENTRY(name)             GEN_ENTRY(name) ; 9: ; MCOUNT
109 #define FAKE_MCOUNT(caller)     pushq caller ; call __mcount ; popq %rcx
110 #define MCOUNT                  call __mcount
111 #define MCOUNT_LABEL(name)      GEN_ENTRY(name) ; nop ; ALIGN_TEXT
112 #ifdef GUPROF
113 #define MEXITCOUNT              call .mexitcount
114 #define ret                     MEXITCOUNT ; NON_GPROF_RET
115 #else
116 #define MEXITCOUNT
117 #endif
118
119 #else /* !GPROF */
120 /*
121  * ALTENTRY() has to align because it is before a corresponding ENTRY().
122  * ENTRY() has to align too because there may be no ALTENTRY() before it.
123  * If there is a previous ALTENTRY() then the alignment code for ENTRY()
124  * is empty.
125  */
126 #define ALTENTRY(name)          GEN_ENTRY(name)
127 #define CROSSJUMP(jtrue, label, jfalse) jtrue label
128 #define CROSSJUMPTARGET(label)
129 #define ENTRY(name)             GEN_ENTRY(name)
130 #define FAKE_MCOUNT(caller)
131 #define MCOUNT
132 #define MCOUNT_LABEL(name)
133 #define MEXITCOUNT
134 #endif /* GPROF */
135
136 #ifdef LOCORE
137 /*
138  * Convenience macro for declaring interrupt entry points.
 *
 * IDTVEC(name) emits a global, aligned entry point named `Xname'
 * (the symbol is the macro argument prefixed with `X') tagged with an
 * ELF @function type.
139  */
140 #define IDTVEC(name)    ALIGN_TEXT; .globl __CONCAT(X,name); \
141                         .type __CONCAT(X,name),@function; __CONCAT(X,name):
142
143 /*
144  * stack frame macro support - supports mmu isolation, swapgs, and
145  * stack frame pushing and popping.
146  */
147
148 /*
149  * Kernel pmap isolation to work-around the massive Intel mmu bug
150  * that allows kernel memory to be sussed out due to speculative memory
151  * reads and instruction execution creating timing differences that can
152  * be detected by userland.  e.g. force speculative read, speculatively
153  * execute a cmp/branch sequence, detect timing.  Iterate cmp $values
154  * to suss-out content of speculatively read kernel memory.
155  *
156  * We do this by creating a trampoline area for all user->kernel and
157  * kernel->user transitions.  The trampoline area allows us to limit
158  * the reach into the kernel map, in the isolated version of the user pmap,
159  * to JUST the trampoline area (for all cpus), tss, and vector area.
160  *
161  * It is very important that these transitions not access any memory
162  * outside of the trampoline page while the isolated user process pmap
163  * is active in %cr3.
164  *
165  * The trampoline does not add much overhead when pmap isolation is
166  * disabled, so we just run with it regardless.  Of course, when pmap
167  * isolation is enabled, the %cr3 loads add 150-250ns to every system
168  * call as well as (without PCID) smash the TLB.
169  *
170  * KMMUENTER -  Executed by the trampoline when a user->kernel transition
171  *              is detected.  The stack pointer points into the pcpu
172  *              trampoline space and is available for register save/restore.
173  *              Other registers have not yet been saved.  %gs points at
174  *              the kernel pcpu structure.
175  *
176  *              Caller has already determined that a transition is in
177  *              progress and has already issued the swapgs.  hwtf indicates
178  *              how much hardware has already pushed.
179  *
180  * KMMUEXIT  -  Executed when a kernel->user transition is made.  The stack
181  *              pointer points into the pcpu trampoline space and we are
182  *              almost ready to iretq.  %gs still points at the kernel pcpu
183  *              structure.
184  *
185  *              Caller has already determined that a transition is in
186  *              progress.  hwtf indicates how much hardware has already
187  *              pushed.
188  */
/*
 * KMMUENTER_TFRIP - hardware frame begins at the %rip slot (no error
 * code was pushed).  If PCB_ISOMMU is set, load the kernel %cr3, then
 * switch %rsp from the trampoline to the kernel stack and copy the
 * hardware frame (ss/rsp/rflags/cs/rip) across.  Only %r10 and %r11
 * are used as scratch and both are restored from the trampoline save
 * area at the end.
 */
189 #define KMMUENTER_TFRIP                                                 \
190         subq    $TR_RIP, %rsp ;                                         \
191         movq    %r10, TR_R10(%rsp) ;                                    \
192         movq    %r11, TR_R11(%rsp) ;                                    \
193         testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
194         je      40f ;                                                   \
195         movq    PCPU(trampoline)+TR_PCB_CR3,%r10 ;                      \
196         movq    %r10,%cr3 ;                                             \
197 40:                                                                     \
198         movq    %rsp, %r10 ;            /* trampoline rsp */            \
199         movq    PCPU(trampoline)+TR_PCB_RSP,%rsp ; /* kstack rsp */     \
200         movq    TR_SS(%r10), %r11 ;                                     \
201         pushq   %r11 ;                                                  \
202         movq    TR_RSP(%r10), %r11 ;                                    \
203         pushq   %r11 ;                                                  \
204         movq    TR_RFLAGS(%r10), %r11 ;                                 \
205         pushq   %r11 ;                                                  \
206         movq    TR_CS(%r10), %r11 ;                                     \
207         pushq   %r11 ;                                                  \
208         movq    TR_RIP(%r10), %r11 ;                                    \
209         pushq   %r11 ;                                                  \
210         movq    TR_R11(%r10), %r11 ;                                    \
211         movq    TR_R10(%r10), %r10                                      \
212
/*
 * KMMUENTER_TFERR - as KMMUENTER_TFRIP, but the hardware also pushed
 * an error code, so the frame begins at the TR_ERR slot and the error
 * code is copied to the kernel stack as well.
 */
213 #define KMMUENTER_TFERR                                                 \
214         subq    $TR_ERR, %rsp ;                                         \
215         movq    %r10, TR_R10(%rsp) ;                                    \
216         movq    %r11, TR_R11(%rsp) ;                                    \
217         testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
218         je      40f ;                                                   \
219         movq    PCPU(trampoline)+TR_PCB_CR3,%r10 ;                      \
220         movq    %r10,%cr3 ;                                             \
221 40:                                                                     \
222         movq    %rsp, %r10 ;            /* trampoline rsp */            \
223         movq    PCPU(trampoline)+TR_PCB_RSP,%rsp ; /* kstack rsp */     \
224         movq    TR_SS(%r10), %r11 ;                                     \
225         pushq   %r11 ;                                                  \
226         movq    TR_RSP(%r10), %r11 ;                                    \
227         pushq   %r11 ;                                                  \
228         movq    TR_RFLAGS(%r10), %r11 ;                                 \
229         pushq   %r11 ;                                                  \
230         movq    TR_CS(%r10), %r11 ;                                     \
231         pushq   %r11 ;                                                  \
232         movq    TR_RIP(%r10), %r11 ;                                    \
233         pushq   %r11 ;                                                  \
234         movq    TR_ERR(%r10), %r11 ;                                    \
235         pushq   %r11 ;                                                  \
236         movq    TR_R11(%r10), %r11 ;                                    \
237         movq    TR_R10(%r10), %r10                                      \
238
/*
 * KMMUENTER_TFERR_SAVECR2 - as KMMUENTER_TFERR, but %cr2 (the fault
 * address register) is snapshotted into the trampoline's TR_CR2 slot
 * BEFORE the potential %cr3 reload, so the handler can retrieve it
 * later (see PUSH_FRAME_TFERR_SAVECR2).
 */
239 #define KMMUENTER_TFERR_SAVECR2                                         \
240         subq    $TR_ERR, %rsp ;                                         \
241         movq    %r10, TR_R10(%rsp) ;                                    \
242         movq    %r11, TR_R11(%rsp) ;                                    \
243         movq    %cr2, %r10 ;                                            \
244         movq    %r10, PCPU(trampoline)+TR_CR2 ;                         \
245         testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
246         je      40f ;                                                   \
247         movq    PCPU(trampoline)+TR_PCB_CR3,%r10 ;                      \
248         movq    %r10,%cr3 ;                                             \
249 40:                                                                     \
250         movq    %rsp, %r10 ;            /* trampoline rsp */            \
251         movq    PCPU(trampoline)+TR_PCB_RSP,%rsp ; /* kstack rsp */     \
252         movq    TR_SS(%r10), %r11 ;                                     \
253         pushq   %r11 ;                                                  \
254         movq    TR_RSP(%r10), %r11 ;                                    \
255         pushq   %r11 ;                                                  \
256         movq    TR_RFLAGS(%r10), %r11 ;                                 \
257         pushq   %r11 ;                                                  \
258         movq    TR_CS(%r10), %r11 ;                                     \
259         pushq   %r11 ;                                                  \
260         movq    TR_RIP(%r10), %r11 ;                                    \
261         pushq   %r11 ;                                                  \
262         movq    TR_ERR(%r10), %r11 ;                                    \
263         pushq   %r11 ;                                                  \
264         movq    TR_R11(%r10), %r11 ;                                    \
265         movq    TR_R10(%r10), %r10                                      \
266
267 /*
268  * Set %cr3 if necessary on syscall entry.  No registers may be
269  * disturbed.
 *
 * %r10 is borrowed to load the kernel %cr3, but it is saved on the
 * stack and restored so the caller-visible register state is intact.
270  */
271 #define KMMUENTER_SYSCALL                                               \
272         testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
273         je      40f ;                                                   \
274         pushq   %r10 ;                                                  \
275         movq    PCPU(trampoline)+TR_PCB_CR3,%r10 ;                      \
276         movq    %r10,%cr3 ;                                             \
277         popq    %r10 ;                                                  \
278 40:                                                                     \
279
280 /*
281  * We are positioned at the base of the trapframe.  Advance the trapframe
282  * and handle MMU isolation.  MMU isolation requires us to copy the
283  * hardware frame to the trampoline area before setting %cr3 to the
284  * isolated map.  We then set the %rsp for iretq to TR_RIP in the
285  * trampoline area (after restoring the register we saved in TR_ERR).
 *
 * %r11 is the only scratch register used; it is preserved by parking
 * it in the trampoline's TR_ERR slot and popping it back immediately
 * after the isolated %cr3 has been loaded.
286  */
287 #define KMMUEXIT                                                        \
288         addq    $TF_RIP,%rsp ;                                          \
289         testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
290         je      40f ;                                                   \
291         movq    %r11, PCPU(trampoline)+TR_ERR ; /* save in TR_ERR */    \
292         popq    %r11 ;                          /* copy %rip */         \
293         movq    %r11, PCPU(trampoline)+TR_RIP ;                         \
294         popq    %r11 ;                          /* copy %cs */          \
295         movq    %r11, PCPU(trampoline)+TR_CS ;                          \
296         popq    %r11 ;                          /* copy %rflags */      \
297         movq    %r11, PCPU(trampoline)+TR_RFLAGS ;                      \
298         popq    %r11 ;                          /* copy %rsp */         \
299         movq    %r11, PCPU(trampoline)+TR_RSP ;                         \
300         popq    %r11 ;                          /* copy %ss */          \
301         movq    %r11, PCPU(trampoline)+TR_SS ;                          \
302         movq    %gs:0,%r11 ;                                            \
303         addq    $GD_TRAMPOLINE+TR_ERR,%r11 ;                            \
304         movq    %r11,%rsp ;                                             \
305         movq    PCPU(trampoline)+TR_PCB_CR3_ISO,%r11 ;                  \
306         movq    %r11,%cr3 ;                                             \
307         popq    %r11 ;          /* positioned at TR_RIP after this */   \
308 40:                                                                     \
309
310 /*
311  * Warning: user stack pointer already loaded into %rsp at this
312  * point.  We still have the kernel %gs.
 *
 * Because the user %rsp is already live, %r10 is preserved in the
 * trampoline's TR_R10 slot rather than on the stack while the
 * isolated %cr3 is reloaded.
313  */
314 #define KMMUEXIT_SYSCALL                                                \
315         testq   $PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;             \
316         je      40f ;                                                   \
317         movq    %r10, PCPU(trampoline)+TR_R10 ;                         \
318         movq    PCPU(trampoline)+TR_PCB_CR3_ISO,%r10 ;                  \
319         movq    %r10,%cr3 ;                                             \
320         movq    PCPU(trampoline)+TR_R10, %r10 ;                         \
321 40:                                                                     \
322
323 /*
324  * Macros to create and destroy a trap frame.  rsp has already been shifted
325  * to the base of the trapframe in the thread structure.
 *
 * PUSH_FRAME_REGS stores every general purpose register into its TF_*
 * slot of the trapframe at %rsp; the hardware-pushed part of the frame
 * is handled by the PUSH_FRAME_* macros below, not here.
326  */
327 #define PUSH_FRAME_REGS                                                 \
328         movq    %rdi,TF_RDI(%rsp) ;                                     \
329         movq    %rsi,TF_RSI(%rsp) ;                                     \
330         movq    %rdx,TF_RDX(%rsp) ;                                     \
331         movq    %rcx,TF_RCX(%rsp) ;                                     \
332         movq    %r8,TF_R8(%rsp) ;                                       \
333         movq    %r9,TF_R9(%rsp) ;                                       \
334         movq    %rax,TF_RAX(%rsp) ;                                     \
335         movq    %rbx,TF_RBX(%rsp) ;                                     \
336         movq    %rbp,TF_RBP(%rsp) ;                                     \
337         movq    %r10,TF_R10(%rsp) ;                                     \
338         movq    %r11,TF_R11(%rsp) ;                                     \
339         movq    %r12,TF_R12(%rsp) ;                                     \
340         movq    %r13,TF_R13(%rsp) ;                                     \
341         movq    %r14,TF_R14(%rsp) ;                                     \
342         movq    %r15,TF_R15(%rsp)
343
344 /*
345  * PUSH_FRAME is the first thing executed upon interrupt entry.  We are
346  * responsible for swapgs execution and the KMMUENTER dispatch.
 *
 * The RPL bits of the hardware-saved %cs identify an entry from
 * userland; only in that case are swapgs and KMMUENTER_TFRIP run.
 * The frame is then extended down to TF_RIP and the general purpose
 * registers are saved.
347  */
348 #define PUSH_FRAME_TFRIP                                                \
349         testb   $SEL_RPL_MASK,TF_CS-TF_RIP(%rsp) ; /* from userland? */ \
350         jz      1f ;                                                    \
351         swapgs ;                /* from userland */                     \
352         KMMUENTER_TFRIP ;       /* from userland */                     \
353 1:                                                                      \
354         subq    $TF_RIP,%rsp ;                                          \
355         PUSH_FRAME_REGS                                                 \
356
/*
 * As PUSH_FRAME_TFRIP, but for vectors where the hardware also pushed
 * an error code: the frame base is TF_ERR instead of TF_RIP.
 */
357 #define PUSH_FRAME_TFERR                                                \
358         testb   $SEL_RPL_MASK,TF_CS-TF_ERR(%rsp) ; /* from userland? */ \
359         jz      1f ;                                                    \
360         swapgs ;                /* from userland */                     \
361         KMMUENTER_TFERR ;       /* from userland */                     \
362 1:                                                                      \
363         subq    $TF_ERR,%rsp ;                                          \
364         PUSH_FRAME_REGS                                                 \
365
/*
 * As PUSH_FRAME_TFERR, but also recovers %cr2 into TF_ADDR.  When
 * entering from userland, %cr2 was already saved in the trampoline by
 * KMMUENTER_TFERR_SAVECR2 (before the %cr3 switch) and is fetched
 * from there; otherwise it is read from the register directly.
 */
366 #define PUSH_FRAME_TFERR_SAVECR2                                        \
367         testb   $SEL_RPL_MASK,TF_CS-TF_ERR(%rsp) ;                      \
368         jz      1f ;                                                    \
369         swapgs ;                /* from userland */                     \
370         KMMUENTER_TFERR_SAVECR2 ;/* from userland */                    \
371         subq    $TF_ERR,%rsp ;                                          \
372         PUSH_FRAME_REGS ;                                               \
373         movq    PCPU(trampoline)+TR_CR2, %r10 ;                         \
374         jmp 2f ;                                                        \
375 1:                                                                      \
376         subq    $TF_ERR,%rsp ;                                          \
377         PUSH_FRAME_REGS ;                                               \
378         movq    %cr2, %r10 ;                                            \
379 2:                                                                      \
380         movq    %r10, TF_ADDR(%rsp)
381
382 /*
383  * POP_FRAME is issued just prior to the iretq, or just prior to a
384  * jmp doreti_iret.  These must be passed in to the macro.
 *
 * Restores the general purpose registers from the trapframe, disables
 * interrupts, then tests the saved %cs RPL: a return to userland runs
 * KMMUEXIT (isolated %cr3 reload and %rsp relocation to the
 * trampoline) followed by swapgs, while a return to kernel mode
 * simply advances %rsp to the hardware frame.  `lastinsn' executes
 * last in either case.
385  */
386 #define POP_FRAME(lastinsn)                                             \
387         movq    TF_RDI(%rsp),%rdi ;                                     \
388         movq    TF_RSI(%rsp),%rsi ;                                     \
389         movq    TF_RDX(%rsp),%rdx ;                                     \
390         movq    TF_RCX(%rsp),%rcx ;                                     \
391         movq    TF_R8(%rsp),%r8 ;                                       \
392         movq    TF_R9(%rsp),%r9 ;                                       \
393         movq    TF_RAX(%rsp),%rax ;                                     \
394         movq    TF_RBX(%rsp),%rbx ;                                     \
395         movq    TF_RBP(%rsp),%rbp ;                                     \
396         movq    TF_R10(%rsp),%r10 ;                                     \
397         movq    TF_R11(%rsp),%r11 ;                                     \
398         movq    TF_R12(%rsp),%r12 ;                                     \
399         movq    TF_R13(%rsp),%r13 ;                                     \
400         movq    TF_R14(%rsp),%r14 ;                                     \
401         movq    TF_R15(%rsp),%r15 ;                                     \
402         cli ;                                                           \
403         testb   $SEL_RPL_MASK,TF_CS(%rsp) ; /* return to user? */       \
404         jz      1f ;                                                    \
405         KMMUEXIT ;              /* return to user */                    \
406         swapgs ;                /* return to user */                    \
407         jmp     2f ;                                                    \
408 1:                                                                      \
409         addq    $TF_RIP,%rsp ;  /* setup for iretq */                   \
410 2:                                                                      \
411         lastinsn
412
413 /*
414  * Access per-CPU data.
 *
 * %gs points at the per-cpu globaldata, so PCPU(member) yields a
 * %gs-relative reference to the gd_<member> field.  PCPU_E8 indexes
 * an array member of 8-byte elements; PCPU_ADDR computes the address
 * of a member into `reg' via the PC_PRVSPACE base pointer.
415  */
416 #define PCPU(member)            %gs:gd_ ## member
417 #define PCPU_E8(member,idx)     %gs:gd_ ## member(,idx,8)
418 #define PCPU_ADDR(member, reg)                                  \
419         movq %gs:PC_PRVSPACE, reg ;                             \
420         addq $PC_ ## member, reg
421
422 #endif /* LOCORE */
423
424 #endif /* !_CPU_ASMACROS_H_ */