2 * Copyright (c) 1993 The Regents of the University of California.
3 * Copyright (c) 2008 The DragonFly Project.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. Neither the name of the University nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * $FreeBSD: src/sys/amd64/include/asmacros.h,v 1.32 2006/10/28 06:04:29 bde Exp $
33 #ifndef _CPU_ASMACROS_H_
34 #define _CPU_ASMACROS_H_
36 #include <sys/cdefs.h>
38 /* XXX too much duplication in various asm*.h's. */
41 * CNAME is used to manage the relationship between symbol names in C
42 * and the equivalent assembly language names. CNAME is given a name as
43 * it would be used in a C program. It expands to the equivalent assembly
/* CNAME(csym): identity mapping; the assembly name equals the C name. */
46 #define CNAME(csym) csym
/*
 * Alignment helpers built on the assembler's .p2align directive (the first
 * operand is the power of two, the optional second operand is the fill byte).
 *
 * NOTE(review): ALIGN_TEXT is defined twice below with identical text;
 * presumably the two definitions sit in alternate branches of a
 * preprocessor conditional -- confirm.
 * NOTE(review): SUPERALIGN_TEXT currently expands to exactly the same
 * directive as ALIGN_TEXT.
 */
48 #define ALIGN_DATA .p2align 3 /* 8 byte alignment, zero filled */
50 #define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
52 #define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
54 #define SUPERALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
/*
 * GEN_ENTRY(name): emit ALIGN_TEXT padding, export CNAME(name) with .globl,
 * mark it as a function symbol (.type ...,@function) and define its label.
 * No profiling hooks are emitted by this macro itself.
 */
56 #define GEN_ENTRY(name) ALIGN_TEXT; .globl CNAME(name); \
57 .type CNAME(name),@function; CNAME(name):
/* NON_GPROF_ENTRY(name): plain alias for GEN_ENTRY(name). */
58 #define NON_GPROF_ENTRY(name) GEN_ENTRY(name)
/* NON_GPROF_RET: emit the return instruction as a raw byte, not via the mnemonic. */
59 #define NON_GPROF_RET .byte 0xc3 /* opcode for `ret' */
/*
 * END(name): set the size of symbol `name' to the distance from its label
 * to the current location.  The name is used directly, not via CNAME().
 */
61 #define END(name) .size name, . - name
65 * __mcount is like [.]mcount except that it doesn't require its caller to set
66 * up a frame pointer. It must be called before pushing anything onto the
67 * stack. gcc should eventually generate code to call __mcount in most
68 * cases. This would make -pg in combination with -fomit-frame-pointer
69 * useful. gcc has a configuration variable PROFILE_BEFORE_PROLOGUE to
70 * allow profiling before setting up the frame pointer, but this is
71 * inadequate for good handling of special cases, e.g., -fpic works best
72 * with profiling after the prologue.
74 * [.]mexitcount is a new function to support non-statistical profiling if an
75 * accurate clock is available. For C sources, calls to it are generated
76 * by the FreeBSD extension `-mprofiler-epilogue' to gcc. It is best to
77 * call [.]mexitcount at the end of a function like the MEXITCOUNT macro does,
78 * but gcc currently generates calls to it at the start of the epilogue to
79 * avoid problems with -fpic.
81 * [.]mcount and __mcount may clobber the call-used registers and %ef.
82 * [.]mexitcount may clobber %ecx and %ef.
84 * Cross-jumping makes non-statistical profiling timing more complicated.
85 * It is handled in many cases by calling [.]mexitcount before jumping. It
86 * is handled for conditional jumps using CROSSJUMP() and CROSSJUMP_LABEL().
87 * It is handled for some fault-handling jumps by not sharing the exit
90 * ALTENTRY() must be before a corresponding ENTRY() so that it can jump to
91 * the main entry point. Note that alt entries are counted twice. They
92 * have to be counted as ordinary entries for gprof to get the call times
93 * right for the ordinary entries.
95 * High local labels are used in macros to avoid clashes with local labels
98 * Ordinary `ret' is used instead of a macro `RET' because there are a lot
99 * of `ret's. 0xc3 is the opcode for `ret' (`#define ret ... ret' can't
100 * be used because this file is sometimes preprocessed in traditional mode).
101 * `ret' clobbers eflags but this doesn't matter.
/*
 * Entry/exit macros that insert calls to the profiling hooks
 * (MCOUNT -> __mcount, MEXITCOUNT -> .mexitcount).
 */
/*
 * ALTENTRY(name): define the entry point, run both hooks, then jump forward
 * to local label 9, which ENTRY() defines.
 */
103 #define ALTENTRY(name) GEN_ENTRY(name) ; MCOUNT ; MEXITCOUNT ; jmp 9f
/*
 * CROSSJUMP(jtrue, label, jfalse): `jfalse' skips ahead to local label 8;
 * otherwise MEXITCOUNT runs and control jumps to the `to<label>' stub.
 * The jtrue argument is not used by this expansion.
 */
104 #define CROSSJUMP(jtrue, label, jfalse) \
105 jfalse 8f; MEXITCOUNT; jmp __CONCAT(to,label); 8:
/*
 * CROSSJUMPTARGET(label): define the aligned `to<label>' stub, which runs
 * MCOUNT and then jumps to label.
 */
106 #define CROSSJUMPTARGET(label) \
107 ALIGN_TEXT; __CONCAT(to,label): ; MCOUNT; jmp label
/* ENTRY(name): define the entry point, local label 9, then run MCOUNT. */
108 #define ENTRY(name) GEN_ENTRY(name) ; 9: ; MCOUNT
/*
 * FAKE_MCOUNT(caller): push `caller', call __mcount, then pop the pushed
 * value into %rcx (so %rcx is overwritten).
 */
109 #define FAKE_MCOUNT(caller) pushq caller ; call __mcount ; popq %rcx
/* MCOUNT: call the entry-side profiling hook. */
110 #define MCOUNT call __mcount
/* MCOUNT_LABEL(name): define a global function label, emit one nop, realign. */
111 #define MCOUNT_LABEL(name) GEN_ENTRY(name) ; nop ; ALIGN_TEXT
/* MEXITCOUNT: call the exit-side profiling hook. */
113 #define MEXITCOUNT call .mexitcount
/*
 * Redefine `ret' so that each use expands to MEXITCOUNT followed by the raw
 * return opcode (NON_GPROF_RET).
 */
114 #define ret MEXITCOUNT ; NON_GPROF_RET
121 * ALTENTRY() has to align because it is before a corresponding ENTRY().
122 * ENTRY() has to align too because there may be no ALTENTRY() before it.
123 * If there is a previous ALTENTRY() then the alignment code for ENTRY()
/*
 * Variants of the entry macros that emit no profiling calls.
 * NOTE(review): these reuse the names of the profiling variants; presumably
 * the two sets live in opposite branches of a profiling conditional --
 * confirm.
 */
/* ALTENTRY(name): just GEN_ENTRY(name). */
126 #define ALTENTRY(name) GEN_ENTRY(name)
/* CROSSJUMP: a plain conditional jump `jtrue label'; jfalse is unused here. */
127 #define CROSSJUMP(jtrue, label, jfalse) jtrue label
/* CROSSJUMPTARGET(label): expands to nothing. */
128 #define CROSSJUMPTARGET(label)
/* ENTRY(name): just GEN_ENTRY(name). */
129 #define ENTRY(name) GEN_ENTRY(name)
/* FAKE_MCOUNT(caller): expands to nothing. */
130 #define FAKE_MCOUNT(caller)
/* MCOUNT_LABEL(name): expands to nothing. */
132 #define MCOUNT_LABEL(name)
138 * Convenience macro for declaring interrupt entry points.
/*
 * IDTVEC(name): emit ALIGN_TEXT padding and define the global function
 * symbol X<name> (the argument is pasted after a literal `X' by __CONCAT).
 * Unlike GEN_ENTRY(), the symbol is not passed through CNAME().
 */
140 #define IDTVEC(name) ALIGN_TEXT; .globl __CONCAT(X,name); \
141 .type __CONCAT(X,name),@function; __CONCAT(X,name):
144 * stack frame macro support - supports mmu isolation, swapgs, and
145 * stack frame pushing and popping.
149 * Kernel pmap isolation to work-around the massive Intel mmu bug
150 * that allows kernel memory to be sussed out due to speculative memory
151 * reads and instruction execution creating timing differences that can
152 * be detected by userland. e.g. force speculative read, speculatively
153 * execute a cmp/branch sequence, detect timing. Iterate cmp $values
154 * to suss-out content of speculatively read kernel memory.
156 * KMMUENTER - Executed by the trampoline when a user->kernel transition
157 * is detected. The stack pointer points into the pcpu
158 * trampoline space and is available for register save/restore.
159 * Other registers have not yet been saved. %gs points at
160 * the kernel pcpu structure.
162 * Caller has already determined that a transition is in
163 * progress and has already issued the swapgs. hwtf indicates
164 * how much hardware has already pushed.
166 * KMMUEXIT - Executed when a kernel->user transition is made. The stack
167 * pointer points into the pcpu trampoline space and we are
168 * almost ready to iretq. %gs still points at the kernel pcpu
171 * Caller has already determined that a transition is in
172 * progress. hwtf indicates how much hardware has already
175 #define KMMUENTER_TFRIP \
176 subq $TR_RIP, %rsp ; \
177 movq %r10, TR_R10(%rsp) ; \
178 movq %r11, TR_R11(%rsp) ; \
179 testq $PCB_ISOMMU,PCPU(pcb_flags) ; \
181 movq PCPU(pcb_cr3),%r10 ; \
184 movq %rsp, %r10 ; /* trampoline rsp */ \
185 movq PCPU(pcb_rsp),%rsp ; /* kstack rsp */ \
186 movq TR_SS(%r10), %r11 ; \
188 movq TR_RSP(%r10), %r11 ; \
190 movq TR_RFLAGS(%r10), %r11 ; \
192 movq TR_CS(%r10), %r11 ; \
194 movq TR_RIP(%r10), %r11 ; \
196 movq TR_R11(%r10), %r11 ; \
197 movq TR_R10(%r10), %r10 \
199 #define KMMUENTER_TFERR \
200 subq $TR_ERR, %rsp ; \
201 movq %r10, TR_R10(%rsp) ; \
202 movq %r11, TR_R11(%rsp) ; \
203 testq $PCB_ISOMMU,PCPU(pcb_flags) ; \
205 movq PCPU(pcb_cr3),%r10 ; \
208 movq %rsp, %r10 ; /* trampoline rsp */ \
209 movq PCPU(pcb_rsp),%rsp ; /* kstack rsp */ \
210 movq TR_SS(%r10), %r11 ; \
212 movq TR_RSP(%r10), %r11 ; \
214 movq TR_RFLAGS(%r10), %r11 ; \
216 movq TR_CS(%r10), %r11 ; \
218 movq TR_RIP(%r10), %r11 ; \
220 movq TR_ERR(%r10), %r11 ; \
222 movq TR_R11(%r10), %r11 ; \
223 movq TR_R10(%r10), %r10 \
225 #define KMMUENTER_TFERR_SAVECR2 \
226 subq $TR_ERR, %rsp ; \
227 movq %r10, TR_R10(%rsp) ; \
228 movq %r11, TR_R11(%rsp) ; \
230 movq %r10, PCPU(trampoline)+TR_CR2 ; \
231 testq $PCB_ISOMMU,PCPU(pcb_flags) ; \
233 movq PCPU(pcb_cr3),%r10 ; \
236 movq %rsp, %r10 ; /* trampoline rsp */ \
237 movq PCPU(pcb_rsp),%rsp ; /* kstack rsp */ \
238 movq TR_SS(%r10), %r11 ; \
240 movq TR_RSP(%r10), %r11 ; \
242 movq TR_RFLAGS(%r10), %r11 ; \
244 movq TR_CS(%r10), %r11 ; \
246 movq TR_RIP(%r10), %r11 ; \
248 movq TR_ERR(%r10), %r11 ; \
250 movq TR_R11(%r10), %r11 ; \
251 movq TR_R10(%r10), %r10 \
254 * Set %cr3 if necessary on syscall entry. No registers may be
257 #define KMMUENTER_SYSCALL \
258 testq $PCB_ISOMMU,PCPU(pcb_flags) ; \
261 movq PCPU(pcb_cr3),%r10 ; \
267 * We are positioned at the base of the trapframe. Advance the trapframe
268 * and handle MMU isolation. MMU isolation requires us to copy the
269 * hardware frame to the trampoline area before setting %cr3 to the
270 * isolated map. We then set the %rsp for iretq to TR_RIP in the
271 * trampoline area (after restoring the register we saved in TR_ERR).
274 addq $TF_RIP,%rsp ; \
275 testq $PCB_ISOMMU,PCPU(pcb_flags) ; \
277 movq %r11, PCPU(trampoline)+TR_ERR ; /* save in TR_ERR */ \
278 popq %r11 ; /* copy %rip */ \
279 movq %r11, PCPU(trampoline)+TR_RIP ; \
280 popq %r11 ; /* copy %cs */ \
281 movq %r11, PCPU(trampoline)+TR_CS ; \
282 popq %r11 ; /* copy %rflags */ \
283 movq %r11, PCPU(trampoline)+TR_RFLAGS ; \
284 popq %r11 ; /* copy %rsp */ \
285 movq %r11, PCPU(trampoline)+TR_RSP ; \
286 popq %r11 ; /* copy %ss */ \
287 movq %r11, PCPU(trampoline)+TR_SS ; \
289 addq $GD_TRAMPOLINE+TR_ERR,%r11 ; \
291 movq PCPU(pcb_cr3_iso),%r11 ; \
293 popq %r11 ; /* positioned at TR_RIP after this */ \
297 * Warning: user stack pointer already loaded into %rsp at this
298 * point. We still have the kernel %gs.
300 #define KMMUEXIT_SYSCALL \
301 testq $PCB_ISOMMU,PCPU(pcb_flags) ; \
303 movq %r10, PCPU(trampoline)+TR_R10 ; \
304 movq PCPU(pcb_cr3_iso),%r10 ; \
306 movq PCPU(trampoline)+TR_R10, %r10 ; \
310 * Macros to create and destroy a trap frame. rsp has already been shifted
311 * to the base of the trapframe in the thread structure.
/*
 * PUSH_FRAME_REGS: store fifteen general-purpose registers (%rdi, %rsi,
 * %rdx, %rcx, %r8, %r9, %rax, %rbx, %rbp, %r10-%r15) into their TF_* slots,
 * each addressed relative to %rsp.  Only plain movq stores are used: %rsp is
 * not adjusted and the flags are left untouched.
 */
313 #define PUSH_FRAME_REGS \
314 movq %rdi,TF_RDI(%rsp) ; \
315 movq %rsi,TF_RSI(%rsp) ; \
316 movq %rdx,TF_RDX(%rsp) ; \
317 movq %rcx,TF_RCX(%rsp) ; \
318 movq %r8,TF_R8(%rsp) ; \
319 movq %r9,TF_R9(%rsp) ; \
320 movq %rax,TF_RAX(%rsp) ; \
321 movq %rbx,TF_RBX(%rsp) ; \
322 movq %rbp,TF_RBP(%rsp) ; \
323 movq %r10,TF_R10(%rsp) ; \
324 movq %r11,TF_R11(%rsp) ; \
325 movq %r12,TF_R12(%rsp) ; \
326 movq %r13,TF_R13(%rsp) ; \
327 movq %r14,TF_R14(%rsp) ; \
328 movq %r15,TF_R15(%rsp)
331 * PUSH_FRAME is the first thing executed upon interrupt entry. We are
332 * responsible for swapgs execution and the KMMUENTER dispatch.
334 #define PUSH_FRAME_TFRIP \
335 testb $SEL_RPL_MASK,TF_CS-TF_RIP(%rsp) ; /* from userland? */ \
337 swapgs ; /* from userland */ \
338 KMMUENTER_TFRIP ; /* from userland */ \
340 subq $TF_RIP,%rsp ; \
343 #define PUSH_FRAME_TFERR \
344 testb $SEL_RPL_MASK,TF_CS-TF_ERR(%rsp) ; /* from userland? */ \
346 swapgs ; /* from userland */ \
347 KMMUENTER_TFERR ; /* from userland */ \
349 subq $TF_ERR,%rsp ; \
352 #define PUSH_FRAME_TFERR_SAVECR2 \
353 testb $SEL_RPL_MASK,TF_CS-TF_ERR(%rsp) ; \
355 swapgs ; /* from userland */ \
356 KMMUENTER_TFERR_SAVECR2 ;/* from userland */ \
357 subq $TF_ERR,%rsp ; \
359 movq PCPU(trampoline)+TR_CR2, %r10 ; \
362 subq $TF_ERR,%rsp ; \
366 movq %r10, TF_ADDR(%rsp)
369 * Called when the iretq in doreti_iret faults. XXX
371 #define PUSH_FRAME_NOSWAP \
376 * POP_FRAME is issued just prior to the iretq, or just prior to a
377 * jmp doreti_iret. These must be passed in to the macro.
379 #define POP_FRAME(lastinsn) \
380 movq TF_RDI(%rsp),%rdi ; \
381 movq TF_RSI(%rsp),%rsi ; \
382 movq TF_RDX(%rsp),%rdx ; \
383 movq TF_RCX(%rsp),%rcx ; \
384 movq TF_R8(%rsp),%r8 ; \
385 movq TF_R9(%rsp),%r9 ; \
386 movq TF_RAX(%rsp),%rax ; \
387 movq TF_RBX(%rsp),%rbx ; \
388 movq TF_RBP(%rsp),%rbp ; \
389 movq TF_R10(%rsp),%r10 ; \
390 movq TF_R11(%rsp),%r11 ; \
391 movq TF_R12(%rsp),%r12 ; \
392 movq TF_R13(%rsp),%r13 ; \
393 movq TF_R14(%rsp),%r14 ; \
394 movq TF_R15(%rsp),%r15 ; \
395 testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* return to user? */ \
397 cli ; /* return to user */ \
398 KMMUEXIT ; /* return to user */ \
399 swapgs ; /* return to user */ \
402 addq $TF_RIP,%rsp ; /* setup for iretq */ \
407 * Access per-CPU data.
/* PCPU(member): %gs-relative operand naming the symbol gd_<member>. */
409 #define PCPU(member) %gs:gd_ ## member
/*
 * PCPU_E8(member,idx): as PCPU(), but indexed by register idx with a scale
 * of 8, i.e. element idx of an array of 8-byte entries at gd_<member>.
 */
410 #define PCPU_E8(member,idx) %gs:gd_ ## member(,idx,8)
/*
 * PCPU_ADDR(member, reg): load %gs:PC_PRVSPACE into reg, then add the
 * constant PC_<member> to it.  The addq modifies the flags.
 * NOTE(review): this macro uses PC_-prefixed symbols whereas PCPU() and
 * PCPU_E8() use the gd_ prefix -- confirm PC_PRVSPACE and the PC_<member>
 * constants are actually defined for this platform.
 */
411 #define PCPU_ADDR(member, reg) \
412 movq %gs:PC_PRVSPACE, reg ; \
413 addq $PC_ ## member, reg
417 #endif /* !_CPU_ASMACROS_H_ */