/*
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * Copyright (c) 1989, 1990 William Jolitz
 * Copyright (c) 1994 John Dyson
 * Copyright (c) 2008-2018 The DragonFly Project.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 * $FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.132.2.9 2003/01/25 19:02:23 dillon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/interrupt.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>

#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/smp.h>
#include <machine/pcb.h>
#include <machine/pcb_ext.h>
#include <machine/segments.h>
#include <machine/globaldata.h>	/* npxthread */
#include <machine/specialreg.h>
#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

#include <sys/thread2.h>
#include <sys/mplock2.h>

#include <bus/isa/isa.h>

static void cpu_reset_real (void);

static int spectre_mitigation = -1;
static int spectre_support = 0;
static int spectre_mode = 0;

SYSCTL_INT(_machdep, OID_AUTO, spectre_mode, CTLFLAG_RD,
	&spectre_mode, 0, "current Spectre enablements");

/*
 * Finish a fork operation, with lwp lp2 nearly set up.
 * Copy and update the pcb, and set up the stack so that the child is
 * ready to run and return to user mode.
 */
void
cpu_fork(struct lwp *lp1, struct lwp *lp2, int flags)
{
	struct pcb *pcb2;
	struct pmap *pmap2;

	if ((flags & RFPROC) == 0) {
		if ((flags & RFMEM) == 0) {
			/*
			 * Unshare user LDT.  The > 1 test is MPSAFE.  While
			 * it can potentially race a 2->1 transition, the
			 * worst that happens is that we do an unnecessary
			 * ldt replacement.
			 */
			struct pcb *pcb1 = lp1->lwp_thread->td_pcb;
			struct pcb_ldt *pcb_ldt = pcb1->pcb_ldt;

			if (pcb_ldt && pcb_ldt->ldt_refcnt > 1) {
				pcb_ldt = user_ldt_alloc(pcb1,
							 pcb_ldt->ldt_len);
				user_ldt_free(pcb1);
				pcb1->pcb_ldt = pcb_ldt;
				set_user_ldt(pcb1);
			}
		}
		return;
	}

	/* Ensure that lp1's pcb is up to date. */
	if (mdcpu->gd_npxthread == lp1->lwp_thread)
		npxsave(lp1->lwp_thread->td_savefpu);

	/*
	 * Copy lp1's PCB.  This really only applies to the
	 * debug registers and FP state, but it's faster to just copy the
	 * whole thing.  Because we only save the PCB at switchout time,
	 * the register state may not be current.
	 */
	pcb2 = lp2->lwp_thread->td_pcb;
	*pcb2 = *lp1->lwp_thread->td_pcb;

	/*
	 * Create a new fresh stack for the new process.
	 * Copy the trap frame for the return to user mode as if from a
	 * syscall.  This copies the user mode register values.
	 *
	 * pcb_rsp must allocate an additional call-return pointer below
	 * the trap frame which will be restored by cpu_heavy_restore from
	 * PCB_RIP, and the thread's td_sp pointer must allocate an
	 * additional two quadwords below the pcb_rsp call-return pointer to
	 * hold the LWKT restore function pointer and rflags.
	 *
	 * The LWKT restore function pointer must be set to cpu_heavy_restore,
	 * which is our standard heavy-weight process switch-in function.
	 * YYY eventually we should shortcut fork_return and fork_trampoline
	 * to use the LWKT restore function directly so we can get rid of
	 * all the extra crap we are setting up.
	 */
	lp2->lwp_md.md_regs = (struct trapframe *)pcb2 - 1;
	bcopy(lp1->lwp_md.md_regs, lp2->lwp_md.md_regs,
	      sizeof(*lp2->lwp_md.md_regs));

	/*
	 * Set registers for trampoline to user mode.  Leave space for the
	 * return address on stack.  These are the kernel mode register values.
	 *
	 * Set the new pmap CR3.  If the new process uses isolated VM spaces,
	 * also set the isolated CR3.
	 */
	pmap2 = vmspace_pmap(lp2->lwp_proc->p_vmspace);
	pcb2->pcb_cr3 = vtophys(pmap2->pm_pml4);
	if ((pcb2->pcb_flags & PCB_ISOMMU) && pmap2->pm_pmlpv_iso) {
		pcb2->pcb_cr3_iso = vtophys(pmap2->pm_pml4_iso);
	} else {
		pcb2->pcb_flags &= ~PCB_ISOMMU;
		pcb2->pcb_cr3_iso = 0;
	}

#if 0
	/*
	 * Per-process spectre mitigation (future)
	 */
	pcb2->pcb_flags &= ~(PCB_IBRS1 | PCB_IBRS2);
	switch (spectre_mitigation) {
	case 1:
		pcb2->pcb_flags |= PCB_IBRS1;
		break;
	case 2:
		pcb2->pcb_flags |= PCB_IBRS2;
		break;
	default:
		break;
	}
#endif

	pcb2->pcb_rbx = (unsigned long)fork_return;	/* fork_trampoline argument */
	pcb2->pcb_rbp = 0;
	pcb2->pcb_rsp = (unsigned long)lp2->lwp_md.md_regs - sizeof(void *);
	pcb2->pcb_r12 = (unsigned long)lp2;		/* fork_trampoline argument */
	pcb2->pcb_r13 = 0;
	pcb2->pcb_r14 = 0;
	pcb2->pcb_r15 = 0;
	pcb2->pcb_rip = (unsigned long)fork_trampoline;
	lp2->lwp_thread->td_sp = (char *)(pcb2->pcb_rsp - sizeof(void *));
	*(u_int64_t *)lp2->lwp_thread->td_sp = PSL_USER;
	lp2->lwp_thread->td_sp -= sizeof(void *);
	*(void **)lp2->lwp_thread->td_sp = (void *)cpu_heavy_restore;
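
	/*
	 * Resulting layout at the top of lp2's kernel stack (diagram
	 * added for clarity; derived from the stores above), from high
	 * to low addresses:
	 *
	 *	[ trapframe copied from lp1  ]  <- lp2->lwp_md.md_regs
	 *	[ call-return slot (PCB_RIP) ]  <- pcb2->pcb_rsp
	 *	[ rflags image (PSL_USER)    ]
	 *	[ cpu_heavy_restore pointer  ]  <- lp2->lwp_thread->td_sp
	 */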

	/*
	 * pcb2->pcb_ldt:	duplicated below, if necessary.
	 * pcb2->pcb_savefpu:	cloned above.
	 * pcb2->pcb_flags:	cloned above.
	 * pcb2->pcb_onfault:	cloned above (always NULL here).
	 * pcb2->pcb_onfault_sp: cloned above (don't care).
	 */

	/*
	 * XXX don't copy the i/o pages.  this should probably be fixed.
	 */
	pcb2->pcb_ext = NULL;

	/* Copy the LDT, if necessary. */
	if (pcb2->pcb_ldt != NULL) {
		if (flags & RFMEM) {
			atomic_add_int(&pcb2->pcb_ldt->ldt_refcnt, 1);
		} else {
			pcb2->pcb_ldt = user_ldt_alloc(pcb2,
						       pcb2->pcb_ldt->ldt_len);
		}
	}
	bcopy(&lp1->lwp_thread->td_tls, &lp2->lwp_thread->td_tls,
	      sizeof(lp2->lwp_thread->td_tls));

	/*
	 * Now, cpu_switch() can schedule the new lwp.
	 * pcb_rsp is loaded pointing to the cpu_switch() stack frame
	 * containing the return address when exiting cpu_switch.
	 * This will normally be to fork_trampoline(), which will have
	 * %rbx loaded with the new lwp's pointer.  fork_trampoline()
	 * will set up a stack to call fork_return(lp, frame); to complete
	 * the return to user-mode.
	 */
}

/*
 * Prepare new lwp to return to the address specified in params.
 */
int
cpu_prepare_lwp(struct lwp *lp, struct lwp_params *params)
{
	struct trapframe *regs = lp->lwp_md.md_regs;
	void *bad_return = NULL;
	int error;

	regs->tf_rip = (long)params->lwp_func;
	regs->tf_rsp = (long)params->lwp_stack;
	/* Set up argument for function call */
	regs->tf_rdi = (long)params->lwp_arg;

	/*
	 * Set up fake return address.  As the lwp function may never return,
	 * we simply copy out a NULL pointer and force the lwp to receive
	 * a SIGSEGV if it returns anyway.
	 */
	regs->tf_rsp -= sizeof(void *);
	error = copyout(&bad_return, (void *)regs->tf_rsp, sizeof(bad_return));
	if (error)
		return (error);

	if (lp->lwp_proc->p_vmm) {
		lp->lwp_thread->td_pcb->pcb_cr3 = KPML4phys;
		cpu_set_fork_handler(lp,
		    (void (*)(void *, struct trapframe *))vmm_lwp_return, lp);
	} else {
		cpu_set_fork_handler(lp,
		    (void (*)(void *, struct trapframe *))generic_lwp_return, lp);
	}
	return (0);
}
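
/*
 * NOTE: the lwp_params consumed above (lwp_func, lwp_stack, lwp_arg) are
 * normally supplied by userland through the lwp_create(2) system call;
 * this routine only installs them.
 */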

/*
 * Intercept the return address from a freshly forked process that has NOT
 * been scheduled yet.
 *
 * This is needed to make kernel threads stay in kernel mode.
 */
void
cpu_set_fork_handler(struct lwp *lp, void (*func)(void *, struct trapframe *),
		     void *arg)
{
	/*
	 * Note that the trap frame follows the args, so the function
	 * is really called like this: func(arg, frame);
	 */
	lp->lwp_thread->td_pcb->pcb_rbx = (long)func;	/* function */
	lp->lwp_thread->td_pcb->pcb_r12 = (long)arg;	/* first arg */
}

void
cpu_set_thread_handler(thread_t td, void (*rfunc)(void), void *func, void *arg)
{
	td->td_pcb->pcb_rbx = (long)func;
	td->td_pcb->pcb_r12 = (long)arg;
	td->td_switch = cpu_lwkt_switch;
	td->td_sp -= sizeof(void *);
	*(void **)td->td_sp = rfunc;	/* exit function on return */
	td->td_sp -= sizeof(void *);
	*(void **)td->td_sp = cpu_kthread_restore;
}
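
/*
 * Flow sketch (added comment): when a thread set up by
 * cpu_set_thread_handler() is first switched in, the LWKT switch code
 * pops cpu_kthread_restore off td_sp, which arranges to call func(arg)
 * with 'rfunc' left above it as the return address, so returning from
 * func() enters the exit function:
 *
 *	cpu_kthread_restore -> func(arg) -> (return) -> rfunc()
 */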

void
cpu_lwp_exit(void)
{
	struct thread *td = curthread;
	struct pcb *pcb;

	pcb = td->td_pcb;

	/* Some x86 functionality was dropped */
	KKASSERT(pcb->pcb_ext == NULL);

	/*
	 * disable all hardware breakpoints
	 */
	if (pcb->pcb_flags & PCB_DBREGS) {
		reset_dbregs();
		pcb->pcb_flags &= ~PCB_DBREGS;
	}
	td->td_gd->gd_cnt.v_swtch++;

	crit_enter_quick(td);
	if (td->td_flags & TDF_TSLEEPQ)
		tsleep_remove(td);
	lwkt_deschedule_self(td);
	lwkt_remove_tdallq(td);
	cpu_thread_exit();
}

/*
 * Terminate the current thread.  The caller must have already acquired
 * the thread's rwlock and placed it on a reap list or otherwise notified
 * a reaper of its existence.  We set a special assembly switch function
 * which releases td_rwlock after it has cleaned up the MMU state and
 * switched out the stack.
 *
 * Must be called from a critical section and with the thread descheduled.
 */
void
cpu_thread_exit(void)
{
	npxexit();
	curthread->td_switch = cpu_exit_switch;
	curthread->td_flags |= TDF_EXITING;
	lwkt_switch();
	panic("cpu_thread_exit: lwkt_switch() unexpectedly returned");
}

void
cpu_reset(void)
{
	cpu_reset_real();
}

static void
cpu_reset_real(void)
{
	/*
	 * Attempt to do a CPU reset via the keyboard controller,
	 * do not turn off GateA20, as any machine that fails
	 * to do the reset here would then end up in no man's land.
	 */
#if !defined(BROKEN_KEYBOARD_RESET)
	outb(IO_KBD + 4, 0xFE);
	DELAY(500000);	/* wait 0.5 sec to see if that did it */
	kprintf("Keyboard reset did not work, attempting CPU shutdown\n");
	DELAY(1000000);	/* wait 1 sec for kprintf to complete */
#endif
#if 0
	/* force a shutdown by unmapping entire address space ! */
	bzero((caddr_t)PTD, PAGE_SIZE);
#endif

	/* "good night, sweet prince .... <THUNK!>" */
	cpu_invltlb();
	/* NOTREACHED */
	while (1)
		;
}

/*
 * Convert kernel VA to physical address
 */
vm_paddr_t
kvtop(void *addr)
{
	vm_paddr_t pa;

	pa = pmap_kextract((vm_offset_t)addr);
	if (pa == 0)
		panic("kvtop: zero page frame");
	return (pa);
}

static void
swi_vm(void *arg, void *frame)
{
	if (busdma_swi_pending != 0)
		busdma_swi();
}

static void
swi_vm_setup(void *arg)
{
	register_swi_mp(SWI_VM, swi_vm, NULL, "swi_vm", NULL, 0);
}

SYSINIT(swi_vm_setup, SI_BOOT2_MACHDEP, SI_ORDER_ANY, swi_vm_setup, NULL);

/*
 * NOTE: This routine is also called after a successful microcode
 *	 update.
 */
void spectre_vm_setup(void *arg);

/*
 * Check for IBPB and IBRS support
 *
 * These bits also specify desired modes in the spectre_mitigation sysctl.
 */
#define IBRS_SUPPORTED		0x0001
#define STIBP_SUPPORTED		0x0002
#define IBPB_SUPPORTED		0x0004
#define IBRS_AUTO_SUPPORTED	0x0008
#define STIBP_AUTO_SUPPORTED	0x0010
#define IBRS_PREFERRED_REQUEST	0x0020
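
/*
 * The *_SUPPORTED defines are single-bit flags, so a support or request
 * word is simply their OR.  Illustrative example:
 *
 *	IBRS_AUTO_SUPPORTED | IBPB_SUPPORTED == 0x0008 | 0x0004 == 0x000c
 *
 * This is the raw form reported by the req=%04x field in the console
 * message printed by spectre_sysctl_changed().
 */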

static int
spectre_check_support(void)
{
	uint32_t p[4];
	int rv = 0;

	/*
	 * Spectre mitigation hw bits
	 *
	 * IBRS		Indirect Branch Restricted Speculation   (isolation)
	 * STIBP	Single Thread Indirect Branch Prediction (isolation)
	 * IBPB		Branch Prediction Barrier		 (barrier)
	 *
	 * IBRS and STIBP must be toggled (enabled on entry to the kernel,
	 * disabled on exit, as well as disabled during any MWAIT/HLT).
	 * When *_AUTO bits are available, IBRS and STIBP may be left
	 * turned on and do not have to be toggled on kernel entry/exit.
	 *
	 * All of this has enormous overhead, IBPB in particular, so IBPB
	 * and the non-auto modes are disabled by default.
	 */
	if (cpu_vendor_id == CPU_VENDOR_INTEL) {
		p[0] = 0;
		p[1] = 0;
		p[2] = 0;
		p[3] = 0;
		cpuid_count(7, 0, p);
		if (p[3] & CPUID_7_0_I3_SPEC_CTRL)
			rv |= IBRS_SUPPORTED | IBPB_SUPPORTED;
		if (p[3] & CPUID_7_0_I3_STIBP)
			rv |= STIBP_SUPPORTED;

		/*
		 * 0x80000008 p[1] bit 12 indicates IBPB support
		 *
		 * This bit might be set even though SPEC_CTRL is not set.
		 */
		p[0] = 0;
		p[1] = 0;
		p[2] = 0;
		p[3] = 0;
		do_cpuid(0x80000008U, p);
		if (p[1] & CPUID_INTEL_80000008_I1_IBPB_SUPPORT)
			rv |= IBPB_SUPPORTED;
	} else if (cpu_vendor_id == CPU_VENDOR_AMD) {
		/*
		 * 0x80000008 p[1] bit 12 indicates IBPB support
		 *	      p[1] bit 14 indicates IBRS support
		 *	      p[1] bit 15 indicates STIBP support
		 *
		 *	      p[1] bit 16 indicates IBRS auto support
		 *	      p[1] bit 17 indicates STIBP auto support
		 *	      p[1] bit 18 indicates the processor prefers
		 *		using IBRS instead of retpoline.
		 */
		p[0] = 0;
		p[1] = 0;
		p[2] = 0;
		p[3] = 0;
		do_cpuid(0x80000008U, p);
		if (p[1] & CPUID_AMD_80000008_I1_IBPB_SUPPORT)
			rv |= IBPB_SUPPORTED;
		if (p[1] & CPUID_AMD_80000008_I1_IBRS_SUPPORT)
			rv |= IBRS_SUPPORTED;
		if (p[1] & CPUID_AMD_80000008_I1_STIBP_SUPPORT)
			rv |= STIBP_SUPPORTED;

		if (p[1] & CPUID_AMD_80000008_I1_IBRS_AUTO)
			rv |= IBRS_AUTO_SUPPORTED;
		if (p[1] & CPUID_AMD_80000008_I1_STIBP_AUTO)
			rv |= STIBP_AUTO_SUPPORTED;
		if (p[1] & CPUID_AMD_80000008_I1_IBRS_REQUESTED)
			rv |= IBRS_PREFERRED_REQUEST;
	}

	return rv;
}

/*
 * Iterate CPUs and adjust MSR for global operations, since
 * the KMMU* code won't do it if spectre_mitigation is 0 or 2.
 */
#define CHECK(flag)	(spectre_mitigation & spectre_support & (flag))
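
/*
 * That is, CHECK(IBRS_SUPPORTED) is nonzero only when IBRS was both
 * requested by the user (spectre_mitigation) and reported by the hardware
 * (spectre_support).  For example, with spectre_mitigation == 0x0005 and
 * spectre_support == 0x0001, CHECK(IBRS_SUPPORTED) == 0x0001 while
 * CHECK(IBPB_SUPPORTED) == 0.
 */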

static void
spectre_sysctl_changed(void)
{
	globaldata_t save_gd;
	struct trampframe *tr;
	int spec_ctrl;
	int mode;
	int n;

	mode = 0;
	crit_enter();
	save_gd = mycpu;

	for (n = 0; n < ncpus; ++n) {
		lwkt_setcpu_self(globaldata_find(n));
		cpu_ccfence();
		tr = &pscpu->trampoline;

		/*
		 * Make sure we are cleaned out.
		 *
		 * XXX cleanup, reusing globals inside the loop (they get
		 *     set to the same thing each loop)
		 */
		tr->tr_pcb_spec_ctrl[0] = 0;	/* kernel entry (idle exit) */
		tr->tr_pcb_spec_ctrl[1] = 0;	/* kernel exit  (idle entry) */

		/*
		 * Don't try to parse if not available
		 */
		if (spectre_mitigation < 0)
			continue;

		/*
		 * IBRS mode.  Auto overrides toggling.
		 *
		 * Only set the ENABLE flag if we have to toggle something
		 * on entry and exit.
		 */
		spec_ctrl = 0;
		if (CHECK(IBRS_AUTO_SUPPORTED)) {
			spec_ctrl |= SPEC_CTRL_IBRS;
			mode |= IBRS_AUTO_SUPPORTED;
		} else if (CHECK(IBRS_SUPPORTED)) {
			spec_ctrl |= SPEC_CTRL_IBRS | SPEC_CTRL_DUMMY_ENABLE;
			mode |= IBRS_SUPPORTED;
		}
		if (CHECK(STIBP_AUTO_SUPPORTED)) {
			spec_ctrl |= SPEC_CTRL_STIBP;
			mode |= STIBP_AUTO_SUPPORTED;
		} else if (CHECK(STIBP_SUPPORTED)) {
			spec_ctrl |= SPEC_CTRL_STIBP | SPEC_CTRL_DUMMY_ENABLE;
			mode |= STIBP_SUPPORTED;
		}

		/*
		 * IBPB requested and supported.
		 */
		if (CHECK(IBPB_SUPPORTED)) {
			spec_ctrl |= SPEC_CTRL_DUMMY_IBPB;
			mode |= IBPB_SUPPORTED;
		}

		/*
		 * Update the MSR if the cpu supports the modes to ensure
		 * proper disablement if the user disabled the mode.
		 */
		if (spectre_support & (IBRS_SUPPORTED | IBRS_AUTO_SUPPORTED |
				       STIBP_SUPPORTED |
				       STIBP_AUTO_SUPPORTED)) {
			wrmsr(MSR_SPEC_CTRL,
			      spec_ctrl & (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP));
		}

		/*
		 * Update spec_ctrl fields in the trampoline.
		 *
		 * [0] on-kernel-entry (on-idle-exit)
		 * [1] on-kernel-exit  (on-idle-entry)
		 *
		 * When auto mode is supported we leave the bit set,
		 * otherwise we clear the bits on the kernel-exit side.
		 */
		tr->tr_pcb_spec_ctrl[0] = spec_ctrl;
		if (CHECK(IBRS_AUTO_SUPPORTED) == 0)
			spec_ctrl &= ~SPEC_CTRL_IBRS;
		if (CHECK(STIBP_AUTO_SUPPORTED) == 0)
			spec_ctrl &= ~SPEC_CTRL_STIBP;
		tr->tr_pcb_spec_ctrl[1] = spec_ctrl;
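
		/*
		 * Worked example (illustrative): with non-auto IBRS
		 * requested and supported, [0] contains SPEC_CTRL_IBRS |
		 * SPEC_CTRL_DUMMY_ENABLE while [1] has SPEC_CTRL_IBRS
		 * cleared, so the trampoline enables IBRS on every kernel
		 * entry and disables it again on every kernel exit.  In
		 * auto mode the bit remains set in both words and no
		 * per-transition toggle is needed.
		 */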

		/*
		 * Make sure we set this on the first loop.  It will be
		 * the same value on remaining loops.
		 */
		spectre_mode = mode;
	}
	lwkt_setcpu_self(save_gd);
	crit_exit();

	/*
	 * Console message on mitigation mode change
	 */
	kprintf("Spectre: support=(");
	if (spectre_support == 0) {
		kprintf(" none");
	} else {
		if (spectre_support & IBRS_SUPPORTED)
			kprintf(" IBRS");
		if (spectre_support & STIBP_SUPPORTED)
			kprintf(" STIBP");
		if (spectre_support & IBPB_SUPPORTED)
			kprintf(" IBPB");
		if (spectre_support & IBRS_AUTO_SUPPORTED)
			kprintf(" IBRS_AUTO");
		if (spectre_support & STIBP_AUTO_SUPPORTED)
			kprintf(" STIBP_AUTO");
		if (spectre_support & IBRS_PREFERRED_REQUEST)
			kprintf(" IBRS_REQUESTED");
	}
	kprintf(" ) req=%04x operating=(", (uint16_t)spectre_mitigation);
	if (spectre_mode == 0) {
		kprintf(" none");
	} else {
		if (spectre_mode & IBRS_SUPPORTED)
			kprintf(" IBRS");
		if (spectre_mode & STIBP_SUPPORTED)
			kprintf(" STIBP");
		if (spectre_mode & IBPB_SUPPORTED)
			kprintf(" IBPB");
		if (spectre_mode & IBRS_AUTO_SUPPORTED)
			kprintf(" IBRS_AUTO");
		if (spectre_mode & STIBP_AUTO_SUPPORTED)
			kprintf(" STIBP_AUTO");
		if (spectre_mode & IBRS_PREFERRED_REQUEST)
			kprintf(" IBRS_REQUESTED");
	}
	kprintf(" )\n");
}

/*
 * User changes sysctl value
 */
static int
sysctl_spectre_mitigation(SYSCTL_HANDLER_ARGS)
{
	char buf[128];
	char *ptr;
	char *iter;
	size_t len;
	int spectre;
	int error = 0;
	int loop = 0;

	/*
	 * Return current operating mode or support.
	 */
	if (oidp->oid_kind & CTLFLAG_WR)
		spectre = spectre_mode;
	else
		spectre = spectre_support;

	spectre &= (IBRS_SUPPORTED | IBRS_AUTO_SUPPORTED |
		    STIBP_SUPPORTED | STIBP_AUTO_SUPPORTED |
		    IBPB_SUPPORTED);

	while (spectre) {
		if (loop++) {
			error = SYSCTL_OUT(req, " ", 1);
			if (error)
				break;
		}
		if (spectre & IBRS_SUPPORTED) {
			spectre &= ~IBRS_SUPPORTED;
			error = SYSCTL_OUT(req, "IBRS", 4);
		} else
		if (spectre & IBRS_AUTO_SUPPORTED) {
			spectre &= ~IBRS_AUTO_SUPPORTED;
			error = SYSCTL_OUT(req, "IBRS_AUTO", 9);
		} else
		if (spectre & STIBP_SUPPORTED) {
			spectre &= ~STIBP_SUPPORTED;
			error = SYSCTL_OUT(req, "STIBP", 5);
		} else
		if (spectre & STIBP_AUTO_SUPPORTED) {
			spectre &= ~STIBP_AUTO_SUPPORTED;
			error = SYSCTL_OUT(req, "STIBP_AUTO", 10);
		} else
		if (spectre & IBPB_SUPPORTED) {
			spectre &= ~IBPB_SUPPORTED;
			error = SYSCTL_OUT(req, "IBPB", 4);
		}
		if (error)
			break;
	}
	if (loop == 0) {
		error = SYSCTL_OUT(req, "NONE", 4);
	}

	if (error || req->newptr == NULL)
		return error;
	if ((oidp->oid_kind & CTLFLAG_WR) == 0)
		return error;

	/*
	 * Change current operating mode
	 */
	len = req->newlen - req->newidx;
	if (len >= sizeof(buf)) {
		error = EINVAL;
		goto done;
	}

	error = SYSCTL_IN(req, buf, len);
	if (error)
		goto done;
	buf[len] = 0;
	iter = &buf[0];
	spectre = 0;

	while (error == 0 && iter) {
		ptr = strsep(&iter, " ,\t\r\n");
		if (*ptr == 0)
			continue;
		if (strcasecmp(ptr, "NONE") == 0)
			spectre |= 0;
		else if (strcasecmp(ptr, "IBRS") == 0)
			spectre |= IBRS_SUPPORTED;
		else if (strcasecmp(ptr, "IBRS_AUTO") == 0)
			spectre |= IBRS_AUTO_SUPPORTED;
		else if (strcasecmp(ptr, "STIBP") == 0)
			spectre |= STIBP_SUPPORTED;
		else if (strcasecmp(ptr, "STIBP_AUTO") == 0)
			spectre |= STIBP_AUTO_SUPPORTED;
		else if (strcasecmp(ptr, "IBPB") == 0)
			spectre |= IBPB_SUPPORTED;
		else
			error = ENOENT;
	}
	if (error == 0) {
		spectre_mitigation = spectre;
		spectre_sysctl_changed();
	}
done:
	return error;
}

SYSCTL_PROC(_machdep, OID_AUTO, spectre_mitigation,
	    CTLTYPE_STRING | CTLFLAG_RW,
	    0, 0, sysctl_spectre_mitigation, "A", "Spectre exploit mitigation");
SYSCTL_PROC(_machdep, OID_AUTO, spectre_support,
	    CTLTYPE_STRING | CTLFLAG_RD,
	    0, 0, sysctl_spectre_mitigation, "A", "Spectre supported features");
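
/*
 * Example usage from userland (sketch; token grammar per the strsep()
 * parser in sysctl_spectre_mitigation):
 *
 *	sysctl machdep.spectre_support			(list features)
 *	sysctl machdep.spectre_mitigation="IBRS_AUTO IBPB"
 *	sysctl machdep.spectre_mitigation=NONE
 *
 * Tokens may be separated by spaces, commas, or tabs.
 */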

/*
 * NOTE: Called at SI_BOOT2_MACHDEP and also when the microcode is
 *	 updated.  Microcode updates must be applied to all cpus
 *	 for support to be recognized.
 */
void
spectre_vm_setup(void *arg)
{
	int inconsistent = 0;
	int supmask;

	/*
	 * Fetch tunable in auto mode
	 */
	if (spectre_mitigation < 0) {
		TUNABLE_INT_FETCH("machdep.spectre_mitigation",
				  &spectre_mitigation);
	}

	if ((supmask = spectre_check_support()) != 0) {
		/*
		 * Must be supported on all cpus before we
		 * can enable it.  Returns silently if it
		 * isn't.
		 *
		 * NOTE! arg != NULL indicates we were called
		 *	 from cpuctl after a successful microcode
		 *	 update.
		 */
		if (arg != NULL) {
			globaldata_t save_gd;
			int n;

			save_gd = mycpu;
			for (n = 0; n < ncpus; ++n) {
				lwkt_setcpu_self(globaldata_find(n));
				cpu_ccfence();
				if (spectre_check_support() != supmask) {
					inconsistent = 1;
					break;
				}
			}
			lwkt_setcpu_self(save_gd);
		}
	}

	/*
	 * Be silent while microcode is being loaded on various CPUs,
	 * until all done.
	 */
	if (inconsistent) {
		spectre_mitigation = -1;
		spectre_support = 0;
		return;
	}

	/*
	 * Record the supported feature mask.
	 */
	spectre_support = supmask;

	/*
	 * Enable spectre_mitigation, set defaults if -1, adjust
	 * tuned value according to support if not.
	 *
	 * NOTE! We do not enable IBPB for user->kernel transitions
	 *	 by default, so that code is a no-op for now.
	 */
	if (spectre_support) {
		if (spectre_mitigation < 0) {
			spectre_mitigation = 0;

			/*
			 * IBRS toggling not currently recommended as a
			 * default.
			 */
			if (spectre_support & IBRS_AUTO_SUPPORTED)
				spectre_mitigation |= IBRS_AUTO_SUPPORTED;
			else if (spectre_support & IBRS_SUPPORTED)
				spectre_mitigation |= 0;

			/*
			 * STIBP toggling not currently recommended as a
			 * default.
			 */
			if (spectre_support & STIBP_AUTO_SUPPORTED)
				spectre_mitigation |= STIBP_AUTO_SUPPORTED;
			else if (spectre_support & STIBP_SUPPORTED)
				spectre_mitigation |= 0;

			/*
			 * IBPB adds enormous (~2uS) overhead to system
			 * calls etc, so we do not enable it by default.
			 */
			if (spectre_support & IBPB_SUPPORTED)
				spectre_mitigation |= 0;
		}
	} else {
		spectre_mitigation = -1;
	}

	/*
	 * Disallow sysctl changes when there is no support (otherwise
	 * the wrmsr will cause a protection fault).
	 */
	if (spectre_mitigation < 0)
		sysctl___machdep_spectre_mitigation.oid_kind &= ~CTLFLAG_WR;
	else
		sysctl___machdep_spectre_mitigation.oid_kind |= CTLFLAG_WR;

	spectre_sysctl_changed();
}

SYSINIT(spectre_vm_setup, SI_BOOT2_MACHDEP, SI_ORDER_ANY,
	spectre_vm_setup, NULL);

/*
 * platform-specific vmspace initialization (nothing for x86_64)
 */
void
cpu_vmspace_alloc(struct vmspace *vm __unused)
{
}

void
cpu_vmspace_free(struct vmspace *vm __unused)
{
}

int
kvm_access_check(vm_offset_t saddr, vm_offset_t eaddr, int prot)
{
	vm_offset_t addr;

	if (saddr < KvaStart)
		return EFAULT;
	if (eaddr >= KvaEnd)
		return EFAULT;
	for (addr = saddr; addr < eaddr; addr += PAGE_SIZE) {
		if (pmap_kextract(addr) == 0)
			return EFAULT;
	}
	if (!kernacc((caddr_t)saddr, eaddr - saddr, prot))
		return EFAULT;
	return 0;
}
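
/*
 * Example (sketch): a /dev/kmem-style consumer would validate a kernel
 * address range before touching it:
 *
 *	if (kvm_access_check(va, va + len, prot) != 0)
 *		return EFAULT;
 */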

#if 0

void _test_frame_enter(struct trapframe *frame);
void _test_frame_exit(struct trapframe *frame);

void
_test_frame_enter(struct trapframe *frame)
{
	thread_t td = curthread;

	if (ISPL(frame->tf_cs) == SEL_UPL) {
		KKASSERT(td->td_lwp);
		KASSERT(td->td_lwp->lwp_md.md_regs == frame,
			("_test_frame_enter: Frame mismatch %p %p",
			 td->td_lwp->lwp_md.md_regs, frame));
		td->td_lwp->lwp_saveusp = (void *)frame->tf_rsp;
		td->td_lwp->lwp_saveupc = (void *)frame->tf_rip;
	}
	if ((char *)frame < td->td_kstack ||
	    (char *)frame > td->td_kstack + td->td_kstack_size) {
		panic("_test_frame_enter: frame not on kstack %p kstack=%p",
		      frame, td->td_kstack);
	}
}

void
_test_frame_exit(struct trapframe *frame)
{
	thread_t td = curthread;

	if (ISPL(frame->tf_cs) == SEL_UPL) {
		KKASSERT(td->td_lwp);
		KASSERT(td->td_lwp->lwp_md.md_regs == frame,
			("_test_frame_exit: Frame mismatch %p %p",
			 td->td_lwp->lwp_md.md_regs, frame));
		if (td->td_lwp->lwp_saveusp != (void *)frame->tf_rsp) {
			kprintf("_test_frame_exit: %s:%d usp mismatch %p/%p\n",
				td->td_comm, td->td_proc->p_pid,
				td->td_lwp->lwp_saveusp,
				(void *)frame->tf_rsp);
		}
		if (td->td_lwp->lwp_saveupc != (void *)frame->tf_rip) {
			kprintf("_test_frame_exit: %s:%d upc mismatch %p/%p\n",
				td->td_comm, td->td_proc->p_pid,
				td->td_lwp->lwp_saveupc,
				(void *)frame->tf_rip);
		}

		/*
		 * adulterate the fields to catch entries that
		 * don't run through _test_frame_enter
		 */
		td->td_lwp->lwp_saveusp =
			(void *)~(intptr_t)td->td_lwp->lwp_saveusp;
		td->td_lwp->lwp_saveupc =
			(void *)~(intptr_t)td->td_lwp->lwp_saveupc;
	}
	if ((char *)frame < td->td_kstack ||
	    (char *)frame > td->td_kstack + td->td_kstack_size) {
		panic("_test_frame_exit: frame not on kstack %p kstack=%p",
		      frame, td->td_kstack);
	}
}

#endif