Fix amd64 trap handling.
[dragonfly.git] / sys / platform / pc64 / amd64 / trap.c
1/*-
2 * Copyright (c) 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 * Copyright (C) 1994, David Greenman
5 * Copyright (c) 2008 The DragonFly Project.
6 * Copyright (c) 2008 Jordan Gordeev.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * the University of Utah, and William Jolitz.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 * from: @(#)trap.c 7.4 (Berkeley) 5/13/91
40 * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $
41 * $DragonFly: src/sys/platform/pc64/amd64/trap.c,v 1.3 2008/09/09 04:06:18 dillon Exp $
42 */
43
44/*
45 * AMD64 Trap and System call handling
46 */
47
48#include "opt_ddb.h"
49#include "opt_ktrace.h"
50
51#include <machine/frame.h>
52#include <sys/param.h>
53#include <sys/systm.h>
54#include <sys/kernel.h>
55#include <sys/proc.h>
56#include <sys/pioctl.h>
57#include <sys/types.h>
58#include <sys/signal2.h>
59#include <sys/syscall.h>
60#include <sys/sysctl.h>
61#include <sys/sysent.h>
62#include <sys/systm.h>
63#ifdef KTRACE
64#include <sys/ktrace.h>
65#endif
66#include <sys/ktr.h>
67#include <sys/sysmsg.h>
68#include <sys/sysproto.h>
69#include <sys/sysunion.h>
70
71#include <vm/pmap.h>
72#include <vm/vm.h>
73#include <vm/vm_extern.h>
74#include <vm/vm_kern.h>
75#include <vm/vm_param.h>
76#include <machine/cpu.h>
77#include <machine/pcb.h>
78#include <machine/thread.h>
79#include <machine/vmparam.h>
80#include <machine/md_var.h>
81
82#include <ddb/ddb.h>
83
84#ifdef SMP
85
86#define MAKEMPSAFE(have_mplock) \
87 if (have_mplock == 0) { \
88 get_mplock(); \
89 have_mplock = 1; \
90 }
91
92#else
93
94#define MAKEMPSAFE(have_mplock)
95
96#endif
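
/*
 * Usage note (illustrative, following the pattern used by trap() and
 * syscall2() below): MAKEMPSAFE() is an acquire-once guard for the MP lock.
 *
 *	int have_mplock = 0;
 *	...
 *	MAKEMPSAFE(have_mplock);	// takes the MP lock only the first time
 *	...
 *	if (have_mplock)		// single release on the way out
 *		rel_mplock();
 */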
97
98extern void trap(struct trapframe *frame);
99extern void syscall2(struct trapframe *frame);
100
101static int trap_pfault(struct trapframe *, int);
102static void trap_fatal(struct trapframe *, vm_offset_t);
103void dblfault_handler(struct trapframe *frame);
104
105#define PCPU_GET(member) ((mycpu)->gd_##member)
106#define PCPU_INC(member) ((mycpu)->gd_##member)++
107
108#define MAX_TRAP_MSG 30
109static char *trap_msg[] = {
110 "", /* 0 unused */
111 "privileged instruction fault", /* 1 T_PRIVINFLT */
112 "", /* 2 unused */
113 "breakpoint instruction fault", /* 3 T_BPTFLT */
114 "", /* 4 unused */
115 "", /* 5 unused */
116 "arithmetic trap", /* 6 T_ARITHTRAP */
117 "system forced exception", /* 7 T_ASTFLT */
118 "", /* 8 unused */
119 "general protection fault", /* 9 T_PROTFLT */
120 "trace trap", /* 10 T_TRCTRAP */
121 "", /* 11 unused */
122 "page fault", /* 12 T_PAGEFLT */
123 "", /* 13 unused */
124 "alignment fault", /* 14 T_ALIGNFLT */
125 "", /* 15 unused */
126 "", /* 16 unused */
127 "", /* 17 unused */
128 "integer divide fault", /* 18 T_DIVIDE */
129 "non-maskable interrupt trap", /* 19 T_NMI */
130 "overflow trap", /* 20 T_OFLOW */
131 "FPU bounds check fault", /* 21 T_BOUND */
132 "FPU device not available", /* 22 T_DNA */
133 "double fault", /* 23 T_DOUBLEFLT */
134 "FPU operand fetch fault", /* 24 T_FPOPFLT */
135 "invalid TSS fault", /* 25 T_TSSFLT */
136 "segment not present fault", /* 26 T_SEGNPFLT */
137 "stack fault", /* 27 T_STKFLT */
138 "machine check trap", /* 28 T_MCHK */
139 "SIMD floating-point exception", /* 29 T_XMMFLT */
140 "reserved (unknown) fault", /* 30 T_RESERVED */
141};
142
143#ifdef DDB
144static int ddb_on_nmi = 1;
145SYSCTL_INT(_machdep, OID_AUTO, ddb_on_nmi, CTLFLAG_RW,
146 &ddb_on_nmi, 0, "Go to DDB on NMI");
147#endif
148static int panic_on_nmi = 1;
149SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
150 &panic_on_nmi, 0, "Panic on NMI");
151static int fast_release;
152SYSCTL_INT(_machdep, OID_AUTO, fast_release, CTLFLAG_RW,
153 &fast_release, 0, "Passive Release was optimal");
154static int slow_release;
155SYSCTL_INT(_machdep, OID_AUTO, slow_release, CTLFLAG_RW,
156 &slow_release, 0, "Passive Release was nonoptimal");
157#ifdef SMP
158static int syscall_mpsafe = 1;
159SYSCTL_INT(_kern, OID_AUTO, syscall_mpsafe, CTLFLAG_RW,
160 &syscall_mpsafe, 0, "Allow MPSAFE marked syscalls to run without BGL");
161TUNABLE_INT("kern.syscall_mpsafe", &syscall_mpsafe);
162static int trap_mpsafe = 1;
163SYSCTL_INT(_kern, OID_AUTO, trap_mpsafe, CTLFLAG_RW,
164 &trap_mpsafe, 0, "Allow traps to mostly run without the BGL");
165TUNABLE_INT("kern.trap_mpsafe", &trap_mpsafe);
166#endif
167
168
169
170/*
171 * Passive USER->KERNEL transition. This only occurs if we block in the
172 * kernel while still holding our userland priority. We have to fixup our
173 * priority in order to avoid potential deadlocks before we allow the system
174 * to switch us to another thread.
175 */
176static void
177passive_release(struct thread *td)
178{
179 struct lwp *lp = td->td_lwp;
180
181 td->td_release = NULL;
182 lwkt_setpri_self(TDPRI_KERN_USER);
183 lp->lwp_proc->p_usched->release_curproc(lp);
184}
185
186/*
187 * userenter() passively intercepts the thread switch function to increase
188 * the thread priority from a user priority to a kernel priority, reducing
189 * syscall and trap overhead for the case where no switch occurs.
190 */
191
192static __inline void
193userenter(struct thread *curtd)
194{
195 curtd->td_release = passive_release;
196}
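
/*
 * Illustrative sketch of how the hook installed above fires.  If the thread
 * blocks while still holding its userland priority, the LWKT switch path is
 * assumed (as on DragonFly's other platforms) to invoke td_release before
 * another thread is chosen, roughly:
 *
 *	if (td->td_release)
 *		td->td_release(td);	// -> passive_release()
 *
 * so the priority fixup is only paid for when a switch actually occurs.
 */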
197
198/*
199 * Handle signals, upcalls, profiling, and other AST's and/or tasks that
200 * must be completed before we can return to or try to return to userland.
201 *
202 * Note that td_sticks is a 64 bit quantity, but there's no point doing 64
203 * bit arithmetic on the delta calculation, so the absolute tick values are
204 * truncated to an integer.
205 */
206static void
207userret(struct lwp *lp, struct trapframe *frame, int sticks)
208{
209 struct proc *p = lp->lwp_proc;
210 int sig;
211
212 /*
213 * Charge system time if profiling. Note: times are in microseconds.
214 * This may do a copyout and block, so do it first even though it
215 * means some system time will be charged as user time.
216 */
217 if (p->p_flag & P_PROFIL) {
218 addupc_task(p, frame->tf_rip,
219 (u_int)((int)lp->lwp_thread->td_sticks - sticks));
220 }
221
222recheck:
223 /*
224 * If the jungle wants us dead, so be it.
225 */
226 if (lp->lwp_flag & LWP_WEXIT) {
227 get_mplock();
228 lwp_exit(0);
229 rel_mplock(); /* NOT REACHED */
230 }
231
232 /*
233 * Block here if we are in a stopped state.
234 */
235 if (p->p_stat == SSTOP) {
236 get_mplock();
237 tstop();
238 rel_mplock();
239 goto recheck;
240 }
241
242 /*
243 * Post any pending upcalls. If running a virtual kernel be sure
244 * to restore the virtual kernel's vmspace before posting the upcall.
245 */
246 if (p->p_flag & P_UPCALLPEND) {
247 p->p_flag &= ~P_UPCALLPEND;
248 get_mplock();
249 postupcall(lp);
250 rel_mplock();
251 goto recheck;
252 }
253
254 /*
255 * Post any pending signals. If running a virtual kernel be sure
256 * to restore the virtual kernel's vmspace before posting the signal.
257 */
258 if ((sig = CURSIG(lp)) != 0) {
259 get_mplock();
260 postsig(sig);
261 rel_mplock();
262 goto recheck;
263 }
264
265 /*
266 * Block here if we are swapped out, but still process signals
267 * (such as SIGKILL). proc0 (the swapin scheduler) is already
268 * aware of our situation, so we do not have to wake it up.
269 */
270 if (p->p_flag & P_SWAPPEDOUT) {
271 get_mplock();
272 p->p_flag |= P_SWAPWAIT;
273 swapin_request();
274 if (p->p_flag & P_SWAPWAIT)
275 tsleep(p, PCATCH, "SWOUT", 0);
276 p->p_flag &= ~P_SWAPWAIT;
277 rel_mplock();
278 goto recheck;
279 }
280
281 /*
282 * Make sure postsig() handled request to restore old signal mask after
283 * running signal handler.
284 */
285 KKASSERT((lp->lwp_flag & LWP_OLDMASK) == 0);
286}
287
288/*
289 * Cleanup from userenter and any passive release that might have occurred.
290 * We must reclaim the current-process designation before we can return
291 * to usermode. We also handle both LWKT and USER reschedule requests.
292 */
293static __inline void
294userexit(struct lwp *lp)
295{
296 struct thread *td = lp->lwp_thread;
297 globaldata_t gd = td->td_gd;
298
299 /*
300 * Handle stop requests at kernel priority. Any requests queued
301 * after this loop will generate another AST.
302 */
303 while (lp->lwp_proc->p_stat == SSTOP) {
304 get_mplock();
305 tstop();
306 rel_mplock();
307 }
308
309 /*
310 * Reduce our priority in preparation for a return to userland. If
311 * our passive release function was still in place, our priority was
312 * never raised and does not need to be reduced.
313 */
314 if (td->td_release == NULL)
315 lwkt_setpri_self(TDPRI_USER_NORM);
316 td->td_release = NULL;
317
318 /*
319 * Become the current user scheduled process if we aren't already,
320 * and deal with reschedule requests and other factors.
321 */
322 lp->lwp_proc->p_usched->acquire_curproc(lp);
323 /* WARNING: we may have migrated cpu's */
324 /* gd = td->td_gd; */
325}
326
327#if !defined(KTR_KERNENTRY)
328#define KTR_KERNENTRY KTR_ALL
329#endif
330KTR_INFO_MASTER(kernentry);
331KTR_INFO(KTR_KERNENTRY, kernentry, trap, 0, "STR",
332 sizeof(long) + sizeof(long) + sizeof(long) + sizeof(vm_offset_t));
333KTR_INFO(KTR_KERNENTRY, kernentry, trap_ret, 0, "STR",
334 sizeof(long) + sizeof(long));
335KTR_INFO(KTR_KERNENTRY, kernentry, syscall, 0, "STR",
336 sizeof(long) + sizeof(long) + sizeof(long));
337KTR_INFO(KTR_KERNENTRY, kernentry, syscall_ret, 0, "STR",
338 sizeof(long) + sizeof(long) + sizeof(long));
339KTR_INFO(KTR_KERNENTRY, kernentry, fork_ret, 0, "STR",
340 sizeof(long) + sizeof(long));
341
342/*
343 * Exception, fault, and trap interface to the kernel.
344 * This common code is called from assembly language IDT gate entry
345 * routines that prepare a suitable stack frame, and restore this
346 * frame after the exception has been processed.
347 *
348 * This function is also called from doreti in an interlock to handle ASTs.
349 * For example: hardwareint->INTROUTINE->(set ast)->doreti->trap
350 *
351 * NOTE! We have to retrieve the fault address prior to obtaining the
352 * MP lock because get_mplock() may switch out. YYY cr2 really ought
353 * to be retrieved by the assembly code, not here.
354 *
355 * XXX gd_trap_nesting_level currently prevents lwkt_switch() from panicking
356 * if an attempt is made to switch from a fast interrupt or IPI. This is
357 * necessary to properly take fatal kernel traps on SMP machines if
358 * get_mplock() has to block.
359 */
360
361void
362trap(struct trapframe *frame)
363{
364 struct globaldata *gd = mycpu;
365 struct thread *td = gd->gd_curthread;
366 struct lwp *lp = td->td_lwp;
367 struct proc *p;
368 int sticks = 0;
369 int i = 0, ucode = 0, type, code;
370#ifdef SMP
371 int have_mplock = 0;
372#endif
373#ifdef INVARIANTS
374 int crit_count = td->td_pri & ~TDPRI_MASK;
375#endif
376 vm_offset_t eva;
377
378 p = td->td_proc;
379
380#ifndef JG
381 kprintf0("TRAP ");
382 kprintf0("\"%s\" type=%ld\n",
383 trap_msg[frame->tf_trapno], frame->tf_trapno);
384 kprintf0(" rip=%lx rsp=%lx\n", frame->tf_rip, frame->tf_rsp);
385 kprintf0(" err=%lx addr=%lx\n", frame->tf_err, frame->tf_addr);
386 kprintf0(" cs=%lx ss=%lx rflags=%lx\n", (unsigned long)frame->tf_cs, (unsigned long)frame->tf_ss, frame->tf_rflags);
387#endif
388
398#ifdef DDB
399 if (db_active) {
400 eva = (frame->tf_trapno == T_PAGEFLT ? frame->tf_addr : 0);
401 ++gd->gd_trap_nesting_level;
402 MAKEMPSAFE(have_mplock);
403 trap_fatal(frame, eva);
404 --gd->gd_trap_nesting_level;
405 goto out2;
406 }
407#endif
408
409 eva = 0;
410
411#ifdef SMP
412 if (trap_mpsafe == 0) {
413 ++gd->gd_trap_nesting_level;
414 MAKEMPSAFE(have_mplock);
415 --gd->gd_trap_nesting_level;
416 }
417#endif
418
419 if ((frame->tf_rflags & PSL_I) == 0) {
420 /*
421 * Buggy application or kernel code has disabled interrupts
422 * and then trapped. Enabling interrupts now is wrong, but
423 * it is better than running with interrupts disabled until
424 * they are accidentally enabled later.
425 */
426 type = frame->tf_trapno;
427 if (ISPL(frame->tf_cs) == SEL_UPL) {
428 MAKEMPSAFE(have_mplock);
429 /* JG curproc can be NULL */
430 kprintf(
431 "pid %ld (%s): trap %d with interrupts disabled\n",
432 (long)curproc->p_pid, curproc->p_comm, type);
433 } else if (type != T_NMI && type != T_BPTFLT &&
434 type != T_TRCTRAP) {
435 /*
436 * XXX not quite right, since this may be for a
437 * multiple fault in user mode.
438 */
439 MAKEMPSAFE(have_mplock);
440 kprintf("kernel trap %d with interrupts disabled\n",
441 type);
442 }
443 cpu_enable_intr();
444 }
445
446 type = frame->tf_trapno;
447 code = frame->tf_err;
448
449 if (ISPL(frame->tf_cs) == SEL_UPL) {
450 /* user trap */
451
452#if JG
453 KTR_LOG(kernentry_trap, p->p_pid, lp->lwp_tid,
454 frame->tf_trapno, eva);
455#else
456 KTR_LOG_STR(kernentry_trap, "pid=%d, tid=%d, trapno=%ld, eva=%lx", p->p_pid, lp->lwp_tid,
457 frame->tf_trapno, (frame->tf_trapno == T_PAGEFLT ? frame->tf_addr : 0));
458#endif
459
460 userenter(td);
461
462 sticks = (int)td->td_sticks;
463 lp->lwp_md.md_regs = frame;
464
465 switch (type) {
466 case T_PRIVINFLT: /* privileged instruction fault */
467 ucode = ILL_PRVOPC;
468 i = SIGILL;
469 break;
470
471 case T_BPTFLT: /* bpt instruction fault */
472 case T_TRCTRAP: /* trace trap */
473 frame->tf_rflags &= ~PSL_T;
474 i = SIGTRAP;
475 break;
476
477 case T_ARITHTRAP: /* arithmetic trap */
478 ucode = code;
479 i = SIGFPE;
480#if 0
481#if JG
482 ucode = fputrap();
483#else
484 ucode = code;
485#endif
486 i = SIGFPE;
487#endif
488 break;
489
490 case T_ASTFLT: /* Allow process switch */
491 mycpu->gd_cnt.v_soft++;
492 if (mycpu->gd_reqflags & RQF_AST_OWEUPC) {
493 atomic_clear_int_nonlocked(&mycpu->gd_reqflags,
494 RQF_AST_OWEUPC);
495 addupc_task(p, p->p_prof.pr_addr,
496 p->p_prof.pr_ticks);
497 }
498 goto out;
499
500 case T_PROTFLT: /* general protection fault */
501 case T_SEGNPFLT: /* segment not present fault */
502 case T_TSSFLT: /* invalid TSS fault */
503 case T_DOUBLEFLT: /* double fault */
504 default:
505 ucode = code + BUS_SEGM_FAULT;
506 i = SIGBUS;
507 break;
508
509 case T_PAGEFLT: /* page fault */
510 MAKEMPSAFE(have_mplock);
511 i = trap_pfault(frame, TRUE);
512 //kprintf("TRAP_PFAULT %d\n", i);
513 if (frame->tf_rip == 0)
514 Debugger("debug");
515 if (i == -1)
516 goto out;
517 if (i == 0)
518 goto out;
519
520 ucode = T_PAGEFLT;
521 break;
522
523 case T_DIVIDE: /* integer divide fault */
524 ucode = FPE_INTDIV;
525 i = SIGFPE;
526 break;
527
528 case T_NMI:
529 MAKEMPSAFE(have_mplock);
530 /* machine/parity/power fail/"kitchen sink" faults */
531 if (isa_nmi(code) == 0) {
532#ifdef DDB
533 /*
534 * NMI can be hooked up to a pushbutton
535 * for debugging.
536 */
537 if (ddb_on_nmi) {
538 kprintf ("NMI ... going to debugger\n");
539 kdb_trap(type, 0, frame);
540 }
541#endif /* DDB */
542 goto out2;
543 } else if (panic_on_nmi)
544 panic("NMI indicates hardware failure");
545 break;
546
547 case T_OFLOW: /* integer overflow fault */
548 ucode = FPE_INTOVF;
549 i = SIGFPE;
550 break;
551
552 case T_BOUND: /* bounds check fault */
553 ucode = FPE_FLTSUB;
554 i = SIGFPE;
555 break;
556
557 case T_DNA:
558 /*
559 * Virtual kernel intercept - pass the DNA exception
560 * to the virtual kernel if it asked to handle it.
561 * This occurs when the virtual kernel is holding
562 * onto the FP context for a different emulated
563 * process than the one currently running.
564 *
565 * We must still call npxdna() since we may have
566 * saved FP state that the virtual kernel needs
567 * to hand over to a different emulated process.
568 */
569 if (lp->lwp_vkernel && lp->lwp_vkernel->ve &&
570 (td->td_pcb->pcb_flags & FP_VIRTFP)
571 ) {
572 npxdna();
573 break;
574 }
575
576 /*
577 * The kernel may have switched out the FP unit's
578 * state, causing the user process to take a fault
579 * when it tries to use the FP unit. Restore the
580 * state here.
581 */
582 if (npxdna())
583 goto out;
584 i = SIGFPE;
585 ucode = FPE_FPU_NP_TRAP;
586 break;
587
588 case T_FPOPFLT: /* FPU operand fetch fault */
589 ucode = T_FPOPFLT;
590 i = SIGILL;
591 break;
592
593 case T_XMMFLT: /* SIMD floating-point exception */
594 ucode = 0; /* XXX */
595 i = SIGFPE;
596 break;
597 }
598 } else {
599 /* kernel trap */
600
601 switch (type) {
602 case T_PAGEFLT: /* page fault */
603 MAKEMPSAFE(have_mplock);
604 trap_pfault(frame, FALSE);
605 goto out2;
606
607 case T_DNA:
608 /*
609 * The kernel is apparently using fpu for copying.
610 * XXX this should be fatal unless the kernel has
611 * registered such use.
612 */
613 if (npxdna())
614 goto out2;
615 break;
616
617 case T_STKFLT: /* stack fault */
618 break;
619
620 case T_PROTFLT: /* general protection fault */
621 case T_SEGNPFLT: /* segment not present fault */
622 /*
623 * Invalid segment selectors and out of bounds
624 * %rip's and %rsp's can be set up in user mode.
625 * This causes a fault in kernel mode when the
626 * kernel tries to return to user mode. We want
627 * to get this fault so that we can fix the
628 * problem here and not have to check all the
629 * selectors and pointers when the user changes
630 * them.
631 */
632 kprintf0("trap.c line %d\n", __LINE__);
633 if (mycpu->gd_intr_nesting_level == 0) {
634 if (td->td_pcb->pcb_onfault) {
635 frame->tf_rip = (register_t)
636 td->td_pcb->pcb_onfault;
637 goto out2;
638 }
639 if (frame->tf_rip == (long)doreti_iret) {
640 frame->tf_rip = (long)doreti_iret_fault;
641 goto out2;
642 }
643 }
644 break;
645
646 case T_TSSFLT:
647 /*
648 * PSL_NT can be set in user mode and isn't cleared
649 * automatically when the kernel is entered. This
650 * causes a TSS fault when the kernel attempts to
651 * `iret' because the TSS link is uninitialized. We
652 * want to get this fault so that we can fix the
653 * problem here and not every time the kernel is
654 * entered.
655 */
656 if (frame->tf_rflags & PSL_NT) {
657 frame->tf_rflags &= ~PSL_NT;
658 goto out2;
659 }
660 break;
661
662 case T_TRCTRAP: /* trace trap */
663#if 0
664 if (frame->tf_rip == (int)IDTVEC(syscall)) {
665 /*
666 * We've just entered system mode via the
667 * syscall lcall. Continue single stepping
668 * silently until the syscall handler has
669 * saved the flags.
670 */
671 goto out2;
672 }
673 if (frame->tf_rip == (int)IDTVEC(syscall) + 1) {
674 /*
675 * The syscall handler has now saved the
676 * flags. Stop single stepping it.
677 */
678 frame->tf_rflags &= ~PSL_T;
679 goto out2;
680 }
681#endif
682
683 /*
684 * Ignore debug register trace traps due to
685 * accesses in the user's address space, which
686 * can happen under several conditions such as
687 * if a user sets a watchpoint on a buffer and
688 * then passes that buffer to a system call.
689 * We still want to get TRCTRAPS for addresses
690 * in kernel space because that is useful when
691 * debugging the kernel.
692 */
693#if JG
694 if (user_dbreg_trap()) {
695 /*
696 * Reset breakpoint bits because the
697 * processor doesn't
698 */
699 /* XXX check upper bits here */
700 load_dr6(rdr6() & 0xfffffff0);
701 goto out2;
702 }
703#endif
704 /*
705 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
706 */
707 case T_BPTFLT:
708 /*
709 * If DDB is enabled, let it handle the debugger trap.
710 * Otherwise, debugger traps "can't happen".
711 */
712#ifdef DDB
713 MAKEMPSAFE(have_mplock);
714 if (kdb_trap(type, 0, frame))
715 goto out2;
716#endif
717 break;
718
719 case T_NMI:
720 MAKEMPSAFE(have_mplock);
721 /* machine/parity/power fail/"kitchen sink" faults */
722#if NISA > 0
723 if (isa_nmi(code) == 0) {
724#ifdef DDB
725 /*
726 * NMI can be hooked up to a pushbutton
727 * for debugging.
728 */
729 if (ddb_on_nmi) {
730 kprintf ("NMI ... going to debugger\n");
731 kdb_trap(type, 0, frame);
732 }
733#endif /* DDB */
734 goto out2;
735 } else if (panic_on_nmi == 0)
736 goto out2;
737 /* FALL THROUGH */
738#endif /* NISA > 0 */
739 }
740 MAKEMPSAFE(have_mplock);
741 trap_fatal(frame, 0);
742 goto out2;
743 }
744
755 /*
756 * Virtual kernel intercept - if the fault is directly related to a
757 * VM context managed by a virtual kernel then let the virtual kernel
758 * handle it.
759 */
760 if (lp->lwp_vkernel && lp->lwp_vkernel->ve) {
761 vkernel_trap(lp, frame);
762 goto out;
763 }
764
765 /*
766 * Translate fault for emulators (e.g. Linux)
767 */
768 if (*p->p_sysent->sv_transtrap)
769 i = (*p->p_sysent->sv_transtrap)(i, type);
770
771 MAKEMPSAFE(have_mplock);
772 trapsignal(lp, i, ucode);
773
774#ifdef DEBUG
775 if (type <= MAX_TRAP_MSG) {
776 uprintf("fatal process exception: %s",
777 trap_msg[type]);
778 if ((type == T_PAGEFLT) || (type == T_PROTFLT))
779 uprintf(", fault VA = 0x%lx", frame->tf_addr);
780 uprintf("\n");
781 }
782#endif
783
784out:
785#ifdef SMP
786 if (ISPL(frame->tf_cs) == SEL_UPL)
787 KASSERT(td->td_mpcount == have_mplock, ("badmpcount trap/end from %p", (void *)frame->tf_rip));
788#endif
789 userret(lp, frame, sticks);
790 userexit(lp);
791out2: ;
792#ifdef SMP
793 if (have_mplock)
794 rel_mplock();
795#endif
796 if (p != NULL && lp != NULL)
797#if JG
798 KTR_LOG(kernentry_trap_ret, p->p_pid, lp->lwp_tid);
799#else
800 KTR_LOG_STR(kernentry_trap_ret, "pid=%d, tid=%d", p->p_pid, lp->lwp_tid);
801#endif
802#ifdef INVARIANTS
803 KASSERT(crit_count == (td->td_pri & ~TDPRI_MASK),
804 ("syscall: critical section count mismatch! %d/%d",
805 crit_count / TDPRI_CRIT, td->td_pri / TDPRI_CRIT));
806#endif
807}
808
809static int
810trap_pfault(struct trapframe *frame, int usermode)
811{
812 vm_offset_t va;
813 struct vmspace *vm = NULL;
814 vm_map_t map;
815 int rv = 0;
816 vm_prot_t ftype;
817 thread_t td = curthread;
818 struct lwp *lp = td->td_lwp;
819
820 va = trunc_page(frame->tf_addr);
821 if (va >= VM_MIN_KERNEL_ADDRESS) {
822 /*
823 * Don't allow user-mode faults in kernel address space.
824 */
825 if (usermode)
826 goto nogo;
827
828 map = &kernel_map;
829 } else {
830 /*
831 * This is a fault on non-kernel virtual memory.
832 * vm is initialized above to NULL. If curproc is NULL
833 * or curproc->p_vmspace is NULL the fault is fatal.
834 */
835 if (lp != NULL)
836 vm = lp->lwp_vmspace;
837
838 if (vm == NULL)
839 goto nogo;
840
841 map = &vm->vm_map;
842 }
843
844 /*
845 * PGEX_I is defined only if the execute disable bit capability is
846 * supported and enabled.
847 */
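	/*
	 * For reference, the architecturally defined x86 page-fault error
	 * code bits consulted here and in trap_fatal():
	 *
	 *	PGEX_P (bit 0)	0 = page not present, 1 = protection violation
	 *	PGEX_W (bit 1)	fault was caused by a write access
	 *	PGEX_U (bit 2)	fault was taken while in user mode
	 *	PGEX_I		fault was an instruction fetch (NX enabled only)
	 */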
848 if (frame->tf_err & PGEX_W)
849 ftype = VM_PROT_WRITE;
850#if JG
851 else if ((frame->tf_err & PGEX_I) && pg_nx != 0)
852 ftype = VM_PROT_EXECUTE;
853#endif
854 else
855 ftype = VM_PROT_READ;
856
857 if (map != &kernel_map) {
858 /*
859 * Keep swapout from messing with us during this
860 * critical time.
861 */
862 PHOLD(lp->lwp_proc);
863
864 /*
865 * Grow the stack if necessary
866 */
867 /* grow_stack returns false only if va falls into
868 * a growable stack region and the stack growth
869 * fails. It returns true if va was not within
870 * a growable stack region, or if the stack
871 * growth succeeded.
872 */
873 if (!grow_stack(lp->lwp_proc, va)) {
874 rv = KERN_FAILURE;
875 PRELE(lp->lwp_proc);
876 goto nogo;
877 }
878
879 /* Fault in the user page: */
880 rv = vm_fault(map, va, ftype,
881 (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY
882 : VM_FAULT_NORMAL);
883
884 PRELE(lp->lwp_proc);
885 } else {
886 /*
887 * Don't have to worry about process locking or stacks
888 * in the kernel.
889 */
890 rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
891 }
892
893 if (rv == KERN_SUCCESS)
894 return (0);
895nogo:
896 if (!usermode) {
897 if (td->td_gd->gd_intr_nesting_level == 0 &&
898 td->td_pcb->pcb_onfault) {
899 frame->tf_rip = (register_t)td->td_pcb->pcb_onfault;
900 return (0);
901 }
902 trap_fatal(frame, frame->tf_addr);
903 return (-1);
904 }
905
906 /*
907 * NOTE: on amd64 we have a tf_addr field in the trapframe, no
908 * kludge is needed to pass the fault address to signal handlers.
909 */
910
911 return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
912}
913
914static void
915trap_fatal(struct trapframe *frame, vm_offset_t eva)
916{
917 int code, ss;
918 u_int type;
919 long rsp;
920 struct soft_segment_descriptor softseg;
921 char *msg;
922
923 code = frame->tf_err;
924 type = frame->tf_trapno;
925 sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)], &softseg);
926
927 if (type <= MAX_TRAP_MSG)
928 msg = trap_msg[type];
929 else
930 msg = "UNKNOWN";
931 kprintf("\n\nFatal trap %d: %s while in %s mode\n", type, msg,
932 ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
933#ifdef SMP
934 /* two separate prints in case of a trap on an unmapped page */
935 kprintf("cpuid = %d; ", PCPU_GET(cpuid));
936 kprintf("apic id = %02x\n", PCPU_GET(apic_id));
937#endif
938 if (type == T_PAGEFLT) {
939 kprintf("fault virtual address = 0x%lx\n", eva);
940 kprintf("fault code = %s %s %s, %s\n",
941 code & PGEX_U ? "user" : "supervisor",
942 code & PGEX_W ? "write" : "read",
943 code & PGEX_I ? "instruction" : "data",
944 code & PGEX_P ? "protection violation" : "page not present");
945 }
946 kprintf("instruction pointer = 0x%lx:0x%lx\n",
947 frame->tf_cs & 0xffff, frame->tf_rip);
948 if (ISPL(frame->tf_cs) == SEL_UPL) {
949 ss = frame->tf_ss & 0xffff;
950 rsp = frame->tf_rsp;
951 } else {
952 ss = GSEL(GDATA_SEL, SEL_KPL);
953 rsp = (long)&frame->tf_rsp;
954 }
955 kprintf("stack pointer = 0x%x:0x%lx\n", ss, rsp);
956 kprintf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp);
957 kprintf("code segment = base 0x%lx, limit 0x%lx, type 0x%x\n",
958 softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
959 kprintf(" = DPL %d, pres %d, long %d, def32 %d, gran %d\n",
960 softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32,
961 softseg.ssd_gran);
962 kprintf("processor eflags = ");
963 if (frame->tf_rflags & PSL_T)
964 kprintf("trace trap, ");
965 if (frame->tf_rflags & PSL_I)
966 kprintf("interrupt enabled, ");
967 if (frame->tf_rflags & PSL_NT)
968 kprintf("nested task, ");
969 if (frame->tf_rflags & PSL_RF)
970 kprintf("resume, ");
971 kprintf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12);
972 kprintf("current process = ");
973 if (curproc) {
974 kprintf("%lu\n",
975 (u_long)curproc->p_pid);
976 } else {
977 kprintf("Idle\n");
978 }
979 kprintf("current thread = pri %d ", curthread->td_pri);
980 if (curthread->td_pri >= TDPRI_CRIT)
981 kprintf("(CRIT)");
982 kprintf("\n");
983
984#ifdef DDB
985 if ((debugger_on_panic || db_active) && kdb_trap(type, code, frame))
986 return;
987#endif
988 kprintf("trap number = %d\n", type);
989 if (type <= MAX_TRAP_MSG)
990 panic("%s", trap_msg[type]);
991 else
992 panic("unknown/reserved trap");
993}
994
995/*
996 * Double fault handler. Called when a fault occurs while writing
997 * a frame for a trap/exception onto the stack. This usually occurs
998 * when the stack overflows (such is the case with infinite recursion,
999 * for example).
1000 */
1001void
1002dblfault_handler(struct trapframe *frame)
1003{
1004 kprintf0("DOUBLE FAULT\n");
1005 kprintf("\nFatal double fault\n");
1006 kprintf("rip = 0x%lx\n", frame->tf_rip);
1007 kprintf("rsp = 0x%lx\n", frame->tf_rsp);
1008 kprintf("rbp = 0x%lx\n", frame->tf_rbp);
1009#ifdef SMP
1010 /* two separate prints in case of a trap on an unmapped page */
1011 kprintf("cpuid = %d; ", PCPU_GET(cpuid));
1012 kprintf("apic id = %02x\n", PCPU_GET(apic_id));
1013#endif
1014 panic("double fault");
1015}
1016
1017/*
1018 * syscall2 - MP aware system call request C handler
1019 *
1020 * A system call is essentially treated as a trap except that the
1021 * MP lock is not held on entry or return. We are responsible for
1022 * obtaining the MP lock if necessary and for handling ASTs
1023 * (e.g. a task switch) prior to return.
1024 *
1025 * In general, only simple access and manipulation of curproc and
1026 * the current stack is allowed without having to hold MP lock.
1027 *
1028 * MPSAFE - note that large sections of this routine are run without
1029 * the MP lock.
1030 */
1031void
1032syscall2(struct trapframe *frame)
1033{
1034 struct thread *td = curthread;
1035 struct proc *p = td->td_proc;
1036 struct lwp *lp = td->td_lwp;
1037 caddr_t params;
1038 struct sysent *callp;
1039 register_t orig_tf_rflags;
1040 int sticks;
1041 int error;
1042 int narg;
1043#ifdef INVARIANTS
1044 int crit_count = td->td_pri & ~TDPRI_MASK;
1045#endif
1046#ifdef SMP
1047 int have_mplock = 0;
1048#endif
1049 register_t *argp;
1050 u_int code;
1051 int reg, regcnt;
1052 union sysunion args;
1053 register_t *argsdst;
1054 kprintf0("SYSCALL rip = %016llx\n", frame->tf_rip);
1055
1056 PCPU_INC(cnt.v_syscall);
1057
1058 kprintf0("\033[31mSYSCALL %ld\033[39m\n", frame->tf_rax);
1059#ifdef DIAGNOSTIC
1060 if (ISPL(frame->tf_cs) != SEL_UPL) {
1061 get_mplock();
1062 panic("syscall");
1063 /* NOT REACHED */
1064 }
1065#endif
1066
1067#if JG
1068 KTR_LOG(kernentry_syscall, p->p_pid, lp->lwp_tid,
1069 frame->tf_rax);
1070#else
1071 KTR_LOG_STR(kernentry_syscall, "pid=%d, tid=%d, call=%ld", p->p_pid, lp->lwp_tid,
1072 frame->tf_rax);
1073#endif
1074
1075#ifdef SMP
1076 KASSERT(td->td_mpcount == 0, ("badmpcount syscall2 from %p", (void *)frame->tf_rip));
1077 if (syscall_mpsafe == 0)
1078 MAKEMPSAFE(have_mplock);
1079#endif
1080 userenter(td); /* lazy raise our priority */
1081
1082 reg = 0;
1083 regcnt = 6;
1084 /*
1085 * Misc
1086 */
1087 sticks = (int)td->td_sticks;
1088 orig_tf_rflags = frame->tf_rflags;
1089
1090 /*
1091 * Virtual kernel intercept - if a VM context managed by a virtual
1092 * kernel issues a system call the virtual kernel handles it, not us.
1093 * Restore the virtual kernel context and return from its system
1094 * call. The current frame is copied out to the virtual kernel.
1095 */
1096 if (lp->lwp_vkernel && lp->lwp_vkernel->ve) {
1097 error = vkernel_trap(lp, frame);
1098 frame->tf_rax = error;
1099 if (error)
1100 frame->tf_rflags |= PSL_C;
1101 error = EJUSTRETURN;
1102 goto out;
1103 }
1104
1105 /*
1106 * Get the system call parameters and account for time
1107 */
1108 lp->lwp_md.md_regs = frame;
1109 params = (caddr_t)frame->tf_rsp + sizeof(register_t);
1110 code = frame->tf_rax;
1111
1112 if (p->p_sysent->sv_prepsyscall) {
1113 (*p->p_sysent->sv_prepsyscall)(
1114 frame, (int *)(&args.nosys.sysmsg + 1),
1115 &code, &params);
1116 } else {
1117 if (code == SYS_syscall || code == SYS___syscall) {
1118 code = frame->tf_rdi;
1119 reg++;
1120 regcnt--;
1121 }
1122 }
1123
1124 if (p->p_sysent->sv_mask)
1125 code &= p->p_sysent->sv_mask;
1126
1127 if (code >= p->p_sysent->sv_size)
1128 callp = &p->p_sysent->sv_table[0];
1129 else
1130 callp = &p->p_sysent->sv_table[code];
1131
1132 narg = callp->sy_narg & SYF_ARGMASK;
1133
1134 /*
1135 * On amd64 we get up to six arguments in registers. The rest are
1136 * on the stack. The first six members of 'struct trapframe' happen
1137 * to be the registers used to pass arguments, in exactly the right
1138 * order.
1139 */
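	/*
	 * Sketch of the assumed layout (the real definition lives in
	 * machine/frame.h; the member order below is an assumption for
	 * illustration, not quoted from this commit):
	 *
	 *	struct trapframe {
	 *		register_t tf_rdi;	// syscall arg 0
	 *		register_t tf_rsi;	// syscall arg 1
	 *		register_t tf_rdx;	// syscall arg 2
	 *		register_t tf_rcx;	// syscall arg 3 (entry code is
	 *					//  assumed to stash %r10 here)
	 *		register_t tf_r8;	// syscall arg 4
	 *		register_t tf_r9;	// syscall arg 5
	 *		...
	 *	};
	 *
	 * which is why a single bcopy() starting at &frame->tf_rdi can pick
	 * up every register-passed argument.
	 */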
1140 argp = &frame->tf_rdi;
1141 argp += reg;
1142 argsdst = (register_t *)(&args.nosys.sysmsg + 1);
1143 /*
1144 * JG can we overflow the space pointed to by 'argsdst'
1145 * either with 'bcopy' or with 'copyin'?
1146 */
1147 bcopy(argp, argsdst, sizeof(register_t) * regcnt);
1148 /*
1149 * copyin is MP aware, but the tracing code is not
1150 */
1151 if (narg > regcnt) {
1152 KASSERT(params != NULL, ("copyin args with no params!"));
1153 error = copyin(params, &argsdst[regcnt],
1154 (narg - regcnt) * sizeof(register_t));
1155 if (error) {
1156#ifdef KTRACE
1157 if (KTRPOINT(td, KTR_SYSCALL)) {
1158 MAKEMPSAFE(have_mplock);
1159
1160 ktrsyscall(lp, code, narg,
1161 (void *)(&args.nosys.sysmsg + 1));
1162 }
1163#endif
1164 goto bad;
1165 }
1166 }
1167
1168#ifdef KTRACE
1169 if (KTRPOINT(td, KTR_SYSCALL)) {
1170 MAKEMPSAFE(have_mplock);
1171 ktrsyscall(lp, code, narg, (void *)(&args.nosys.sysmsg + 1));
1172 }
1173#endif
1174
1175 /*
1176 * Default return value is 0 (will be copied to %rax). Double-value
1177 * returns use %rax and %rdx. %rdx is left unchanged for system
1178 * calls which return only one result.
1179 */
1180 args.sysmsg_fds[0] = 0;
1181 args.sysmsg_fds[1] = frame->tf_rdx;
1182
1183 /*
1184 * The syscall might manipulate the trap frame. If it does it
1185 * will probably return EJUSTRETURN.
1186 */
1187 args.sysmsg_frame = frame;
1188
1189 STOPEVENT(p, S_SCE, narg); /* MP aware */
1190
1191#ifdef SMP
1192 /*
1193 * Try to run the syscall without the MP lock if the syscall
1194 * is MP safe. We have to obtain the MP lock no matter what if
1195 * we are ktracing
1196 */
1197 if ((callp->sy_narg & SYF_MPSAFE) == 0)
1198 MAKEMPSAFE(have_mplock);
1199#endif
1200
1201 error = (*callp->sy_call)(&args);
1202
1203out:
1204 /*
1205 * MP SAFE (we may or may not have the MP lock at this point)
1206 */
1207 //kprintf("SYSMSG %d ", error);
1208 switch (error) {
1209 case 0:
1210 /*
1211 * Reinitialize proc pointer `p' as it may be different
1212 * if this is a child returning from fork syscall.
1213 */
1214 p = curproc;
1215 lp = curthread->td_lwp;
1216 frame->tf_rax = args.sysmsg_fds[0];
1217 frame->tf_rdx = args.sysmsg_fds[1];
1218 kprintf0("RESULT %lld %lld\n", frame->tf_rax, frame->tf_rdx);
1219 frame->tf_rflags &= ~PSL_C;
1220 break;
1221 case ERESTART:
1222 /*
1223 * Reconstruct the pc; we know that 'syscall' is 2 bytes.
1224 * We have to do a full context restore so that %r10
1225 * (which was holding the value of %rcx) is restored for
1226 * the next iteration.
1227 */
1228 frame->tf_rip -= frame->tf_err;
1229 frame->tf_r10 = frame->tf_rcx;
1230 td->td_pcb->pcb_flags |= PCB_FULLCTX;
1231 break;
1232 case EJUSTRETURN:
1233 break;
1234 case EASYNC:
1235 panic("Unexpected EASYNC return value (for now)");
1236 default:
1237bad:
1238 if (p->p_sysent->sv_errsize) {
1239 if (error >= p->p_sysent->sv_errsize)
1240 error = -1; /* XXX */
1241 else
1242 error = p->p_sysent->sv_errtbl[error];
1243 }
1244 kprintf0("ERROR %d\n", error);
1245 frame->tf_rax = error;
1246 frame->tf_rflags |= PSL_C;
1247 break;
1248 }
1249
1250 /*
1251 * Traced syscall. trapsignal() is not MP aware.
1252 */
1253 if (orig_tf_rflags & PSL_T) {
1254 MAKEMPSAFE(have_mplock);
1255 frame->tf_rflags &= ~PSL_T;
1256 trapsignal(lp, SIGTRAP, 0);
1257 }
1258
1259 /*
1260 * Handle reschedule and other end-of-syscall issues
1261 */
1262 userret(lp, frame, sticks);
1263
1264#ifdef KTRACE
1265 if (KTRPOINT(td, KTR_SYSRET)) {
1266 MAKEMPSAFE(have_mplock);
1267 ktrsysret(lp, code, error, args.sysmsg_result);
1268 }
1269#endif
1270
1271 /*
1272 * This works because errno is findable through the
1273 * register set. If we ever support an emulation where this
1274 * is not the case, this code will need to be revisited.
1275 */
1276 STOPEVENT(p, S_SCX, code);
1277
1278 userexit(lp);
1279#ifdef SMP
1280 /*
1281 * Release the MP lock if we had to get it
1282 */
1283 KASSERT(td->td_mpcount == have_mplock,
1284 ("badmpcount syscall2/end from %p", (void *)frame->tf_eip));
1285 if (have_mplock)
1286 rel_mplock();
1287#endif
1288#if JG
1289 KTR_LOG(kernentry_syscall_ret, p->p_pid, lp->lwp_tid, error);
1290#else
1291 KTR_LOG_STR(kernentry_syscall_ret, "pid=%d, tid=%d, err=%d", p->p_pid, lp->lwp_tid, error);
1292#endif
1293#ifdef INVARIANTS
1294 KASSERT(crit_count == (td->td_pri & ~TDPRI_MASK),
1295 ("syscall: critical section count mismatch! %d/%d",
1296 crit_count / TDPRI_CRIT, td->td_pri / TDPRI_CRIT));
1297#endif
1298}
1299
1300void
1301fork_return(struct lwp *lp, struct trapframe *frame)
1302{
1303 kprintf0("fork return\n");
1304 frame->tf_rax = 0; /* Child returns zero */
1305 frame->tf_rflags &= ~PSL_C; /* success */
1306 frame->tf_rdx = 1;
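
	/*
	 * The two-register convention assumed above mirrors the other BSD
	 * ports: %rax carries fork()'s return value while %rdx acts as a
	 * "child" flag (0 in the parent, 1 in the child) that the userland
	 * fork stub uses to decide whether to return 0.
	 */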
1307
1308 generic_lwp_return(lp, frame);
1309#if JG
1310 KTR_LOG(kernentry_fork_ret, lp->lwp_proc->p_pid, lp->lwp_tid);
1311#else
1312 KTR_LOG_STR(kernentry_fork_ret, "pid=%d, tid=%d", lp->lwp_proc->p_pid, lp->lwp_tid);
1313#endif
1314}
1315
1316/*
1317 * Simplified back end of syscall(), used when returning from fork()
1318 * directly into user mode. MP lock is held on entry and should be
1319 * released on return. This code will return back into the fork
1320 * trampoline code which then runs doreti.
1321 */
1322void
1323generic_lwp_return(struct lwp *lp, struct trapframe *frame)
1324{
1325 kprintf0("generic_lwp_return\n");
1326 struct proc *p = lp->lwp_proc;
1327
1328 /*
1329 * Newly forked processes are given a kernel priority. We have to
1330 * adjust the priority to a normal user priority and fake entry
1331 * into the kernel (call userenter()) to install a passive release
1332 * function just in case userret() decides to stop the process. This
1333 * can occur when ^Z races a fork. If we do not install the passive
1334 * release function the current process designation will not be
1335 * released when the thread goes to sleep.
1336 */
1337 lwkt_setpri_self(TDPRI_USER_NORM);
1338 userenter(lp->lwp_thread);
1339 userret(lp, frame, 0);
1340#ifdef KTRACE
1341 if (KTRPOINT(lp->lwp_thread, KTR_SYSRET))
1342 ktrsysret(lp, SYS_fork, 0, 0);
1343#endif
1344 p->p_flag |= P_PASSIVE_ACQ;
1345 userexit(lp);
1346 p->p_flag &= ~P_PASSIVE_ACQ;
1347#ifdef SMP
1348 KKASSERT(lp->lwp_thread->td_mpcount == 1);
1349 rel_mplock();
1350#endif
1351}
1352
1353/*
1354 * If PGEX_FPFAULT is set then set FP_VIRTFP in the PCB to force a T_DNA
1355 * fault (which is then passed back to the virtual kernel) if an attempt is
1356 * made to use the FP unit.
1357 *
1358 * XXX this is a fairly big hack.
1359 */
1360void
1361set_vkernel_fp(struct trapframe *frame)
1362{
1363 /* JGXXX */
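	/*
	 * Not implemented in this commit.  A rough sketch of the intended
	 * shape, assuming the semantics follow the existing i386/vkernel
	 * port (tf_xflags and FP_VIRTFP are assumptions here, not taken
	 * from this file):
	 *
	 *	struct thread *td = curthread;
	 *
	 *	if (frame->tf_xflags & PGEX_FPFAULT)
	 *		td->td_pcb->pcb_flags |= FP_VIRTFP;
	 *	else
	 *		td->td_pcb->pcb_flags &= ~FP_VIRTFP;
	 */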
1364}