2 * Copyright (c) 1994-1996 Søren Schmidt
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 * $FreeBSD: src/sys/i386/linux/linux_sysvec.c,v 1.55.2.9 2002/01/12 11:03:30 bde Exp $
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/imgact.h>
41 #include <sys/imgact_aout.h>
42 #include <sys/imgact_elf.h>
43 #include <sys/kern_syscall.h>
45 #include <sys/malloc.h>
47 #include <sys/signalvar.h>
48 #include <sys/sysent.h>
49 #include <sys/sysproto.h>
50 #include <sys/eventhandler.h>
53 #include <vm/vm_param.h>
54 #include <vm/vm_page.h>
55 #include <vm/vm_extern.h>
57 #include <sys/kernel.h>
58 #include <sys/module.h>
59 #include <machine/cpu.h>
62 #include "linux_proto.h"
63 #include "../linux_signal.h"
64 #include "../linux_util.h"
65 #include "../linux_futex.h"
66 #include "../linux_emuldata.h"
/*
 * Declare version 1 of the "linux" module and define the M_LINUX malloc
 * type (short name "linux", description "Linux mode structures").
 */
68 MODULE_VERSION(linux, 1);
70 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
/*
 * SHELLMAGIC is the two-byte "#!" script prefix read as a single 16-bit
 * value, so its numeric value depends on the host byte order.  It is
 * compared against the first short of the image header in
 * exec_linux_imgact_try().
 */
72 #if BYTE_ORDER == LITTLE_ENDIAN
73 #define SHELLMAGIC 0x2123 /* #! */
75 #define SHELLMAGIC 0x2321
79 * Allow the sendsig functions to use the ldebug() facility
80 * even though they are not syscalls themselves. Map them
81 * to syscall 0. This is slightly less bogus than using
/* Both sendsig pseudo-entries are given syscall number 0 for ldebug(). */
84 #define LINUX_SYS_linux_rt_sendsig 0
85 #define LINUX_SYS_linux_sendsig 0
/*
 * Objects defined outside this file: the signal trampoline code and its
 * size (installed as sv_sigcode / sv_szsigcode below) and the Linux
 * system call table (installed as sv_table).
 */
87 extern char linux_sigcode[];
88 extern int linux_szsigcode;
90 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
/*
 * Forward declarations for the file-local functions that are referenced
 * from the sysentvec and brand-note initializers further down.
 */
92 static int linux_fixup (register_t **stack_base,
93 struct image_params *iparams);
94 static int elf_linux_fixup (register_t **stack_base,
95 struct image_params *iparams);
96 static void linux_prepsyscall (struct trapframe *tf, int *args,
97 u_int *code, caddr_t *params);
98 static void linux_sendsig (sig_t catcher, int sig, sigset_t *mask,
100 static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
/*
 * Tags returned by EVENTHANDLER_REGISTER() in linux_elf_modevent(); they
 * are kept so the same handlers can be passed to EVENTHANDLER_DEREGISTER().
 */
102 static eventhandler_tag linux_exec_tag;
103 static eventhandler_tag linux_exit_tag;
106 * Linux syscalls return negative errno values; we use positive ones and map them
/*
 * Errno translation table, installed as sv_errtbl (with sv_errsize of
 * ELAST + 1) in both sysentvecs below.  The array index is the BSD errno
 * value (0..ELAST) and each entry is the matching Linux errno, negated.
 * Most low values map to themselves; for example index 11 yields -35 and
 * index 35 yields -11.
 */
108 static int bsd_to_linux_errno[ELAST + 1] = {
109 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
110 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
111 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
112 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
113 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
114 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
115 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
116 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
117 -6, -6, -43, -42, -75, -6, -84
/*
 * BSD-to-Linux signal number table, installed as sv_sigtbl (with
 * sv_sigsize of LINUX_SIGTBLSZ) in both sysentvecs below.  The sendsig
 * functions index it with _SIG_IDX(sig).  Two slots hold 0 instead of a
 * LINUX_SIG* constant; presumably those BSD signals have no Linux
 * counterpart -- TODO confirm.
 */
120 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
121 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
122 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
123 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
124 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
125 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
126 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
127 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
128 0, LINUX_SIGUSR1, LINUX_SIGUSR2
/*
 * Linux-to-BSD signal number table with LINUX_SIGTBLSZ entries of BSD
 * SIG* constants.  It is not referenced elsewhere in this file and has
 * external linkage, so it is presumably indexed by (Linux signal - 1) by
 * other parts of the emulator -- TODO confirm.  Note that SIGBUS and
 * SIGURG each appear twice, so the mapping is not one-to-one.
 */
131 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
132 SIGHUP, SIGINT, SIGQUIT, SIGILL,
133 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
134 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
135 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
136 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
137 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
138 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
139 SIGIO, SIGURG, SIGSYS
/*
 * Trap code translation table used through the bsd_to_linux_trapcode()
 * macro.  The array index is the BSD trap type (given, with its T_* name
 * where one applies, in the comment on each entry) and the value is the
 * number reported to the Linux program as sc_trapno.  LINUX_T_UNKNOWN
 * (255) marks trap types for which no translation is listed.
 */
142 #define LINUX_T_UNKNOWN 255
143 static int _bsd_to_linux_trapcode[] = {
144 LINUX_T_UNKNOWN, /* 0 */
145 6, /* 1 T_PRIVINFLT */
146 LINUX_T_UNKNOWN, /* 2 */
148 LINUX_T_UNKNOWN, /* 4 */
149 LINUX_T_UNKNOWN, /* 5 */
150 16, /* 6 T_ARITHTRAP */
151 254, /* 7 T_ASTFLT */
152 LINUX_T_UNKNOWN, /* 8 */
153 13, /* 9 T_PROTFLT */
154 1, /* 10 T_TRCTRAP */
155 LINUX_T_UNKNOWN, /* 11 */
156 14, /* 12 T_PAGEFLT */
157 LINUX_T_UNKNOWN, /* 13 */
158 17, /* 14 T_ALIGNFLT */
159 LINUX_T_UNKNOWN, /* 15 */
160 LINUX_T_UNKNOWN, /* 16 */
161 LINUX_T_UNKNOWN, /* 17 */
167 8, /* 23 T_DOUBLEFLT */
168 9, /* 24 T_FPOPFLT */
169 10, /* 25 T_TSSFLT */
170 11, /* 26 T_SEGNPFLT */
171 12, /* 27 T_STKFLT */
173 19, /* 29 T_XMMFLT */
174 15 /* 30 T_RESERVED */
/*
 * Bounds-checked lookup in _bsd_to_linux_trapcode: when code is smaller
 * than the number of table entries the table value is used.  The argument
 * is expanded more than once, so it must be free of side effects.
 */
176 #define bsd_to_linux_trapcode(code) \
177 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
178 _bsd_to_linux_trapcode[(code)]: \
182 * If FreeBSD & Linux have a difference of opinion about what a trap
183 * means, deal with it here.
/*
 * Trap translation hook, installed as sv_transtrap in both sysentvecs
 * below.  It receives the signal about to be raised and the trap code;
 * the first test separates every signal other than SIGBUS from the
 * SIGBUS case.
 */
186 translate_traps(int signal, int trap_code)
188 if (signal != SIGBUS)
/*
 * Stack fixup for the a.out sysentvec (installed as sv_fixup in
 * linux_sysvec).
 *
 * stack_base - in/out pointer to the current top of the new user stack
 * imgp       - image parameters; only imgp->args->argc is read here
 *
 * envp is computed as (argc + 1) slots above *stack_base, and the envp
 * pointer, the argv pointer and argc are then stored, in that order,
 * through **stack_base.
 */
202 linux_fixup(register_t **stack_base, struct image_params *imgp)
204 register_t *argv, *envp;
207 envp = *stack_base + (imgp->args->argc + 1);
209 **stack_base = (intptr_t)(void *)envp;
211 **stack_base = (intptr_t)(void *)argv;
213 **stack_base = imgp->args->argc;
/*
 * Stack fixup for ELF binaries (installed as sv_fixup in
 * elf_linux_sysvec).
 *
 * stack_base - in/out pointer to the current top of the new user stack
 * imgp       - image parameters; imgp->auxargs holds the Elf32_Auxargs
 *              collected by the ELF image activator
 *
 * The ELF auxiliary vector is written starting (argc + envc + 2) slots
 * above *stack_base and is terminated with AT_NULL.  AT_EXECFD is only
 * emitted when execfd is not -1.  Afterwards imgp->auxargs is freed and
 * cleared, and argc is stored through **stack_base.
 *
 * NOTE(review): AT_EUID and AT_EGID are filled from cr_svuid and cr_svgid
 * (the saved ids) -- confirm that this is the intended source for the
 * effective ids.
 */
218 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
220 Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
223 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
225 if (args->execfd != -1) {
226 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
228 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
229 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
230 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
231 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
232 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
233 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
234 AUXARGS_ENTRY(pos, AT_BASE, args->base);
235 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
236 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
237 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
238 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
239 AUXARGS_ENTRY(pos, AT_NULL, 0);
/* The aux arguments are no longer needed once copied to the stack. */
241 kfree(imgp->auxargs, M_TEMP);
242 imgp->auxargs = NULL;
245 **stack_base = (long)imgp->args->argc;
/*
 * _ucodesel and _udatasel are the selectors the sendsig functions load
 * into the code and data segment registers of the trapframe before
 * entering a handler.  linux_sznonrtsigcode is the offset added to the
 * start of the signal trampoline to reach the entry used for rt frames
 * (presumably the size of the non-rt trampoline -- TODO confirm).
 */
249 extern int _ucodesel, _udatasel;
250 extern unsigned long linux_sznonrtsigcode;
/*
 * Deliver a signal to the current lwp using the Linux rt signal frame
 * (struct l_rt_sigframe).  linux_sendsig() calls this when the handler
 * was installed with SA_SIGINFO.
 *
 * catcher - user handler address, stored in the frame as sf_handler
 * sig     - BSD signal number; translated through sv_sigtbl before it is
 *           stored in the siginfo
 * mask    - signal mask to save in the frame, converted with
 *           bsd_to_linux_sigset()
 * code    - trap code; stored as lsi_code and, translated by
 *           bsd_to_linux_trapcode(), as sc_trapno
 *
 * The frame is assembled in a local copy and copied out to fp, which is
 * placed either at the top of the alternate signal stack or directly
 * below the current user %esp.  If the stack cannot be grown or accessed,
 * SIGILL is reset to its default action, unblocked and posted to the lwp.
 * Otherwise the trapframe is redirected so that the lwp resumes in the
 * signal trampoline with %esp pointing at the frame.
 */
253 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
255 struct proc *p = curproc;
256 struct lwp *lp = curthread->td_lwp;
257 struct trapframe *regs;
258 struct l_rt_sigframe *fp, frame;
261 regs = lp->lwp_md.md_regs;
/* Remember whether we were already on the alternate stack on entry. */
262 oonstack = lp->lwp_sigstk.ss_flags & SS_ONSTACK;
265 if (ldebug(rt_sendsig))
266 kprintf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
267 catcher, sig, (void*)mask, code);
270 * Allocate space for the signal handler context.
/*
 * The alternate stack is used only when one is configured, we are not
 * already running on it, and this signal is in ps_sigonstack.
 */
272 if ((lp->lwp_flags & LWP_ALTSTACK) && !oonstack &&
273 SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
274 fp = (struct l_rt_sigframe *)(lp->lwp_sigstk.ss_sp +
275 lp->lwp_sigstk.ss_size - sizeof(struct l_rt_sigframe));
276 lp->lwp_sigstk.ss_flags |= SS_ONSTACK;
278 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
/*
 * NOTE(review): the text below refers to grow(), but the call actually
 * made is vm_map_growstack(), whose result is compared with KERN_SUCCESS.
 */
281 * grow() will return FALSE if the fp will not fit inside the stack
282 * and the stack can not be grown. useracc will return FALSE
283 * if access is denied.
285 if ((vm_map_growstack(p, (vm_offset_t)fp) != KERN_SUCCESS) ||
286 !useracc((caddr_t)fp, sizeof (struct l_rt_sigframe),
289 * Process has trashed its stack; give it an illegal
290 * instruction to halt it in its tracks.
292 SIGACTION(p, SIGILL) = SIG_DFL;
293 SIGDELSET(p->p_sigignore, SIGILL);
294 SIGDELSET(p->p_sigcatch, SIGILL);
295 SIGDELSET(lp->lwp_sigmask, SIGILL);
297 if (ldebug(rt_sendsig))
298 kprintf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
301 lwpsignal(p, lp, SIGILL);
306 * Build the argument list for the signal handler.
/* Translate the BSD signal number into the value the emulated ABI uses. */
308 if (p->p_sysent->sv_sigtbl)
309 if (sig <= p->p_sysent->sv_sigsize)
310 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
312 frame.sf_handler = catcher;
/* These pointers refer to the user-space copy of the frame at fp. */
314 frame.sf_siginfo = &fp->sf_si;
315 frame.sf_ucontext = &fp->sf_sc;
317 /* Fill siginfo structure. */
318 frame.sf_si.lsi_signo = sig;
319 frame.sf_si.lsi_code = code;
/*
 * NOTE(review): lsi_addr is taken from tf_err -- confirm that tf_err
 * holds the faulting address at this point.
 */
320 frame.sf_si.lsi_addr = (void *)regs->tf_err;
323 * Build the signal context to be used by sigreturn.
325 frame.sf_sc.uc_flags = 0; /* XXX ??? */
326 frame.sf_sc.uc_link = NULL; /* XXX ??? */
/*
 * Report the alternate stack as it was on entry: on-stack or not when
 * one is configured, LINUX_SS_DISABLE otherwise.
 */
328 frame.sf_sc.uc_stack.ss_sp = lp->lwp_sigstk.ss_sp;
329 frame.sf_sc.uc_stack.ss_size = lp->lwp_sigstk.ss_size;
330 frame.sf_sc.uc_stack.ss_flags = (lp->lwp_flags & LWP_ALTSTACK)
331 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
333 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
/* Save the interrupted register state from the trapframe. */
335 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
336 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs;
337 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
338 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
339 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
340 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
341 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
342 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
343 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
344 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
345 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
346 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
347 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
348 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
349 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
350 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
351 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
352 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
353 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
356 if (ldebug(rt_sendsig))
357 kprintf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
358 frame.sf_sc.uc_stack.ss_flags, lp->lwp_sigstk.ss_sp,
359 lp->lwp_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
362 if (copyout(&frame, fp, sizeof(frame)) != 0) {
364 * Process has trashed its stack; give it an illegal
365 * instruction to halt it in its tracks.
372 * Build context to run handler in.
/*
 * Resume at the rt entry of the trampoline: the trampoline start is
 * computed as PS_STRINGS minus *sv_szsigcode, and the rt entry lies
 * linux_sznonrtsigcode bytes past that start.
 */
374 regs->tf_esp = (int)fp;
375 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
376 linux_sznonrtsigcode;
379 * i386 abi specifies that the direction flag must be cleared
/* The trace and vm86 flags are cleared along with the direction flag. */
382 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
384 regs->tf_cs = _ucodesel;
385 regs->tf_ds = _udatasel;
386 regs->tf_es = _udatasel;
387 /* allow %fs and %gs to be inherited by the signal handler */
389 regs->tf_fs = _udatasel;
390 regs->tf_gs = _udatasel;
392 regs->tf_ss = _udatasel;
398 * Send an interrupt to process.
400 * Stack is set up to allow sigcode stored
401 * in u. to call routine, followed by kcall
402 * to sigreturn routine below. After sigreturn
403 * resets the signal mask, the stack, and the
404 * frame pointer, it returns to the user
/*
 * Signal delivery entry point, installed as sv_sendsig in both
 * sysentvecs below.  Builds a non-rt Linux signal frame
 * (struct l_sigframe) on the user stack and redirects the lwp to the
 * signal trampoline.
 *
 * catcher - user handler address, stored in the frame as sf_handler
 * sig     - BSD signal number; translated through sv_sigtbl
 * mask    - signal mask to save; converted with bsd_to_linux_sigset(),
 *           word 0 goes to sc_mask and the remaining words to
 *           sf_extramask
 * code    - trap code, translated by bsd_to_linux_trapcode() into
 *           sc_trapno
 *
 * Signals whose handler is in ps_siginfo (installed with SA_SIGINFO) are
 * handed to linux_rt_sendsig() instead.  If the stack cannot be grown or
 * accessed, SIGILL is reset to its default action, unblocked and posted
 * to the lwp.
 */
409 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
411 struct proc *p = curproc;
412 struct lwp *lp = curthread->td_lwp;
413 struct trapframe *regs;
414 struct l_sigframe *fp, frame;
418 if (SIGISMEMBER(p->p_sigacts->ps_sigacts->ps_siginfo, sig)) {
419 /* Signal handler installed with SA_SIGINFO. */
420 linux_rt_sendsig(catcher, sig, mask, code);
424 regs = lp->lwp_md.md_regs;
/* Remember whether we were already on the alternate stack on entry. */
425 oonstack = lp->lwp_sigstk.ss_flags & SS_ONSTACK;
429 kprintf(ARGS(sendsig, "%p, %d, %p, %lu"),
430 catcher, sig, (void*)mask, code);
434 * Allocate space for the signal handler context.
/*
 * The alternate stack is used only when one is configured, we are not
 * already running on it, and this signal is in ps_sigonstack.
 */
436 if ((lp->lwp_flags & LWP_ALTSTACK) && !oonstack &&
437 SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
438 fp = (struct l_sigframe *)(lp->lwp_sigstk.ss_sp +
439 lp->lwp_sigstk.ss_size - sizeof(struct l_sigframe));
440 lp->lwp_sigstk.ss_flags |= SS_ONSTACK;
442 fp = (struct l_sigframe *)regs->tf_esp - 1;
/*
 * NOTE(review): the text below refers to grow(), but the call actually
 * made is vm_map_growstack(), whose result is compared with KERN_SUCCESS.
 */
445 * grow() will return FALSE if the fp will not fit inside the stack
446 * and the stack can not be grown. useracc will return FALSE
447 * if access is denied.
449 if ((vm_map_growstack(p, (vm_offset_t)fp) != KERN_SUCCESS) ||
450 !useracc((caddr_t)fp, sizeof (struct l_sigframe),
453 * Process has trashed its stack; give it an illegal
454 * instruction to halt it in its tracks.
456 SIGACTION(p, SIGILL) = SIG_DFL;
457 SIGDELSET(p->p_sigignore, SIGILL);
458 SIGDELSET(p->p_sigcatch, SIGILL);
459 SIGDELSET(lp->lwp_sigmask, SIGILL);
460 lwpsignal(p, lp, SIGILL);
465 * Build the argument list for the signal handler.
/* Translate the BSD signal number into the value the emulated ABI uses. */
467 if (p->p_sysent->sv_sigtbl)
468 if (sig <= p->p_sysent->sv_sigsize)
469 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
471 frame.sf_handler = catcher;
474 bsd_to_linux_sigset(mask, &lmask);
477 * Build the signal context to be used by sigreturn.
479 frame.sf_sc.sc_mask = lmask.__bits[0];
480 frame.sf_sc.sc_gs = regs->tf_gs;
481 frame.sf_sc.sc_fs = regs->tf_fs;
482 frame.sf_sc.sc_es = regs->tf_es;
483 frame.sf_sc.sc_ds = regs->tf_ds;
484 frame.sf_sc.sc_edi = regs->tf_edi;
485 frame.sf_sc.sc_esi = regs->tf_esi;
486 frame.sf_sc.sc_ebp = regs->tf_ebp;
487 frame.sf_sc.sc_ebx = regs->tf_ebx;
488 frame.sf_sc.sc_edx = regs->tf_edx;
489 frame.sf_sc.sc_ecx = regs->tf_ecx;
490 frame.sf_sc.sc_eax = regs->tf_eax;
491 frame.sf_sc.sc_eip = regs->tf_eip;
492 frame.sf_sc.sc_cs = regs->tf_cs;
493 frame.sf_sc.sc_eflags = regs->tf_eflags;
494 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
495 frame.sf_sc.sc_ss = regs->tf_ss;
496 frame.sf_sc.sc_err = regs->tf_err;
497 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
/* No floating point state is saved; the area is simply zeroed. */
499 bzero(&frame.sf_fpstate, sizeof(struct l_fpstate));
/* Mask words beyond the first one travel in sf_extramask. */
501 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
502 frame.sf_extramask[i] = lmask.__bits[i+1];
504 if (copyout(&frame, fp, sizeof(frame)) != 0) {
506 * Process has trashed its stack; give it an illegal
507 * instruction to halt it in its tracks.
514 * Build context to run handler in.
/*
 * Resume at the start of the trampoline, computed as PS_STRINGS minus
 * *sv_szsigcode, with %esp pointing at the new frame.
 */
516 regs->tf_esp = (int)fp;
517 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
520 * i386 abi specifies that the direction flag must be cleared
/* The trace and vm86 flags are cleared along with the direction flag. */
523 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
525 regs->tf_cs = _ucodesel;
526 regs->tf_ds = _udatasel;
527 regs->tf_es = _udatasel;
528 /* Allow %fs and %gs to be inherited by the signal handler */
530 regs->tf_fs = _udatasel;
531 regs->tf_gs = _udatasel;
533 regs->tf_ss = _udatasel;
538 * System call to cleanup state after a signal
539 * has been taken. Reset signal mask and
540 * stack state from context left by sendsig (above).
541 * Return to previous pc and psl as specified by
542 * context left by sendsig. Check carefully to
543 * make sure that the user has not modified the
544 * psl to gain improper privileges or to cause
/*
 * Linux sigreturn for non-rt frames: restores the state that
 * linux_sendsig() saved in a struct l_sigframe.
 *
 * args->sfp - user address of the signal frame, supplied by the
 *             trampoline
 *
 * The frame is copied in from user space and validated before anything
 * is restored: eflags may differ from the current trapframe only in
 * PSL_USERCHANGE bits (PSL_RF is ignored in the comparison), and %cs
 * must carry user privilege (SEL_UPL), otherwise SIGBUS with T_PROTFLT
 * is raised.  Then SS_ONSTACK is cleared, the signal mask is rebuilt
 * from sc_mask plus sf_extramask and passed through SIG_CANTMASK(), and
 * the registers are loaded into the trapframe.  Returns EJUSTRETURN on
 * the success path shown here.
 */
550 sys_linux_sigreturn(struct linux_sigreturn_args *args)
552 struct lwp *lp = curthread->td_lwp;
553 struct l_sigframe frame;
554 struct trapframe *regs;
558 regs = lp->lwp_md.md_regs;
561 if (ldebug(sigreturn))
562 kprintf(ARGS(sigreturn, "%p"), (void *)args->sfp);
565 * The trampoline code hands us the sigframe.
566 * It is unsafe to keep track of it ourselves, in the event that a
567 * program jumps out of a signal handler.
569 if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
573 * Check for security violations.
/* True when ef and oef differ only in bits covered by PSL_USERCHANGE. */
575 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
576 eflags = frame.sf_sc.sc_eflags;
578 * XXX do allow users to change the privileged flag PSL_RF. The
579 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
580 * sometimes set it there too. tf_eflags is kept in the signal
581 * context during signal handling and there is no other place
582 * to remember it, so the PSL_RF bit may be corrupted by the
583 * signal handler without us knowing. Corruption of the PSL_RF
584 * bit at worst causes one more or one less debugger trap, so
585 * allowing it is fairly harmless.
587 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
592 * Don't allow users to load a valid privileged %cs. Let the
593 * hardware check for invalid selectors, excess privilege in
594 * other selectors, invalid %eip's and invalid %esp's.
596 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
597 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
598 trapsignal(lp, SIGBUS, T_PROTFLT);
602 lp->lwp_sigstk.ss_flags &= ~SS_ONSTACK;
603 lmask.__bits[0] = frame.sf_sc.sc_mask;
604 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
605 lmask.__bits[i+1] = frame.sf_extramask[i];
606 linux_to_bsd_sigset(&lmask, &lp->lwp_sigmask);
607 SIG_CANTMASK(lp->lwp_sigmask);
610 * Restore signal context.
612 /* %gs was restored by the trampoline. */
613 regs->tf_fs = frame.sf_sc.sc_fs;
614 regs->tf_es = frame.sf_sc.sc_es;
615 regs->tf_ds = frame.sf_sc.sc_ds;
616 regs->tf_edi = frame.sf_sc.sc_edi;
617 regs->tf_esi = frame.sf_sc.sc_esi;
618 regs->tf_ebp = frame.sf_sc.sc_ebp;
619 regs->tf_ebx = frame.sf_sc.sc_ebx;
620 regs->tf_edx = frame.sf_sc.sc_edx;
621 regs->tf_ecx = frame.sf_sc.sc_ecx;
622 regs->tf_eax = frame.sf_sc.sc_eax;
623 regs->tf_eip = frame.sf_sc.sc_eip;
624 regs->tf_cs = frame.sf_sc.sc_cs;
625 regs->tf_eflags = eflags;
626 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
627 regs->tf_ss = frame.sf_sc.sc_ss;
630 return (EJUSTRETURN);
634 * System call to cleanup state after a signal
635 * has been taken. Reset signal mask and
636 * stack state from context left by rt_sendsig (above).
637 * Return to previous pc and psl as specified by
638 * context left by sendsig. Check carefully to
639 * make sure that the user has not modified the
640 * psl to gain improper privileges or to cause
/*
 * Linux rt_sigreturn: restores the state that linux_rt_sendsig() saved
 * in the ucontext part of an rt signal frame.
 *
 * args->ucp - user address of the struct l_ucontext, supplied by the
 *             trampoline
 *
 * The ucontext is copied in from user space and validated before
 * anything is restored: eflags may differ from the current trapframe
 * only in PSL_USERCHANGE bits (PSL_RF is ignored in the comparison), and
 * %cs must carry user privilege (SEL_UPL), otherwise SIGBUS with
 * T_PROTFLT is raised.  Then SS_ONSTACK is cleared, the signal mask is
 * taken from uc_sigmask and passed through SIG_CANTMASK(), the registers
 * are loaded into the trapframe, and the alternate stack settings are
 * re-applied through kern_sigaltstack(), whose result is ignored.
 * Returns EJUSTRETURN on the success path shown here.
 */
646 sys_linux_rt_sigreturn(struct linux_rt_sigreturn_args *args)
648 struct lwp *lp = curthread->td_lwp;
649 struct l_ucontext uc;
650 struct l_sigcontext *context;
653 struct trapframe *regs;
656 regs = lp->lwp_md.md_regs;
659 if (ldebug(rt_sigreturn))
660 kprintf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
663 * The trampoline code hands us the ucontext.
664 * It is unsafe to keep track of it ourselves, in the event that a
665 * program jumps out of a signal handler.
667 if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
/* All register values below come from the kernel copy, not user memory. */
670 context = &uc.uc_mcontext;
673 * Check for security violations.
/* True when ef and oef differ only in bits covered by PSL_USERCHANGE. */
675 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
676 eflags = context->sc_eflags;
678 * XXX do allow users to change the privileged flag PSL_RF. The
679 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
680 * sometimes set it there too. tf_eflags is kept in the signal
681 * context during signal handling and there is no other place
682 * to remember it, so the PSL_RF bit may be corrupted by the
683 * signal handler without us knowing. Corruption of the PSL_RF
684 * bit at worst causes one more or one less debugger trap, so
685 * allowing it is fairly harmless.
687 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
692 * Don't allow users to load a valid privileged %cs. Let the
693 * hardware check for invalid selectors, excess privilege in
694 * other selectors, invalid %eip's and invalid %esp's.
696 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
697 if (!CS_SECURE(context->sc_cs)) {
698 trapsignal(lp, SIGBUS, T_PROTFLT);
702 lp->lwp_sigstk.ss_flags &= ~SS_ONSTACK;
703 linux_to_bsd_sigset(&uc.uc_sigmask, &lp->lwp_sigmask);
704 SIG_CANTMASK(lp->lwp_sigmask);
707 * Restore signal context
709 /* %gs was restored by the trampoline. */
710 regs->tf_fs = context->sc_fs;
711 regs->tf_es = context->sc_es;
712 regs->tf_ds = context->sc_ds;
713 regs->tf_edi = context->sc_edi;
714 regs->tf_esi = context->sc_esi;
715 regs->tf_ebp = context->sc_ebp;
716 regs->tf_ebx = context->sc_ebx;
717 regs->tf_edx = context->sc_edx;
718 regs->tf_ecx = context->sc_ecx;
719 regs->tf_eax = context->sc_eax;
720 regs->tf_eip = context->sc_eip;
721 regs->tf_cs = context->sc_cs;
722 regs->tf_eflags = eflags;
723 regs->tf_esp = context->sc_esp_at_signal;
724 regs->tf_ss = context->sc_ss;
727 * call sigaltstack & ignore results..
730 ss.ss_sp = lss->ss_sp;
731 ss.ss_size = lss->ss_size;
732 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
735 if (ldebug(rt_sigreturn))
736 kprintf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
737 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
739 kern_sigaltstack(&ss, NULL);
742 return (EJUSTRETURN);
/*
 * System call argument fetch hook, installed as sv_prepsyscall in both
 * sysentvecs below.
 *
 * tf     - trapframe of the lwp making the system call
 * args   - out: receives six arguments taken, in order, from %ebx, %ecx,
 *          %edx, %esi, %edi and %ebp
 * code   - not touched by the statements shown here
 * params - out: set to NULL to tell the caller that no arguments need to
 *          be copied in from user memory
 */
751 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
753 args[0] = tf->tf_ebx;
754 args[1] = tf->tf_ecx;
755 args[2] = tf->tf_edx;
756 args[3] = tf->tf_esi;
757 args[4] = tf->tf_edi;
758 args[5] = tf->tf_ebp;
759 *params = NULL; /* no copyin */
763 * If a linux binary is exec'ing something, try this image activator
764 * first. We override standard shell script execution in order to
765 * be able to modify the interpreter path. We only do this if a linux
766 * binary is doing the exec, so we do not create an EXEC module for it.
/*
 * Image activator hook, installed as sv_imgact_try in both sysentvecs
 * below.
 *
 * imgp - image parameters of the exec in progress; image_header is
 *        inspected and interpreter_name is handed to
 *        linux_translate_path()
 *
 * Only images whose first 16-bit word equals SHELLMAGIC (a "#!" script)
 * are handled: the regular shell activator exec_shell_imgact() is run
 * first and, when it returns 0, the interpreter name is passed to
 * linux_translate_path() to look for an alternate path.
 */
768 static int exec_linux_imgact_try (struct image_params *iparams);
771 exec_linux_imgact_try(struct image_params *imgp)
773 const char *head = (const char *)imgp->image_header;
777 * The interpreter for shell scripts run from a linux binary needs
778 * to be located in /compat/linux if possible in order to recursively
779 * maintain linux path emulation.
781 if (((const short *)head)[0] == SHELLMAGIC) {
783 * Run our normal shell image activator. If it succeeds attempt
784 * to use the alternate path for the interpreter. If an alternate
785 * path is found, use our stringspace to store it.
787 if ((error = exec_shell_imgact(imgp)) == 0) {
788 linux_translate_path(imgp->interpreter_name,
/*
 * System entry vector named "Linux a.out".  It ties together the Linux
 * syscall table, the signal and errno translation tables, the trap
 * translation hook, the signal delivery code and trampoline, the syscall
 * argument fetch hook and the script activator defined in this file.
 * Stack fixup is done by linux_fixup().
 */
795 struct sysentvec linux_sysvec = {
796 .sv_size = LINUX_SYS_MAXSYSCALL,
797 .sv_table = linux_sysent,
798 .sv_mask = 0xffffffff,
799 .sv_sigsize = LINUX_SIGTBLSZ,
800 .sv_sigtbl = bsd_to_linux_signal,
801 .sv_errsize = ELAST + 1,
802 .sv_errtbl = bsd_to_linux_errno,
803 .sv_transtrap = translate_traps,
804 .sv_fixup = linux_fixup,
805 .sv_sendsig = linux_sendsig,
806 .sv_sigcode = linux_sigcode,
807 .sv_szsigcode = &linux_szsigcode,
808 .sv_prepsyscall = linux_prepsyscall,
809 .sv_name = "Linux a.out",
811 .sv_imgact_try = exec_linux_imgact_try,
812 .sv_minsigstksz = LINUX_MINSIGSTKSZ
/*
 * System entry vector named "Linux ELF32"; every ELF brand below points
 * at it.  It shares the syscall table, translation tables and signal
 * code with linux_sysvec, but uses elf_linux_fixup() for the stack fixup
 * (which also writes the auxiliary vector) and sets elf32_coredump as
 * the core dump routine.
 */
815 struct sysentvec elf_linux_sysvec = {
816 .sv_size = LINUX_SYS_MAXSYSCALL,
817 .sv_table = linux_sysent,
818 .sv_mask = 0xffffffff,
819 .sv_sigsize = LINUX_SIGTBLSZ,
820 .sv_sigtbl = bsd_to_linux_signal,
821 .sv_errsize = ELAST + 1,
822 .sv_errtbl = bsd_to_linux_errno,
823 .sv_transtrap = translate_traps,
824 .sv_fixup = elf_linux_fixup,
825 .sv_sendsig = linux_sendsig,
826 .sv_sigcode = linux_sigcode,
827 .sv_szsigcode = &linux_szsigcode,
828 .sv_prepsyscall = linux_prepsyscall,
829 .sv_name = "Linux ELF32",
830 .sv_coredump = elf32_coredump,
831 .sv_imgact_try = exec_linux_imgact_try,
832 .sv_minsigstksz = LINUX_MINSIGSTKSZ
/*
 * Vendor name strings used by the two ELF brand notes below, and the
 * value (0) that linux_trans_osrel() expects in the first descriptor
 * word of such a note.
 */
835 static const char GNU_ABI_VENDOR[] = "GNU";
836 static const char SUSE_ABI_VENDOR[] = "SuSE";
837 static int GNULINUX_ABI_DESC = 0;
/*
 * Brand-note callback (the trans_osrel member of both brand notes below)
 * that derives an OS release number from an ELF note.
 *
 * note  - ELF note header; the descriptor words are located after the
 *         header and after the name, whose size is rounded up to
 *         sizeof(Elf32_Addr)
 * osrel - out: receives the encoded release
 *
 * The first descriptor word must equal GNULINUX_ABI_DESC.  The release
 * is then built from the following words, starting with
 * desc[1] * 1000000.
 */
840 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
842 const Elf32_Word *desc;
845 p = (uintptr_t)(note + 1);
846 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
848 desc = (const Elf32_Word *)p;
849 if (desc[0] != GNULINUX_ABI_DESC)
852 * For Linux we encode osrel as follows:
853 * VVVMMMIII (version, major, minor)
855 *osrel = desc[1] * 1000000 +
/*
 * Brand note description for binaries carrying a "GNU" vendor note; used
 * by linux32_brand and linux32_glibc2_brand.  BN_TRANSLATE_OSREL together
 * with trans_osrel selects linux_trans_osrel() for the release number.
 */
862 static Elf_Brandnote linux32_generic_brandnote = {
863 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
866 .vendor = GNU_ABI_VENDOR,
867 .flags = BN_TRANSLATE_OSREL,
868 .trans_osrel = linux_trans_osrel,
/*
 * Brand note description for binaries carrying a "SuSE" vendor note;
 * used by linux32_suse_brand.  The release number is translated by
 * linux_trans_osrel(), as for the generic note.
 */
871 static Elf_Brandnote linux32_suse_brandnote = {
872 .hdr.n_namesz = sizeof(SUSE_ABI_VENDOR),
875 .vendor = SUSE_ABI_VENDOR,
876 .flags = BN_TRANSLATE_OSREL,
877 .trans_osrel = linux_trans_osrel,
/*
 * ELF brand for Linux binaries whose interpreter is /lib/ld-linux.so.1.
 * Such images run under elf_linux_sysvec with /compat/linux as the
 * emulation path and are matched with the generic (GNU) brand note.
 */
880 static Elf32_Brandinfo linux32_brand = {
881 .brand = ELFOSABI_LINUX,
883 .compat_3_brand = "Linux",
884 .emul_path = "/compat/linux",
885 .interp_path = "/lib/ld-linux.so.1",
886 .sysvec = &elf_linux_sysvec,
887 .interp_newpath = NULL,
888 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE,
889 .brand_note = &linux32_generic_brandnote,
/*
 * ELF brand for Linux binaries whose interpreter is /lib/ld-linux.so.2.
 * It differs from linux32_brand only in the interpreter path.
 */
892 static Elf32_Brandinfo linux32_glibc2_brand = {
893 .brand = ELFOSABI_LINUX,
895 .compat_3_brand = "Linux",
896 .emul_path = "/compat/linux",
897 .interp_path = "/lib/ld-linux.so.2",
898 .sysvec = &elf_linux_sysvec,
899 .interp_newpath = NULL,
900 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE,
901 .brand_note = &linux32_generic_brandnote,
/*
 * ELF brand for Linux binaries identified by the SuSE brand note.  It
 * uses the same interpreter path as linux32_glibc2_brand and differs
 * from it only in the brand note it points at.
 */
904 static Elf32_Brandinfo linux32_suse_brand = {
905 .brand = ELFOSABI_LINUX,
907 .compat_3_brand = "Linux",
908 .emul_path = "/compat/linux",
909 .interp_path = "/lib/ld-linux.so.2",
910 .sysvec = &elf_linux_sysvec,
911 .interp_newpath = NULL,
912 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE,
913 .brand_note = &linux32_suse_brandnote,
/*
 * List of the brands handled by this module.  linux_elf_modevent() walks
 * it until it reaches a NULL entry, so the list must be NULL-terminated.
 */
916 Elf32_Brandinfo *linux_brandlist[] = {
918 &linux32_glibc2_brand,
/*
 * Module event handler for the Linux ELF emulation.
 *
 * mod  - module handle
 * type - module event being delivered
 * data - opaque argument from the module system
 *
 * Two sequences are visible.  The install sequence registers every entry
 * of linux_brandlist with elf32_insert_brand_entry(), reports success or
 * failure with kprintf(), initializes futex_mtx and registers the
 * process_exec and process_exit event handlers.  The removal sequence
 * first checks every brand with elf32_brand_inuse(), then removes the
 * brands with elf32_remove_brand_entry(), reports the outcome,
 * deregisters both event handlers and releases futex_mtx.
 */
924 linux_elf_modevent(module_t mod, int type, void *data)
926 Elf32_Brandinfo **brandinfo;
933 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
935 if (elf32_insert_brand_entry(*brandinfo) < 0)
939 kprintf("Linux ELF exec handler installed\n");
941 kprintf("cannot insert Linux ELF brand handler\n");
944 lockinit(&futex_mtx, "linftxs", 0, LK_CANRECURSE);
945 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_transition,
947 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, emuldata_exit,
/* Removal: look for brands that are still in use before removing any. */
951 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
953 if (elf32_brand_inuse(*brandinfo))
956 for (brandinfo = &linux_brandlist[0];
957 *brandinfo != NULL; ++brandinfo)
958 if (elf32_remove_brand_entry(*brandinfo) < 0)
963 kprintf("Linux ELF exec handler removed\n");
965 kprintf("Could not deinstall ELF interpreter entry\n");
967 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
968 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
969 lockuninit(&futex_mtx);
/*
 * Module descriptor and registration: the module is declared under the
 * name linuxelf at SI_SUB_EXEC with order SI_ORDER_ANY.
 */
978 static moduledata_t linux_elf_mod = {
984 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);