kernel - Major signal path adjustments to fix races, tsleep race fixes, +more
[dragonfly.git] / sys / emulation / linux / i386 / linux_sysvec.c
CommitLineData
984263bc
MD
1/*-
2 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
4d9022e3 15 * derived from this software without specific prior written permission
984263bc
MD
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD: src/sys/i386/linux/linux_sysvec.c,v 1.55.2.9 2002/01/12 11:03:30 bde Exp $
29 */
30
31/* XXX we use functions that might not exist. */
32#include "opt_compat.h"
33
34#ifndef COMPAT_43
35#error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36#endif
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/imgact.h>
41#include <sys/imgact_aout.h>
42#include <sys/imgact_elf.h>
1058bc2a 43#include <sys/kern_syscall.h>
984263bc
MD
44#include <sys/lock.h>
45#include <sys/malloc.h>
46#include <sys/proc.h>
47#include <sys/signalvar.h>
48#include <sys/sysent.h>
49#include <sys/sysproto.h>
a1f82243 50#include <sys/eventhandler.h>
984263bc
MD
51
52#include <vm/vm.h>
53#include <vm/vm_param.h>
54#include <vm/vm_page.h>
55#include <vm/vm_extern.h>
56#include <sys/exec.h>
57#include <sys/kernel.h>
58#include <sys/module.h>
59#include <machine/cpu.h>
60
1f2de5d4
MD
61#include "linux.h"
62#include "linux_proto.h"
63#include "../linux_signal.h"
64#include "../linux_util.h"
a1f82243
AH
65#include "../linux_futex.h"
66#include "../linux_emuldata.h"
984263bc 67
09c280ec
SS
68MODULE_VERSION(linux, 1);
69
984263bc
MD
70MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
71
72#if BYTE_ORDER == LITTLE_ENDIAN
73#define SHELLMAGIC 0x2123 /* #! */
74#else
75#define SHELLMAGIC 0x2321
76#endif
77
78/*
79 * Allow the sendsig functions to use the ldebug() facility
80 * even though they are not syscalls themselves. Map them
81 * to syscall 0. This is slightly less bogus than using
82 * ldebug(sigreturn).
83 */
84#define LINUX_SYS_linux_rt_sendsig 0
85#define LINUX_SYS_linux_sendsig 0
86
87extern char linux_sigcode[];
88extern int linux_szsigcode;
89
90extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
91
b1efe3b4
RG
92static int linux_fixup (register_t **stack_base,
93 struct image_params *iparams);
94static int elf_linux_fixup (register_t **stack_base,
95 struct image_params *iparams);
96static void linux_prepsyscall (struct trapframe *tf, int *args,
97 u_int *code, caddr_t *params);
98static void linux_sendsig (sig_t catcher, int sig, sigset_t *mask,
99 u_long code);
f2000797 100static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
984263bc 101
a1f82243
AH
102static eventhandler_tag linux_exec_tag;
103static eventhandler_tag linux_exit_tag;
104
984263bc
MD
105/*
106 * Linux syscalls return negative errno's, we do positive and map them
107 */
108static int bsd_to_linux_errno[ELAST + 1] = {
109 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
110 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
111 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
112 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
113 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
114 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
115 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
116 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
117 -6, -6, -43, -42, -75, -6, -84
118};
119
120int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
121 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
122 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
a1f82243 123 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
984263bc
MD
124 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
125 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
126 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
127 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
128 0, LINUX_SIGUSR1, LINUX_SIGUSR2
129};
130
131int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
132 SIGHUP, SIGINT, SIGQUIT, SIGILL,
133 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
134 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
135 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
136 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
137 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
138 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
a1f82243 139 SIGIO, SIGURG, SIGSYS
984263bc
MD
140};
141
/* Linux trap number reported when a BSD trap code has no equivalent. */
#define	LINUX_T_UNKNOWN	255

/*
 * BSD trap code (array index) -> Linux i386 trap number.  Never
 * modified at run time, hence const.
 */
static const int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Translate a BSD trap code into the Linux trap number.
 *
 * This used to be a function-like macro that expanded its argument
 * twice (once in the bounds check, once as the index); a real function
 * evaluates it exactly once and is type checked.
 *
 * code		BSD trap code
 *
 * Returns the Linux trap number, or LINUX_T_UNKNOWN when the code lies
 * outside the table or has no Linux counterpart.
 */
static __inline int
bsd_to_linux_trapcode(unsigned long code)
{
	const unsigned long ntraps =
	    sizeof(_bsd_to_linux_trapcode) / sizeof(_bsd_to_linux_trapcode[0]);

	if (code < ntraps)
		return (_bsd_to_linux_trapcode[code]);
	return (LINUX_T_UNKNOWN);
}
180
181/*
182 * If FreeBSD & Linux have a difference of opinion about what a trap
183 * means, deal with it here.
184 */
185static int
186translate_traps(int signal, int trap_code)
187{
188 if (signal != SIGBUS)
189 return signal;
190 switch (trap_code) {
191 case T_PROTFLT:
192 case T_TSSFLT:
193 case T_DOUBLEFLT:
194 case T_PAGEFLT:
195 return SIGSEGV;
196 default:
197 return signal;
198 }
199}
200
201static int
202linux_fixup(register_t **stack_base, struct image_params *imgp)
203{
204 register_t *argv, *envp;
205
206 argv = *stack_base;
2bd9d75c 207 envp = *stack_base + (imgp->args->argc + 1);
984263bc
MD
208 (*stack_base)--;
209 **stack_base = (intptr_t)(void *)envp;
210 (*stack_base)--;
211 **stack_base = (intptr_t)(void *)argv;
212 (*stack_base)--;
2bd9d75c 213 **stack_base = imgp->args->argc;
984263bc
MD
214 return 0;
215}
216
217static int
218elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
219{
220 Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
221 register_t *pos;
222
2bd9d75c 223 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
984263bc 224
984263bc
MD
225 if (args->execfd != -1) {
226 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
227 }
228 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
229 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
230 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
231 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
232 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
233 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
234 AUXARGS_ENTRY(pos, AT_BASE, args->base);
41c20dac
MD
235 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
236 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
237 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
238 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
984263bc
MD
239 AUXARGS_ENTRY(pos, AT_NULL, 0);
240
efda3bd0 241 kfree(imgp->auxargs, M_TEMP);
984263bc
MD
242 imgp->auxargs = NULL;
243
244 (*stack_base)--;
2bd9d75c 245 **stack_base = (long)imgp->args->argc;
984263bc
MD
246 return 0;
247}
248
249extern int _ucodesel, _udatasel;
250extern unsigned long linux_sznonrtsigcode;
251
252static void
253linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
254{
41c20dac 255 struct proc *p = curproc;
08f2f1bb 256 struct lwp *lp = curthread->td_lwp;
41c20dac 257 struct trapframe *regs;
984263bc
MD
258 struct l_rt_sigframe *fp, frame;
259 int oonstack;
260
08f2f1bb
SS
261 regs = lp->lwp_md.md_regs;
262 oonstack = lp->lwp_sigstk.ss_flags & SS_ONSTACK;
984263bc
MD
263
264#ifdef DEBUG
265 if (ldebug(rt_sendsig))
26be20a0 266 kprintf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
984263bc
MD
267 catcher, sig, (void*)mask, code);
268#endif
269 /*
270 * Allocate space for the signal handler context.
271 */
4643740a 272 if ((lp->lwp_flags & LWP_ALTSTACK) && !oonstack &&
984263bc 273 SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
08f2f1bb
SS
274 fp = (struct l_rt_sigframe *)(lp->lwp_sigstk.ss_sp +
275 lp->lwp_sigstk.ss_size - sizeof(struct l_rt_sigframe));
276 lp->lwp_sigstk.ss_flags |= SS_ONSTACK;
984263bc
MD
277 } else
278 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
279
280 /*
281 * grow() will return FALSE if the fp will not fit inside the stack
282 * and the stack can not be grown. useracc will return FALSE
283 * if access is denied.
284 */
8d496bf9 285 if ((vm_map_growstack(p, (vm_offset_t)fp) != KERN_SUCCESS) ||
984263bc
MD
286 !useracc((caddr_t)fp, sizeof (struct l_rt_sigframe),
287 VM_PROT_WRITE)) {
288 /*
289 * Process has trashed its stack; give it an illegal
290 * instruction to halt it in its tracks.
291 */
292 SIGACTION(p, SIGILL) = SIG_DFL;
293 SIGDELSET(p->p_sigignore, SIGILL);
294 SIGDELSET(p->p_sigcatch, SIGILL);
08f2f1bb 295 SIGDELSET(lp->lwp_sigmask, SIGILL);
984263bc
MD
296#ifdef DEBUG
297 if (ldebug(rt_sendsig))
26be20a0 298 kprintf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
984263bc
MD
299 fp, oonstack);
300#endif
7278a846 301 lwpsignal(p, lp, SIGILL);
984263bc
MD
302 return;
303 }
304
305 /*
306 * Build the argument list for the signal handler.
307 */
308 if (p->p_sysent->sv_sigtbl)
309 if (sig <= p->p_sysent->sv_sigsize)
310 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
311
312 frame.sf_handler = catcher;
313 frame.sf_sig = sig;
314 frame.sf_siginfo = &fp->sf_si;
315 frame.sf_ucontext = &fp->sf_sc;
316
317 /* Fill siginfo structure. */
318 frame.sf_si.lsi_signo = sig;
319 frame.sf_si.lsi_code = code;
320 frame.sf_si.lsi_addr = (void *)regs->tf_err;
321
322 /*
323 * Build the signal context to be used by sigreturn.
324 */
325 frame.sf_sc.uc_flags = 0; /* XXX ??? */
326 frame.sf_sc.uc_link = NULL; /* XXX ??? */
327
08f2f1bb
SS
328 frame.sf_sc.uc_stack.ss_sp = lp->lwp_sigstk.ss_sp;
329 frame.sf_sc.uc_stack.ss_size = lp->lwp_sigstk.ss_size;
4643740a 330 frame.sf_sc.uc_stack.ss_flags = (lp->lwp_flags & LWP_ALTSTACK)
984263bc
MD
331 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
332
333 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
334
335 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
4e7c41c5 336 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs;
984263bc
MD
337 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
338 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
339 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
340 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
341 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
342 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
343 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
344 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
345 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
346 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
347 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
348 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
349 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
350 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
351 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
352 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
353 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
354
355#ifdef DEBUG
356 if (ldebug(rt_sendsig))
26be20a0 357 kprintf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
48cca561 358 frame.sf_sc.uc_stack.ss_flags, lp->lwp_sigstk.ss_sp,
08f2f1bb 359 lp->lwp_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
984263bc
MD
360#endif
361
362 if (copyout(&frame, fp, sizeof(frame)) != 0) {
363 /*
364 * Process has trashed its stack; give it an illegal
365 * instruction to halt it in its tracks.
366 */
b276424c 367 sigexit(lp, SIGILL);
984263bc
MD
368 /* NOTREACHED */
369 }
370
371 /*
372 * Build context to run handler in.
373 */
374 regs->tf_esp = (int)fp;
375 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
376 linux_sznonrtsigcode;
8688c24a
AE
377
378 /*
379 * i386 abi specifies that the direction flag must be cleared
380 * on function entry
381 */
382 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
383
984263bc
MD
384 regs->tf_cs = _ucodesel;
385 regs->tf_ds = _udatasel;
386 regs->tf_es = _udatasel;
4e7c41c5
MD
387 /* allow %fs and %gs to be inherited by the signal handler */
388 /*
984263bc 389 regs->tf_fs = _udatasel;
4e7c41c5
MD
390 regs->tf_gs = _udatasel;
391 */
984263bc 392 regs->tf_ss = _udatasel;
f2081646 393 clear_quickret();
984263bc
MD
394}
395
396
397/*
398 * Send an interrupt to process.
399 *
400 * Stack is set up to allow sigcode stored
401 * in u. to call routine, followed by kcall
402 * to sigreturn routine below. After sigreturn
403 * resets the signal mask, the stack, and the
404 * frame pointer, it returns to the user
405 * specified pc, psl.
406 */
407
408static void
409linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
410{
41c20dac 411 struct proc *p = curproc;
08f2f1bb 412 struct lwp *lp = curthread->td_lwp;
41c20dac 413 struct trapframe *regs;
984263bc
MD
414 struct l_sigframe *fp, frame;
415 l_sigset_t lmask;
416 int oonstack, i;
417
418 if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
419 /* Signal handler installed with SA_SIGINFO. */
420 linux_rt_sendsig(catcher, sig, mask, code);
421 return;
422 }
423
08f2f1bb
SS
424 regs = lp->lwp_md.md_regs;
425 oonstack = lp->lwp_sigstk.ss_flags & SS_ONSTACK;
984263bc
MD
426
427#ifdef DEBUG
428 if (ldebug(sendsig))
26be20a0 429 kprintf(ARGS(sendsig, "%p, %d, %p, %lu"),
984263bc
MD
430 catcher, sig, (void*)mask, code);
431#endif
432
433 /*
434 * Allocate space for the signal handler context.
435 */
4643740a 436 if ((lp->lwp_flags & LWP_ALTSTACK) && !oonstack &&
984263bc 437 SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
08f2f1bb
SS
438 fp = (struct l_sigframe *)(lp->lwp_sigstk.ss_sp +
439 lp->lwp_sigstk.ss_size - sizeof(struct l_sigframe));
440 lp->lwp_sigstk.ss_flags |= SS_ONSTACK;
984263bc
MD
441 } else
442 fp = (struct l_sigframe *)regs->tf_esp - 1;
443
444 /*
445 * grow() will return FALSE if the fp will not fit inside the stack
446 * and the stack can not be grown. useracc will return FALSE
447 * if access is denied.
448 */
8d496bf9 449 if ((vm_map_growstack(p, (vm_offset_t)fp) != KERN_SUCCESS) ||
984263bc
MD
450 !useracc((caddr_t)fp, sizeof (struct l_sigframe),
451 VM_PROT_WRITE)) {
452 /*
453 * Process has trashed its stack; give it an illegal
454 * instruction to halt it in its tracks.
455 */
456 SIGACTION(p, SIGILL) = SIG_DFL;
457 SIGDELSET(p->p_sigignore, SIGILL);
458 SIGDELSET(p->p_sigcatch, SIGILL);
08f2f1bb 459 SIGDELSET(lp->lwp_sigmask, SIGILL);
7278a846 460 lwpsignal(p, lp, SIGILL);
984263bc
MD
461 return;
462 }
463
464 /*
465 * Build the argument list for the signal handler.
466 */
467 if (p->p_sysent->sv_sigtbl)
468 if (sig <= p->p_sysent->sv_sigsize)
469 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
470
471 frame.sf_handler = catcher;
472 frame.sf_sig = sig;
473
474 bsd_to_linux_sigset(mask, &lmask);
475
476 /*
477 * Build the signal context to be used by sigreturn.
478 */
479 frame.sf_sc.sc_mask = lmask.__bits[0];
4e7c41c5 480 frame.sf_sc.sc_gs = regs->tf_gs;
984263bc
MD
481 frame.sf_sc.sc_fs = regs->tf_fs;
482 frame.sf_sc.sc_es = regs->tf_es;
483 frame.sf_sc.sc_ds = regs->tf_ds;
484 frame.sf_sc.sc_edi = regs->tf_edi;
485 frame.sf_sc.sc_esi = regs->tf_esi;
486 frame.sf_sc.sc_ebp = regs->tf_ebp;
487 frame.sf_sc.sc_ebx = regs->tf_ebx;
488 frame.sf_sc.sc_edx = regs->tf_edx;
489 frame.sf_sc.sc_ecx = regs->tf_ecx;
490 frame.sf_sc.sc_eax = regs->tf_eax;
491 frame.sf_sc.sc_eip = regs->tf_eip;
492 frame.sf_sc.sc_cs = regs->tf_cs;
493 frame.sf_sc.sc_eflags = regs->tf_eflags;
494 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
495 frame.sf_sc.sc_ss = regs->tf_ss;
496 frame.sf_sc.sc_err = regs->tf_err;
497 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
498
499 bzero(&frame.sf_fpstate, sizeof(struct l_fpstate));
500
501 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
502 frame.sf_extramask[i] = lmask.__bits[i+1];
503
504 if (copyout(&frame, fp, sizeof(frame)) != 0) {
505 /*
506 * Process has trashed its stack; give it an illegal
507 * instruction to halt it in its tracks.
508 */
b276424c 509 sigexit(lp, SIGILL);
984263bc
MD
510 /* NOTREACHED */
511 }
512
513 /*
514 * Build context to run handler in.
515 */
516 regs->tf_esp = (int)fp;
517 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
8688c24a
AE
518
519 /*
520 * i386 abi specifies that the direction flag must be cleared
521 * on function entry
522 */
523 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
524
984263bc
MD
525 regs->tf_cs = _ucodesel;
526 regs->tf_ds = _udatasel;
527 regs->tf_es = _udatasel;
4e7c41c5
MD
528 /* Allow %fs and %gs to be inherited by the signal handler */
529 /*
984263bc 530 regs->tf_fs = _udatasel;
4e7c41c5
MD
531 regs->tf_gs = _udatasel;
532 */
984263bc 533 regs->tf_ss = _udatasel;
f2081646 534 clear_quickret();
984263bc
MD
535}
536
537/*
538 * System call to cleanup state after a signal
539 * has been taken. Reset signal mask and
540 * stack state from context left by sendsig (above).
541 * Return to previous pc and psl as specified by
542 * context left by sendsig. Check carefully to
543 * make sure that the user has not modified the
544 * psl to gain improper privileges or to cause
545 * a machine fault.
3919ced0
MD
546 *
547 * MPSAFE
984263bc
MD
548 */
549int
753fd850 550sys_linux_sigreturn(struct linux_sigreturn_args *args)
984263bc 551{
08f2f1bb 552 struct lwp *lp = curthread->td_lwp;
984263bc 553 struct l_sigframe frame;
c9faf524 554 struct trapframe *regs;
984263bc
MD
555 l_sigset_t lmask;
556 int eflags, i;
557
08f2f1bb 558 regs = lp->lwp_md.md_regs;
984263bc
MD
559
560#ifdef DEBUG
561 if (ldebug(sigreturn))
26be20a0 562 kprintf(ARGS(sigreturn, "%p"), (void *)args->sfp);
984263bc
MD
563#endif
564 /*
565 * The trampoline code hands us the sigframe.
566 * It is unsafe to keep track of it ourselves, in the event that a
567 * program jumps out of a signal handler.
568 */
569 if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
570 return (EFAULT);
571
572 /*
573 * Check for security violations.
574 */
575#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
576 eflags = frame.sf_sc.sc_eflags;
577 /*
578 * XXX do allow users to change the privileged flag PSL_RF. The
579 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
580 * sometimes set it there too. tf_eflags is kept in the signal
581 * context during signal handling and there is no other place
582 * to remember it, so the PSL_RF bit may be corrupted by the
583 * signal handler without us knowing. Corruption of the PSL_RF
584 * bit at worst causes one more or one less debugger trap, so
585 * allowing it is fairly harmless.
586 */
587 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
588 return(EINVAL);
589 }
590
591 /*
592 * Don't allow users to load a valid privileged %cs. Let the
593 * hardware check for invalid selectors, excess privilege in
594 * other selectors, invalid %eip's and invalid %esp's.
595 */
596#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
597 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
08f2f1bb 598 trapsignal(lp, SIGBUS, T_PROTFLT);
984263bc
MD
599 return(EINVAL);
600 }
601
08f2f1bb 602 lp->lwp_sigstk.ss_flags &= ~SS_ONSTACK;
984263bc
MD
603 lmask.__bits[0] = frame.sf_sc.sc_mask;
604 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
605 lmask.__bits[i+1] = frame.sf_extramask[i];
08f2f1bb
SS
606 linux_to_bsd_sigset(&lmask, &lp->lwp_sigmask);
607 SIG_CANTMASK(lp->lwp_sigmask);
984263bc
MD
608
609 /*
610 * Restore signal context.
611 */
612 /* %gs was restored by the trampoline. */
613 regs->tf_fs = frame.sf_sc.sc_fs;
614 regs->tf_es = frame.sf_sc.sc_es;
615 regs->tf_ds = frame.sf_sc.sc_ds;
616 regs->tf_edi = frame.sf_sc.sc_edi;
617 regs->tf_esi = frame.sf_sc.sc_esi;
618 regs->tf_ebp = frame.sf_sc.sc_ebp;
619 regs->tf_ebx = frame.sf_sc.sc_ebx;
620 regs->tf_edx = frame.sf_sc.sc_edx;
621 regs->tf_ecx = frame.sf_sc.sc_ecx;
622 regs->tf_eax = frame.sf_sc.sc_eax;
623 regs->tf_eip = frame.sf_sc.sc_eip;
624 regs->tf_cs = frame.sf_sc.sc_cs;
625 regs->tf_eflags = eflags;
626 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
627 regs->tf_ss = frame.sf_sc.sc_ss;
f2081646 628 clear_quickret();
984263bc
MD
629
630 return (EJUSTRETURN);
631}
632
633/*
634 * System call to cleanup state after a signal
635 * has been taken. Reset signal mask and
636 * stack state from context left by rt_sendsig (above).
637 * Return to previous pc and psl as specified by
638 * context left by sendsig. Check carefully to
639 * make sure that the user has not modified the
640 * psl to gain improper privileges or to cause
641 * a machine fault.
3919ced0
MD
642 *
643 * MPSAFE
984263bc
MD
644 */
645int
753fd850 646sys_linux_rt_sigreturn(struct linux_rt_sigreturn_args *args)
984263bc 647{
08f2f1bb 648 struct lwp *lp = curthread->td_lwp;
984263bc
MD
649 struct l_ucontext uc;
650 struct l_sigcontext *context;
651 l_stack_t *lss;
1058bc2a 652 stack_t ss;
c9faf524 653 struct trapframe *regs;
984263bc 654 int eflags;
984263bc 655
08f2f1bb 656 regs = lp->lwp_md.md_regs;
984263bc
MD
657
658#ifdef DEBUG
659 if (ldebug(rt_sigreturn))
26be20a0 660 kprintf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
984263bc
MD
661#endif
662 /*
663 * The trampoline code hands us the ucontext.
664 * It is unsafe to keep track of it ourselves, in the event that a
665 * program jumps out of a signal handler.
666 */
667 if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
668 return (EFAULT);
669
670 context = &uc.uc_mcontext;
671
672 /*
673 * Check for security violations.
674 */
675#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
676 eflags = context->sc_eflags;
677 /*
678 * XXX do allow users to change the privileged flag PSL_RF. The
679 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
680 * sometimes set it there too. tf_eflags is kept in the signal
681 * context during signal handling and there is no other place
682 * to remember it, so the PSL_RF bit may be corrupted by the
683 * signal handler without us knowing. Corruption of the PSL_RF
684 * bit at worst causes one more or one less debugger trap, so
685 * allowing it is fairly harmless.
686 */
687 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
688 return(EINVAL);
689 }
690
691 /*
692 * Don't allow users to load a valid privileged %cs. Let the
693 * hardware check for invalid selectors, excess privilege in
694 * other selectors, invalid %eip's and invalid %esp's.
695 */
696#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
697 if (!CS_SECURE(context->sc_cs)) {
08f2f1bb 698 trapsignal(lp, SIGBUS, T_PROTFLT);
984263bc
MD
699 return(EINVAL);
700 }
701
08f2f1bb
SS
702 lp->lwp_sigstk.ss_flags &= ~SS_ONSTACK;
703 linux_to_bsd_sigset(&uc.uc_sigmask, &lp->lwp_sigmask);
704 SIG_CANTMASK(lp->lwp_sigmask);
984263bc
MD
705
706 /*
707 * Restore signal context
708 */
709 /* %gs was restored by the trampoline. */
710 regs->tf_fs = context->sc_fs;
711 regs->tf_es = context->sc_es;
712 regs->tf_ds = context->sc_ds;
713 regs->tf_edi = context->sc_edi;
714 regs->tf_esi = context->sc_esi;
715 regs->tf_ebp = context->sc_ebp;
716 regs->tf_ebx = context->sc_ebx;
717 regs->tf_edx = context->sc_edx;
718 regs->tf_ecx = context->sc_ecx;
719 regs->tf_eax = context->sc_eax;
720 regs->tf_eip = context->sc_eip;
721 regs->tf_cs = context->sc_cs;
722 regs->tf_eflags = eflags;
723 regs->tf_esp = context->sc_esp_at_signal;
724 regs->tf_ss = context->sc_ss;
725
726 /*
727 * call sigaltstack & ignore results..
728 */
984263bc 729 lss = &uc.uc_stack;
1058bc2a
DRJ
730 ss.ss_sp = lss->ss_sp;
731 ss.ss_size = lss->ss_size;
732 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
984263bc
MD
733
734#ifdef DEBUG
735 if (ldebug(rt_sigreturn))
26be20a0 736 kprintf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
1058bc2a 737 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
984263bc 738#endif
1058bc2a 739 kern_sigaltstack(&ss, NULL);
f2081646 740 clear_quickret();
984263bc
MD
741
742 return (EJUSTRETURN);
743}
744
270ac911
MD
745/*
746 * Prep arguments.
747 *
748 * MUST BE MPSAFE
749 */
984263bc
MD
750static void
751linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
752{
753 args[0] = tf->tf_ebx;
754 args[1] = tf->tf_ecx;
755 args[2] = tf->tf_edx;
756 args[3] = tf->tf_esi;
757 args[4] = tf->tf_edi;
22d86f66 758 args[5] = tf->tf_ebp;
984263bc
MD
759 *params = NULL; /* no copyin */
760}
761
762/*
763 * If a linux binary is exec'ing something, try this image activator
764 * first. We override standard shell script execution in order to
765 * be able to modify the interpreter path. We only do this if a linux
766 * binary is doing the exec, so we do not create an EXEC module for it.
767 */
b1efe3b4 768static int exec_linux_imgact_try (struct image_params *iparams);
984263bc
MD
769
770static int
2da2a8af 771exec_linux_imgact_try(struct image_params *imgp)
984263bc
MD
772{
773 const char *head = (const char *)imgp->image_header;
774 int error = -1;
775
776 /*
777 * The interpreter for shell scripts run from a linux binary needs
778 * to be located in /compat/linux if possible in order to recursively
779 * maintain linux path emulation.
780 */
781 if (((const short *)head)[0] == SHELLMAGIC) {
782 /*
783 * Run our normal shell image activator. If it succeeds attempt
784 * to use the alternate path for the interpreter. If an alternate
785 * path is found, use our stringspace to store it.
786 */
787 if ((error = exec_shell_imgact(imgp)) == 0) {
136178b3
DRJ
788 linux_translate_path(imgp->interpreter_name,
789 MAXSHELLCMDLEN);
984263bc
MD
790 }
791 }
792 return(error);
793}
794
d571622e 795struct sysentvec linux_sysvec = {
315b8b8b
JM
796 .sv_size = LINUX_SYS_MAXSYSCALL,
797 .sv_table = linux_sysent,
798 .sv_mask = 0xffffffff,
799 .sv_sigsize = LINUX_SIGTBLSZ,
800 .sv_sigtbl = bsd_to_linux_signal,
801 .sv_errsize = ELAST + 1,
802 .sv_errtbl = bsd_to_linux_errno,
803 .sv_transtrap = translate_traps,
804 .sv_fixup = linux_fixup,
805 .sv_sendsig = linux_sendsig,
806 .sv_sigcode = linux_sigcode,
807 .sv_szsigcode = &linux_szsigcode,
808 .sv_prepsyscall = linux_prepsyscall,
809 .sv_name = "Linux a.out",
810 .sv_coredump = NULL,
811 .sv_imgact_try = exec_linux_imgact_try,
812 .sv_minsigstksz = LINUX_MINSIGSTKSZ
984263bc
MD
813};
814
d571622e 815struct sysentvec elf_linux_sysvec = {
315b8b8b
JM
816 .sv_size = LINUX_SYS_MAXSYSCALL,
817 .sv_table = linux_sysent,
818 .sv_mask = 0xffffffff,
819 .sv_sigsize = LINUX_SIGTBLSZ,
820 .sv_sigtbl = bsd_to_linux_signal,
821 .sv_errsize = ELAST + 1,
822 .sv_errtbl = bsd_to_linux_errno,
823 .sv_transtrap = translate_traps,
824 .sv_fixup = elf_linux_fixup,
825 .sv_sendsig = linux_sendsig,
826 .sv_sigcode = linux_sigcode,
827 .sv_szsigcode = &linux_szsigcode,
828 .sv_prepsyscall = linux_prepsyscall,
829 .sv_name = "Linux ELF32",
830 .sv_coredump = elf32_coredump,
831 .sv_imgact_try = exec_linux_imgact_try,
832 .sv_minsigstksz = LINUX_MINSIGSTKSZ
984263bc
MD
833};
834
315b8b8b
JM
835static const char GNU_ABI_VENDOR[] = "GNU";
836static const char SUSE_ABI_VENDOR[] = "SuSE";
f2000797
JM
837static int GNULINUX_ABI_DESC = 0;
838
839static boolean_t
840linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
841{
842 const Elf32_Word *desc;
843 uintptr_t p;
844
845 p = (uintptr_t)(note + 1);
846 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
847
848 desc = (const Elf32_Word *)p;
849 if (desc[0] != GNULINUX_ABI_DESC)
850 return (FALSE);
851 /*
852 * For Linux we encode osrel as follows:
853 * VVVMMMIII (version, major, minor)
854 */
855 *osrel = desc[1] * 1000000 +
856 desc[2] * 1000 +
857 desc[3];
858
859 return (TRUE);
860}
984263bc 861
315b8b8b 862static Elf_Brandnote linux32_generic_brandnote = {
f2000797
JM
863 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
864 .hdr.n_descsz = 16,
865 .hdr.n_type = 1,
866 .vendor = GNU_ABI_VENDOR,
867 .flags = BN_TRANSLATE_OSREL,
868 .trans_osrel = linux_trans_osrel,
315b8b8b 869};
8d0415e1 870
315b8b8b 871static Elf_Brandnote linux32_suse_brandnote = {
f2000797
JM
872 .hdr.n_namesz = sizeof(SUSE_ABI_VENDOR),
873 .hdr.n_descsz = 16,
874 .hdr.n_type = 1,
875 .vendor = SUSE_ABI_VENDOR,
876 .flags = BN_TRANSLATE_OSREL,
877 .trans_osrel = linux_trans_osrel,
315b8b8b 878};
8900652c 879
315b8b8b
JM
880static Elf32_Brandinfo linux32_brand = {
881 .brand = ELFOSABI_LINUX,
882 .machine = EM_386,
883 .compat_3_brand = "Linux",
884 .emul_path = "/compat/linux",
885 .interp_path = "/lib/ld-linux.so.1",
d571622e 886 .sysvec = &elf_linux_sysvec,
315b8b8b 887 .interp_newpath = NULL,
14dc24a3 888 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE,
315b8b8b
JM
889 .brand_note = &linux32_generic_brandnote,
890};
8900652c 891
315b8b8b
JM
892static Elf32_Brandinfo linux32_glibc2_brand = {
893 .brand = ELFOSABI_LINUX,
894 .machine = EM_386,
895 .compat_3_brand = "Linux",
896 .emul_path = "/compat/linux",
897 .interp_path = "/lib/ld-linux.so.2",
d571622e 898 .sysvec = &elf_linux_sysvec,
315b8b8b 899 .interp_newpath = NULL,
14dc24a3 900 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE,
315b8b8b
JM
901 .brand_note = &linux32_generic_brandnote,
902};
903
904static Elf32_Brandinfo linux32_suse_brand = {
905 .brand = ELFOSABI_LINUX,
906 .machine = EM_386,
907 .compat_3_brand = "Linux",
908 .emul_path = "/compat/linux",
909 .interp_path = "/lib/ld-linux.so.2",
d571622e 910 .sysvec = &elf_linux_sysvec,
315b8b8b 911 .interp_newpath = NULL,
14dc24a3 912 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE,
315b8b8b
JM
913 .brand_note = &linux32_suse_brandnote,
914};
915
916Elf32_Brandinfo *linux_brandlist[] = {
917 &linux32_brand,
918 &linux32_glibc2_brand,
919 &linux32_suse_brand,
14dc24a3 920 NULL
315b8b8b 921};
8900652c
JS
922
923static int
984263bc
MD
924linux_elf_modevent(module_t mod, int type, void *data)
925{
926 Elf32_Brandinfo **brandinfo;
927 int error;
928
929 error = 0;
930
931 switch(type) {
932 case MOD_LOAD:
933 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
934 ++brandinfo)
315b8b8b 935 if (elf32_insert_brand_entry(*brandinfo) < 0)
984263bc
MD
936 error = EINVAL;
937 if (error == 0) {
984263bc 938 if (bootverbose)
26be20a0 939 kprintf("Linux ELF exec handler installed\n");
a1f82243 940 } else {
26be20a0 941 kprintf("cannot insert Linux ELF brand handler\n");
a1f82243
AH
942 }
943 EMUL_LOCKINIT();
944 lockinit(&futex_mtx, "linftxs", 0, LK_CANRECURSE);
945 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_transition,
946 NULL, 1000);
947 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, emuldata_exit,
948 NULL, 1000);
984263bc
MD
949 break;
950 case MOD_UNLOAD:
951 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
952 ++brandinfo)
315b8b8b 953 if (elf32_brand_inuse(*brandinfo))
984263bc
MD
954 error = EBUSY;
955 if (error == 0) {
956 for (brandinfo = &linux_brandlist[0];
957 *brandinfo != NULL; ++brandinfo)
315b8b8b 958 if (elf32_remove_brand_entry(*brandinfo) < 0)
984263bc
MD
959 error = EINVAL;
960 }
961 if (error == 0) {
984263bc 962 if (bootverbose)
26be20a0 963 kprintf("Linux ELF exec handler removed\n");
a1f82243 964 } else {
26be20a0 965 kprintf("Could not deinstall ELF interpreter entry\n");
a1f82243
AH
966 }
967 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
968 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
969 lockuninit(&futex_mtx);
970 EMUL_LOCKUNINIT();
984263bc
MD
971 break;
972 default:
973 break;
974 }
975 return error;
976}
977
978static moduledata_t linux_elf_mod = {
979 "linuxelf",
980 linux_elf_modevent,
981 0
982};
983
984DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);