proc->thread stage 4: rework the VFS and DEVICE subsystems to take thread
[dragonfly.git] / sys / kern / sys_process.c
CommitLineData
984263bc
MD
/*
 * Copyright (c) 1994, Sean Eric Fagan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Sean Eric Fagan.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/kern/sys_process.c,v 1.51.2.6 2003/01/08 03:06:45 kan Exp $
 * $DragonFly: src/sys/kern/sys_process.c,v 1.4 2003/06/25 03:55:57 dillon Exp $
 */
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/sysproto.h>
38#include <sys/proc.h>
39#include <sys/vnode.h>
40#include <sys/ptrace.h>
41
42#include <machine/reg.h>
43#include <vm/vm.h>
44#include <sys/lock.h>
45#include <vm/pmap.h>
46#include <vm/vm_map.h>
47#include <vm/vm_page.h>
48
49#include <sys/user.h>
50#include <miscfs/procfs/procfs.h>
51
52/* use the equivalent procfs code */
53#if 0
54static int
55pread (struct proc *procp, unsigned int addr, unsigned int *retval) {
56 int rv;
57 vm_map_t map, tmap;
58 vm_object_t object;
59 vm_offset_t kva = 0;
60 int page_offset; /* offset into page */
61 vm_offset_t pageno; /* page number */
62 vm_map_entry_t out_entry;
63 vm_prot_t out_prot;
64 boolean_t wired;
65 vm_pindex_t pindex;
66
67 /* Map page into kernel space */
68
69 map = &procp->p_vmspace->vm_map;
70
71 page_offset = addr - trunc_page(addr);
72 pageno = trunc_page(addr);
73
74 tmap = map;
75 rv = vm_map_lookup (&tmap, pageno, VM_PROT_READ, &out_entry,
76 &object, &pindex, &out_prot, &wired);
77
78 if (rv != KERN_SUCCESS)
79 return EINVAL;
80
81 vm_map_lookup_done (tmap, out_entry);
82
83 /* Find space in kernel_map for the page we're interested in */
84 rv = vm_map_find (kernel_map, object, IDX_TO_OFF(pindex),
85 &kva, PAGE_SIZE, 0, VM_PROT_ALL, VM_PROT_ALL, 0);
86
87 if (!rv) {
88 vm_object_reference (object);
89
90 rv = vm_map_pageable (kernel_map, kva, kva + PAGE_SIZE, 0);
91 if (!rv) {
92 *retval = 0;
93 bcopy ((caddr_t)kva + page_offset,
94 retval, sizeof *retval);
95 }
96 vm_map_remove (kernel_map, kva, kva + PAGE_SIZE);
97 }
98
99 return rv;
100}
101
102static int
103pwrite (struct proc *procp, unsigned int addr, unsigned int datum) {
104 int rv;
105 vm_map_t map, tmap;
106 vm_object_t object;
107 vm_offset_t kva = 0;
108 int page_offset; /* offset into page */
109 vm_offset_t pageno; /* page number */
110 vm_map_entry_t out_entry;
111 vm_prot_t out_prot;
112 boolean_t wired;
113 vm_pindex_t pindex;
114 boolean_t fix_prot = 0;
115
116 /* Map page into kernel space */
117
118 map = &procp->p_vmspace->vm_map;
119
120 page_offset = addr - trunc_page(addr);
121 pageno = trunc_page(addr);
122
123 /*
124 * Check the permissions for the area we're interested in.
125 */
126
127 if (vm_map_check_protection (map, pageno, pageno + PAGE_SIZE,
128 VM_PROT_WRITE) == FALSE) {
129 /*
130 * If the page was not writable, we make it so.
131 * XXX It is possible a page may *not* be read/executable,
132 * if a process changes that!
133 */
134 fix_prot = 1;
135 /* The page isn't writable, so let's try making it so... */
136 if ((rv = vm_map_protect (map, pageno, pageno + PAGE_SIZE,
137 VM_PROT_ALL, 0)) != KERN_SUCCESS)
138 return EFAULT; /* I guess... */
139 }
140
141 /*
142 * Now we need to get the page. out_entry, out_prot, wired, and
143 * single_use aren't used. One would think the vm code would be
144 * a *bit* nicer... We use tmap because vm_map_lookup() can
145 * change the map argument.
146 */
147
148 tmap = map;
149 rv = vm_map_lookup (&tmap, pageno, VM_PROT_WRITE, &out_entry,
150 &object, &pindex, &out_prot, &wired);
151 if (rv != KERN_SUCCESS) {
152 return EINVAL;
153 }
154
155 /*
156 * Okay, we've got the page. Let's release tmap.
157 */
158
159 vm_map_lookup_done (tmap, out_entry);
160
161 /*
162 * Fault the page in...
163 */
164
165 rv = vm_fault(map, pageno, VM_PROT_WRITE|VM_PROT_READ, FALSE);
166 if (rv != KERN_SUCCESS)
167 return EFAULT;
168
169 /* Find space in kernel_map for the page we're interested in */
170 rv = vm_map_find (kernel_map, object, IDX_TO_OFF(pindex),
171 &kva, PAGE_SIZE, 0,
172 VM_PROT_ALL, VM_PROT_ALL, 0);
173 if (!rv) {
174 vm_object_reference (object);
175
176 rv = vm_map_pageable (kernel_map, kva, kva + PAGE_SIZE, 0);
177 if (!rv) {
178 bcopy (&datum, (caddr_t)kva + page_offset, sizeof datum);
179 }
180 vm_map_remove (kernel_map, kva, kva + PAGE_SIZE);
181 }
182
183 if (fix_prot)
184 vm_map_protect (map, pageno, pageno + PAGE_SIZE,
185 VM_PROT_READ|VM_PROT_EXECUTE, 0);
186 return rv;
187}
188#endif
189
190/*
191 * Process debugging system call.
192 */
193#ifndef _SYS_SYSPROTO_H_
194struct ptrace_args {
195 int req;
196 pid_t pid;
197 caddr_t addr;
198 int data;
199};
200#endif
201
202int
41c20dac 203ptrace(struct ptrace_args *uap)
984263bc 204{
41c20dac
MD
205 struct proc *p = curproc;
206
984263bc
MD
207 /*
208 * XXX this obfuscation is to reduce stack usage, but the register
209 * structs may be too large to put on the stack anyway.
210 */
211 union {
212 struct ptrace_io_desc piod;
213 struct dbreg dbreg;
214 struct fpreg fpreg;
215 struct reg reg;
216 } r;
217 void *addr;
218 int error = 0;
219
220 addr = &r;
221 switch (uap->req) {
222 case PT_GETREGS:
223 case PT_GETFPREGS:
224#ifdef PT_GETDBREGS
225 case PT_GETDBREGS:
226#endif
227 break;
228 case PT_SETREGS:
229 error = copyin(uap->addr, &r.reg, sizeof r.reg);
230 break;
231 case PT_SETFPREGS:
232 error = copyin(uap->addr, &r.fpreg, sizeof r.fpreg);
233 break;
234#ifdef PT_SETDBREGS
235 case PT_SETDBREGS:
236 error = copyin(uap->addr, &r.dbreg, sizeof r.dbreg);
237 break;
238#endif
239 case PT_IO:
240 error = copyin(uap->addr, &r.piod, sizeof r.piod);
241 break;
242 default:
243 addr = uap->addr;
244 }
245 if (error)
246 return (error);
247
248 error = kern_ptrace(p, uap->req, uap->pid, addr, uap->data);
249 if (error)
250 return (error);
251
252 switch (uap->req) {
253 case PT_IO:
254 (void)copyout(&r.piod, uap->addr, sizeof r.piod);
255 break;
256 case PT_GETREGS:
257 error = copyout(&r.reg, uap->addr, sizeof r.reg);
258 break;
259 case PT_GETFPREGS:
260 error = copyout(&r.fpreg, uap->addr, sizeof r.fpreg);
261 break;
262#ifdef PT_GETDBREGS
263 case PT_GETDBREGS:
264 error = copyout(&r.dbreg, uap->addr, sizeof r.dbreg);
265 break;
266#endif
267 }
268
269 return (error);
270}
271
272int
273kern_ptrace(struct proc *curp, int req, pid_t pid, void *addr, int data)
274{
275 struct proc *p, *pp;
276 struct iovec iov;
277 struct uio uio;
278 struct ptrace_io_desc *piod;
279 int error = 0;
280 int write, tmp, s;
281
282 write = 0;
283 if (req == PT_TRACE_ME)
284 p = curp;
285 else {
286 if ((p = pfind(pid)) == NULL)
287 return ESRCH;
288 }
41c20dac 289 if (!PRISON_CHECK(curp->p_ucred, p->p_ucred))
984263bc
MD
290 return (ESRCH);
291
292 /* Can't trace a process that's currently exec'ing. */
293 if ((p->p_flag & P_INEXEC) != 0)
294 return EAGAIN;
295
296 /*
297 * Permissions check
298 */
299 switch (req) {
300 case PT_TRACE_ME:
301 /* Always legal. */
302 break;
303
304 case PT_ATTACH:
305 /* Self */
306 if (p->p_pid == curp->p_pid)
307 return EINVAL;
308
309 /* Already traced */
310 if (p->p_flag & P_TRACED)
311 return EBUSY;
312
313 if (curp->p_flag & P_TRACED)
314 for (pp = curp->p_pptr; pp != NULL; pp = pp->p_pptr)
315 if (pp == p)
316 return (EINVAL);
317
318 /* not owned by you, has done setuid (unless you're root) */
41c20dac 319 if ((p->p_ucred->cr_ruid != curp->p_ucred->cr_ruid) ||
984263bc 320 (p->p_flag & P_SUGID)) {
dadab5e9 321 if ((error = suser(curp->p_thread)) != 0)
984263bc
MD
322 return error;
323 }
324
325 /* can't trace init when securelevel > 0 */
326 if (securelevel > 0 && p->p_pid == 1)
327 return EPERM;
328
329 /* OK */
330 break;
331
332 case PT_READ_I:
333 case PT_READ_D:
334 case PT_WRITE_I:
335 case PT_WRITE_D:
336 case PT_IO:
337 case PT_CONTINUE:
338 case PT_KILL:
339 case PT_STEP:
340 case PT_DETACH:
341#ifdef PT_GETREGS
342 case PT_GETREGS:
343#endif
344#ifdef PT_SETREGS
345 case PT_SETREGS:
346#endif
347#ifdef PT_GETFPREGS
348 case PT_GETFPREGS:
349#endif
350#ifdef PT_SETFPREGS
351 case PT_SETFPREGS:
352#endif
353#ifdef PT_GETDBREGS
354 case PT_GETDBREGS:
355#endif
356#ifdef PT_SETDBREGS
357 case PT_SETDBREGS:
358#endif
359 /* not being traced... */
360 if ((p->p_flag & P_TRACED) == 0)
361 return EPERM;
362
363 /* not being traced by YOU */
364 if (p->p_pptr != curp)
365 return EBUSY;
366
367 /* not currently stopped */
368 if (p->p_stat != SSTOP || (p->p_flag & P_WAITED) == 0)
369 return EBUSY;
370
371 /* OK */
372 break;
373
374 default:
375 return EINVAL;
376 }
377
378#ifdef FIX_SSTEP
379 /*
380 * Single step fixup ala procfs
381 */
382 FIX_SSTEP(p);
383#endif
384
385 /*
386 * Actually do the requests
387 */
388
389 curp->p_retval[0] = 0;
390
391 switch (req) {
392 case PT_TRACE_ME:
393 /* set my trace flag and "owner" so it can read/write me */
394 p->p_flag |= P_TRACED;
395 p->p_oppid = p->p_pptr->p_pid;
396 return 0;
397
398 case PT_ATTACH:
399 /* security check done above */
400 p->p_flag |= P_TRACED;
401 p->p_oppid = p->p_pptr->p_pid;
402 if (p->p_pptr != curp)
403 proc_reparent(p, curp);
404 data = SIGSTOP;
405 goto sendsig; /* in PT_CONTINUE below */
406
407 case PT_STEP:
408 case PT_CONTINUE:
409 case PT_DETACH:
410 if ((req != PT_STEP) && ((unsigned)data > _SIG_MAXSIG))
411 return EINVAL;
412
413 PHOLD(p);
414
415 if (req == PT_STEP) {
416 if ((error = ptrace_single_step (p))) {
417 PRELE(p);
418 return error;
419 }
420 }
421
422 if (addr != (void *)1) {
423 if ((error = ptrace_set_pc (p,
424 (u_long)(uintfptr_t)addr))) {
425 PRELE(p);
426 return error;
427 }
428 }
429 PRELE(p);
430
431 if (req == PT_DETACH) {
432 /* reset process parent */
433 if (p->p_oppid != p->p_pptr->p_pid) {
434 struct proc *pp;
435
436 pp = pfind(p->p_oppid);
437 proc_reparent(p, pp ? pp : initproc);
438 }
439
440 p->p_flag &= ~(P_TRACED | P_WAITED);
441 p->p_oppid = 0;
442
443 /* should we send SIGCHLD? */
444 }
445
446 sendsig:
447 /* deliver or queue signal */
448 s = splhigh();
449 if (p->p_stat == SSTOP) {
450 p->p_xstat = data;
451 setrunnable(p);
452 } else if (data) {
453 psignal(p, data);
454 }
455 splx(s);
456 return 0;
457
458 case PT_WRITE_I:
459 case PT_WRITE_D:
460 write = 1;
461 /* fallthrough */
462 case PT_READ_I:
463 case PT_READ_D:
464 tmp = 0;
465 /* write = 0 set above */
466 iov.iov_base = write ? (caddr_t)&data : (caddr_t)&tmp;
467 iov.iov_len = sizeof(int);
468 uio.uio_iov = &iov;
469 uio.uio_iovcnt = 1;
470 uio.uio_offset = (off_t)(uintptr_t)addr;
471 uio.uio_resid = sizeof(int);
472 uio.uio_segflg = UIO_SYSSPACE;
473 uio.uio_rw = write ? UIO_WRITE : UIO_READ;
dadab5e9 474 uio.uio_td = p->p_thread;
984263bc
MD
475 error = procfs_domem(curp, p, NULL, &uio);
476 if (uio.uio_resid != 0) {
477 /*
478 * XXX procfs_domem() doesn't currently return ENOSPC,
479 * so I think write() can bogusly return 0.
480 * XXX what happens for short writes? We don't want
481 * to write partial data.
482 * XXX procfs_domem() returns EPERM for other invalid
483 * addresses. Convert this to EINVAL. Does this
484 * clobber returns of EPERM for other reasons?
485 */
486 if (error == 0 || error == ENOSPC || error == EPERM)
487 error = EINVAL; /* EOF */
488 }
489 if (!write)
490 curp->p_retval[0] = tmp;
491 return (error);
492
493 case PT_IO:
494 piod = addr;
495 iov.iov_base = piod->piod_addr;
496 iov.iov_len = piod->piod_len;
497 uio.uio_iov = &iov;
498 uio.uio_iovcnt = 1;
499 uio.uio_offset = (off_t)(uintptr_t)piod->piod_offs;
500 uio.uio_resid = piod->piod_len;
501 uio.uio_segflg = UIO_USERSPACE;
dadab5e9 502 uio.uio_td = p->p_thread;
984263bc
MD
503 switch (piod->piod_op) {
504 case PIOD_READ_D:
505 case PIOD_READ_I:
506 uio.uio_rw = UIO_READ;
507 break;
508 case PIOD_WRITE_D:
509 case PIOD_WRITE_I:
510 uio.uio_rw = UIO_WRITE;
511 break;
512 default:
513 return (EINVAL);
514 }
515 error = procfs_domem(curp, p, NULL, &uio);
516 piod->piod_len -= uio.uio_resid;
517 return (error);
518
519 case PT_KILL:
520 data = SIGKILL;
521 goto sendsig; /* in PT_CONTINUE above */
522
523#ifdef PT_SETREGS
524 case PT_SETREGS:
525 write = 1;
526 /* fallthrough */
527#endif /* PT_SETREGS */
528#ifdef PT_GETREGS
529 case PT_GETREGS:
530 /* write = 0 above */
531#endif /* PT_SETREGS */
532#if defined(PT_SETREGS) || defined(PT_GETREGS)
533 if (!procfs_validregs(p)) /* no P_SYSTEM procs please */
534 return EINVAL;
535 else {
536 iov.iov_base = addr;
537 iov.iov_len = sizeof(struct reg);
538 uio.uio_iov = &iov;
539 uio.uio_iovcnt = 1;
540 uio.uio_offset = 0;
541 uio.uio_resid = sizeof(struct reg);
542 uio.uio_segflg = UIO_SYSSPACE;
543 uio.uio_rw = write ? UIO_WRITE : UIO_READ;
dadab5e9 544 uio.uio_td = curp->p_thread;
984263bc
MD
545 return (procfs_doregs(curp, p, NULL, &uio));
546 }
547#endif /* defined(PT_SETREGS) || defined(PT_GETREGS) */
548
549#ifdef PT_SETFPREGS
550 case PT_SETFPREGS:
551 write = 1;
552 /* fallthrough */
553#endif /* PT_SETFPREGS */
554#ifdef PT_GETFPREGS
555 case PT_GETFPREGS:
556 /* write = 0 above */
557#endif /* PT_SETFPREGS */
558#if defined(PT_SETFPREGS) || defined(PT_GETFPREGS)
559 if (!procfs_validfpregs(p)) /* no P_SYSTEM procs please */
560 return EINVAL;
561 else {
562 iov.iov_base = addr;
563 iov.iov_len = sizeof(struct fpreg);
564 uio.uio_iov = &iov;
565 uio.uio_iovcnt = 1;
566 uio.uio_offset = 0;
567 uio.uio_resid = sizeof(struct fpreg);
568 uio.uio_segflg = UIO_SYSSPACE;
569 uio.uio_rw = write ? UIO_WRITE : UIO_READ;
dadab5e9 570 uio.uio_td = curp->p_thread;
984263bc
MD
571 return (procfs_dofpregs(curp, p, NULL, &uio));
572 }
573#endif /* defined(PT_SETFPREGS) || defined(PT_GETFPREGS) */
574
575#ifdef PT_SETDBREGS
576 case PT_SETDBREGS:
577 write = 1;
578 /* fallthrough */
579#endif /* PT_SETDBREGS */
580#ifdef PT_GETDBREGS
581 case PT_GETDBREGS:
582 /* write = 0 above */
583#endif /* PT_SETDBREGS */
584#if defined(PT_SETDBREGS) || defined(PT_GETDBREGS)
585 if (!procfs_validdbregs(p)) /* no P_SYSTEM procs please */
586 return EINVAL;
587 else {
588 iov.iov_base = addr;
589 iov.iov_len = sizeof(struct dbreg);
590 uio.uio_iov = &iov;
591 uio.uio_iovcnt = 1;
592 uio.uio_offset = 0;
593 uio.uio_resid = sizeof(struct dbreg);
594 uio.uio_segflg = UIO_SYSSPACE;
595 uio.uio_rw = write ? UIO_WRITE : UIO_READ;
dadab5e9 596 uio.uio_td = curp->p_thread;
984263bc
MD
597 return (procfs_dodbregs(curp, p, NULL, &uio));
598 }
599#endif /* defined(PT_SETDBREGS) || defined(PT_GETDBREGS) */
600
601 default:
602 break;
603 }
604
605 return 0;
606}
607
/*
 * trace_req()
 *	Always returns 1; the process argument is not examined.
 */
int
trace_req(struct proc *p)
{
	return 1;
}
614
615/*
616 * stopevent()
617 * Stop a process because of a procfs event;
618 * stay stopped until p->p_step is cleared
619 * (cleared by PIOCCONT in procfs).
620 */
621
622void
623stopevent(struct proc *p, unsigned int event, unsigned int val) {
624 p->p_step = 1;
625
626 do {
627 p->p_xstat = val;
628 p->p_stype = event; /* Which event caused the stop? */
629 wakeup(&p->p_stype); /* Wake up any PIOCWAIT'ing procs */
630 tsleep(&p->p_step, PWAIT, "stopevent", 0);
631 } while (p->p_step);
632}