Cleanup the manual page:
[dragonfly.git] / sys / vfs / procfs / procfs_vnops.c
CommitLineData
984263bc
MD
1/*
2 * Copyright (c) 1993, 1995 Jan-Simon Pendry
3 * Copyright (c) 1993, 1995
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Jan-Simon Pendry.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95
38 *
39 * $FreeBSD: src/sys/miscfs/procfs/procfs_vnops.c,v 1.76.2.7 2002/01/22 17:22:59 nectar Exp $
ac424f9b 40 * $DragonFly: src/sys/vfs/procfs/procfs_vnops.c,v 1.13 2004/05/02 03:05:11 cpressey Exp $
984263bc
MD
41 */
42
43/*
44 * procfs vnode interface
45 */
46
47#include <sys/param.h>
48#include <sys/systm.h>
49#include <sys/time.h>
50#include <sys/kernel.h>
51#include <sys/lock.h>
52#include <sys/fcntl.h>
53#include <sys/proc.h>
54#include <sys/signalvar.h>
55#include <sys/vnode.h>
56#include <sys/uio.h>
57#include <sys/mount.h>
58#include <sys/namei.h>
59#include <sys/dirent.h>
60#include <sys/malloc.h>
61#include <machine/reg.h>
62#include <vm/vm_zone.h>
1f2de5d4 63#include <vfs/procfs/procfs.h>
984263bc
MD
64#include <sys/pioctl.h>
65
a6ee311a
RG
66static int procfs_access (struct vop_access_args *);
67static int procfs_badop (void);
68static int procfs_bmap (struct vop_bmap_args *);
69static int procfs_close (struct vop_close_args *);
70static int procfs_getattr (struct vop_getattr_args *);
71static int procfs_inactive (struct vop_inactive_args *);
72static int procfs_ioctl (struct vop_ioctl_args *);
73static int procfs_lookup (struct vop_lookup_args *);
74static int procfs_open (struct vop_open_args *);
75static int procfs_print (struct vop_print_args *);
76static int procfs_readdir (struct vop_readdir_args *);
77static int procfs_readlink (struct vop_readlink_args *);
78static int procfs_reclaim (struct vop_reclaim_args *);
79static int procfs_setattr (struct vop_setattr_args *);
984263bc
MD
80
81/*
82 * This is a list of the valid names in the
83 * process-specific sub-directories. It is
84 * used in procfs_lookup and procfs_readdir
85 */
86static struct proc_target {
87 u_char pt_type;
88 u_char pt_namlen;
89 char *pt_name;
90 pfstype pt_pfstype;
a6ee311a 91 int (*pt_valid) (struct proc *p);
984263bc
MD
92} proc_targets[] = {
93#define N(s) sizeof(s)-1, s
94 /* name type validp */
95 { DT_DIR, N("."), Pproc, NULL },
96 { DT_DIR, N(".."), Proot, NULL },
97 { DT_REG, N("mem"), Pmem, NULL },
98 { DT_REG, N("regs"), Pregs, procfs_validregs },
99 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs },
100 { DT_REG, N("dbregs"), Pdbregs, procfs_validdbregs },
101 { DT_REG, N("ctl"), Pctl, NULL },
102 { DT_REG, N("status"), Pstatus, NULL },
103 { DT_REG, N("note"), Pnote, NULL },
104 { DT_REG, N("notepg"), Pnotepg, NULL },
105 { DT_REG, N("map"), Pmap, procfs_validmap },
106 { DT_REG, N("etype"), Ptype, procfs_validtype },
107 { DT_REG, N("cmdline"), Pcmdline, NULL },
108 { DT_REG, N("rlimit"), Prlimit, NULL },
109 { DT_LNK, N("file"), Pfile, NULL },
110#undef N
111};
112static const int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]);
113
a6ee311a 114static pid_t atopid (const char *, u_int);
984263bc
MD
115
116/*
117 * set things up for doing i/o on
118 * the pfsnode (vp). (vp) is locked
119 * on entry, and should be left locked
120 * on exit.
121 *
122 * for procfs we don't need to do anything
123 * in particular for i/o. all that is done
124 * is to support exclusive open on process
125 * memory images.
ac424f9b
CP
126 *
127 * procfs_open(struct vnode *a_vp, int a_mode, struct ucred *a_cred,
128 * struct thread *a_td)
984263bc
MD
129 */
130static int
ac424f9b 131procfs_open(struct vop_open_args *ap)
984263bc
MD
132{
133 struct pfsnode *pfs = VTOPFS(ap->a_vp);
134 struct proc *p1, *p2;
135
136 p2 = PFIND(pfs->pfs_pid);
137 if (p2 == NULL)
138 return (ENOENT);
41c20dac 139 if (pfs->pfs_pid && !PRISON_CHECK(ap->a_cred, p2->p_ucred))
984263bc
MD
140 return (ENOENT);
141
142 switch (pfs->pfs_type) {
143 case Pmem:
144 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
145 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE)))
146 return (EBUSY);
147
dadab5e9
MD
148 p1 = ap->a_td->td_proc;
149 KKASSERT(p1);
984263bc
MD
150 /* Can't trace a process that's currently exec'ing. */
151 if ((p2->p_flag & P_INEXEC) != 0)
152 return EAGAIN;
41c20dac 153 if (!CHECKIO(p1, p2) || p_trespass(ap->a_cred, p2->p_ucred))
984263bc
MD
154 return (EPERM);
155
156 if (ap->a_mode & FWRITE)
157 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
158
159 return (0);
160
161 default:
162 break;
163 }
164
165 return (0);
166}
167
168/*
169 * close the pfsnode (vp) after doing i/o.
170 * (vp) is not locked on entry or exit.
171 *
172 * nothing to do for procfs other than undo
173 * any exclusive open flag (see _open above).
ac424f9b
CP
174 *
175 * procfs_close(struct vnode *a_vp, int a_fflag, struct ucred *a_cred,
176 * struct thread *a_td)
984263bc
MD
177 */
178static int
ac424f9b 179procfs_close(struct vop_close_args *ap)
984263bc
MD
180{
181 struct pfsnode *pfs = VTOPFS(ap->a_vp);
182 struct proc *p;
183
184 switch (pfs->pfs_type) {
185 case Pmem:
186 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
187 pfs->pfs_flags &= ~(FWRITE|O_EXCL);
188 /*
189 * This rather complicated-looking code is trying to
190 * determine if this was the last close on this particular
191 * vnode. While one would expect v_usecount to be 1 at
192 * that point, it seems that (according to John Dyson)
193 * the VM system will bump up the usecount. So: if the
194 * usecount is 2, and VOBJBUF is set, then this is really
195 * the last close. Otherwise, if the usecount is < 2
196 * then it is definitely the last close.
197 * If this is the last close, then it checks to see if
198 * the target process has PF_LINGER set in p_pfsflags,
199 * if this is *not* the case, then the process' stop flags
200 * are cleared, and the process is woken up. This is
201 * to help prevent the case where a process has been
202 * told to stop on an event, but then the requesting process
203 * has gone away or forgotten about it.
204 */
205 if ((ap->a_vp->v_usecount < 2)
206 && (p = pfind(pfs->pfs_pid))
207 && !(p->p_pfsflags & PF_LINGER)) {
208 p->p_stops = 0;
209 p->p_step = 0;
210 wakeup(&p->p_step);
211 }
212 break;
213 default:
214 break;
215 }
216
217 return (0);
218}
219
220/*
221 * do an ioctl operation on a pfsnode (vp).
222 * (vp) is not locked on entry or exit.
223 */
224static int
ac424f9b 225procfs_ioctl(struct vop_ioctl_args *ap)
984263bc
MD
226{
227 struct pfsnode *pfs = VTOPFS(ap->a_vp);
dadab5e9
MD
228 struct proc *procp;
229 struct proc *p;
984263bc
MD
230 int error;
231 int signo;
232 struct procfs_status *psp;
233 unsigned char flags;
234
984263bc 235 procp = pfind(pfs->pfs_pid);
dadab5e9 236 if (procp == NULL)
984263bc 237 return ENOTTY;
dadab5e9
MD
238 p = ap->a_td->td_proc;
239 if (p == NULL)
240 return EINVAL;
984263bc
MD
241
242 /* Can't trace a process that's currently exec'ing. */
243 if ((procp->p_flag & P_INEXEC) != 0)
244 return EAGAIN;
41c20dac 245 if (!CHECKIO(p, procp) || p_trespass(ap->a_cred, procp->p_ucred))
984263bc
MD
246 return EPERM;
247
248 switch (ap->a_command) {
249 case PIOCBIS:
250 procp->p_stops |= *(unsigned int*)ap->a_data;
251 break;
252 case PIOCBIC:
253 procp->p_stops &= ~*(unsigned int*)ap->a_data;
254 break;
255 case PIOCSFL:
256 /*
257 * NFLAGS is "non-suser_xxx flags" -- currently, only
258 * PFS_ISUGID ("ignore set u/g id");
259 */
260#define NFLAGS (PF_ISUGID)
261 flags = (unsigned char)*(unsigned int*)ap->a_data;
dadab5e9 262 if (flags & NFLAGS && (error = suser_cred(ap->a_cred, 0)))
984263bc
MD
263 return error;
264 procp->p_pfsflags = flags;
265 break;
266 case PIOCGFL:
267 *(unsigned int*)ap->a_data = (unsigned int)procp->p_pfsflags;
268 break;
269 case PIOCSTATUS:
270 psp = (struct procfs_status *)ap->a_data;
271 psp->state = (procp->p_step == 0);
272 psp->flags = procp->p_pfsflags;
273 psp->events = procp->p_stops;
274 if (procp->p_step) {
275 psp->why = procp->p_stype;
276 psp->val = procp->p_xstat;
277 } else {
278 psp->why = psp->val = 0; /* Not defined values */
279 }
280 break;
281 case PIOCWAIT:
282 psp = (struct procfs_status *)ap->a_data;
283 if (procp->p_step == 0) {
377d4740 284 error = tsleep(&procp->p_stype, PCATCH, "piocwait", 0);
984263bc
MD
285 if (error)
286 return error;
287 }
288 psp->state = 1; /* It stopped */
289 psp->flags = procp->p_pfsflags;
290 psp->events = procp->p_stops;
291 psp->why = procp->p_stype; /* why it stopped */
292 psp->val = procp->p_xstat; /* any extra info */
293 break;
294 case PIOCCONT: /* Restart a proc */
295 if (procp->p_step == 0)
296 return EINVAL; /* Can only start a stopped process */
297 if ((signo = *(int*)ap->a_data) != 0) {
298 if (signo >= NSIG || signo <= 0)
299 return EINVAL;
300 psignal(procp, signo);
301 }
302 procp->p_step = 0;
303 wakeup(&procp->p_step);
304 break;
305 default:
306 return (ENOTTY);
307 }
308 return 0;
309}
310
311/*
312 * do block mapping for pfsnode (vp).
313 * since we don't use the buffer cache
314 * for procfs this function should never
315 * be called. in any case, it's not clear
316 * what part of the kernel ever makes use
317 * of this function. for sanity, this is the
318 * usual no-op bmap, although returning
319 * (EIO) would be a reasonable alternative.
ac424f9b
CP
320 *
321 * procfs_bmap(struct vnode *a_vp, daddr_t a_bn, struct vnode **a_vpp,
322 * daddr_t *a_bnp, int *a_runp)
984263bc
MD
323 */
324static int
ac424f9b 325procfs_bmap(struct vop_bmap_args *ap)
984263bc 326{
984263bc
MD
327 if (ap->a_vpp != NULL)
328 *ap->a_vpp = ap->a_vp;
329 if (ap->a_bnp != NULL)
330 *ap->a_bnp = ap->a_bn;
331 if (ap->a_runp != NULL)
332 *ap->a_runp = 0;
333 return (0);
334}
335
336/*
337 * procfs_inactive is called when the pfsnode
338 * is vrele'd and the reference count goes
339 * to zero. (vp) will be on the vnode free
340 * list, so to get it back vget() must be
341 * used.
342 *
343 * (vp) is locked on entry, but must be unlocked on exit.
ac424f9b
CP
344 *
345 * procfs_inactive(struct vnode *a_vp, struct thread *a_td)
984263bc
MD
346 */
347static int
ac424f9b 348procfs_inactive(struct vop_inactive_args *ap)
984263bc
MD
349{
350 struct vnode *vp = ap->a_vp;
351
41a01a4d 352 VOP_UNLOCK(vp, NULL, 0, ap->a_td);
984263bc
MD
353
354 return (0);
355}
356
357/*
358 * _reclaim is called when getnewvnode()
359 * wants to make use of an entry on the vnode
360 * free list. at this time the filesystem needs
361 * to free any private data and remove the node
362 * from any private lists.
ac424f9b
CP
363 *
364 * procfs_reclaim(struct vnode *a_vp)
984263bc
MD
365 */
366static int
ac424f9b 367procfs_reclaim(struct vop_reclaim_args *ap)
984263bc 368{
984263bc
MD
369 return (procfs_freevp(ap->a_vp));
370}
371
372/*
373 * _print is used for debugging.
374 * just print a readable description
375 * of (vp).
ac424f9b
CP
376 *
377 * procfs_print(struct vnode *a_vp)
984263bc
MD
378 */
379static int
ac424f9b 380procfs_print(struct vop_print_args *ap)
984263bc
MD
381{
382 struct pfsnode *pfs = VTOPFS(ap->a_vp);
383
384 printf("tag VT_PROCFS, type %d, pid %ld, mode %x, flags %lx\n",
385 pfs->pfs_type, (long)pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags);
386 return (0);
387}
388
/*
 * Generic entry point for unsupported operations: always fails
 * with EIO.
 */
static int
procfs_badop(void)
{
	const int unsupported = EIO;

	return (unsupported);
}
397
398/*
399 * Invent attributes for pfsnode (vp) and store
400 * them in (vap).
401 * Directories lengths are returned as zero since
402 * any real length would require the genuine size
403 * to be computed, and nothing cares anyway.
404 *
405 * this is relatively minimal for procfs.
ac424f9b
CP
406 *
407 * procfs_getattr(struct vnode *a_vp, struct vattr *a_vap,
408 * struct ucred *a_cred, struct thread *a_td)
984263bc
MD
409 */
410static int
ac424f9b 411procfs_getattr(struct vop_getattr_args *ap)
984263bc
MD
412{
413 struct pfsnode *pfs = VTOPFS(ap->a_vp);
414 struct vattr *vap = ap->a_vap;
415 struct proc *procp;
416 int error;
417
418 /*
419 * First make sure that the process and its credentials
420 * still exist.
421 */
422 switch (pfs->pfs_type) {
423 case Proot:
424 case Pcurproc:
425 procp = 0;
426 break;
427
428 default:
429 procp = PFIND(pfs->pfs_pid);
41c20dac 430 if (procp == NULL || procp->p_ucred == NULL)
984263bc
MD
431 return (ENOENT);
432 }
433
434 error = 0;
435
436 /* start by zeroing out the attributes */
437 VATTR_NULL(vap);
438
439 /* next do all the common fields */
440 vap->va_type = ap->a_vp->v_type;
441 vap->va_mode = pfs->pfs_mode;
442 vap->va_fileid = pfs->pfs_fileno;
443 vap->va_flags = 0;
444 vap->va_blocksize = PAGE_SIZE;
445 vap->va_bytes = vap->va_size = 0;
446 vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
447
448 /*
449 * Make all times be current TOD.
450 * It would be possible to get the process start
451 * time from the p_stat structure, but there's
452 * no "file creation" time stamp anyway, and the
453 * p_stat structure is not addressible if u. gets
454 * swapped out for that process.
455 */
456 nanotime(&vap->va_ctime);
457 vap->va_atime = vap->va_mtime = vap->va_ctime;
458
459 /*
460 * If the process has exercised some setuid or setgid
461 * privilege, then rip away read/write permission so
462 * that only root can gain access.
463 */
464 switch (pfs->pfs_type) {
465 case Pctl:
466 case Pregs:
467 case Pfpregs:
468 case Pdbregs:
469 case Pmem:
470 if (procp->p_flag & P_SUGID)
471 vap->va_mode &= ~((VREAD|VWRITE)|
472 ((VREAD|VWRITE)>>3)|
473 ((VREAD|VWRITE)>>6));
474 break;
475 default:
476 break;
477 }
478
479 /*
480 * now do the object specific fields
481 *
482 * The size could be set from struct reg, but it's hardly
483 * worth the trouble, and it puts some (potentially) machine
484 * dependent data into this machine-independent code. If it
485 * becomes important then this function should break out into
486 * a per-file stat function in the corresponding .c file.
487 */
488
489 vap->va_nlink = 1;
490 if (procp) {
491 vap->va_uid = procp->p_ucred->cr_uid;
492 vap->va_gid = procp->p_ucred->cr_gid;
493 }
494
495 switch (pfs->pfs_type) {
496 case Proot:
497 /*
498 * Set nlink to 1 to tell fts(3) we don't actually know.
499 */
500 vap->va_nlink = 1;
501 vap->va_uid = 0;
502 vap->va_gid = 0;
503 vap->va_size = vap->va_bytes = DEV_BSIZE;
504 break;
505
506 case Pcurproc: {
507 char buf[16]; /* should be enough */
508 vap->va_uid = 0;
509 vap->va_gid = 0;
510 vap->va_size = vap->va_bytes =
511 snprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid);
512 break;
513 }
514
515 case Pproc:
516 vap->va_nlink = nproc_targets;
517 vap->va_size = vap->va_bytes = DEV_BSIZE;
518 break;
519
520 case Pfile: {
521 char *fullpath, *freepath;
522 error = textvp_fullpath(procp, &fullpath, &freepath);
523 if (error == 0) {
524 vap->va_size = strlen(fullpath);
525 free(freepath, M_TEMP);
526 } else {
527 vap->va_size = sizeof("unknown") - 1;
528 error = 0;
529 }
530 vap->va_bytes = vap->va_size;
531 break;
532 }
533
534 case Pmem:
535 /*
536 * If we denied owner access earlier, then we have to
537 * change the owner to root - otherwise 'ps' and friends
538 * will break even though they are setgid kmem. *SIGH*
539 */
540 if (procp->p_flag & P_SUGID)
541 vap->va_uid = 0;
542 else
543 vap->va_uid = procp->p_ucred->cr_uid;
544 break;
545
546 case Pregs:
547 vap->va_bytes = vap->va_size = sizeof(struct reg);
548 break;
549
550 case Pfpregs:
551 vap->va_bytes = vap->va_size = sizeof(struct fpreg);
552 break;
553
554 case Pdbregs:
555 vap->va_bytes = vap->va_size = sizeof(struct dbreg);
556 break;
557
558 case Ptype:
559 case Pmap:
560 case Pctl:
561 case Pstatus:
562 case Pnote:
563 case Pnotepg:
564 case Pcmdline:
565 case Prlimit:
566 break;
567
568 default:
569 panic("procfs_getattr");
570 }
571
572 return (error);
573}
574
ac424f9b
CP
575/*
576 * procfs_setattr(struct vnode *a_vp, struct vattr *a_vap,
577 * struct ucred *a_cred, struct thread *a_td)
578 */
984263bc 579static int
ac424f9b 580procfs_setattr(struct vop_setattr_args *ap)
984263bc 581{
984263bc
MD
582 if (ap->a_vap->va_flags != VNOVAL)
583 return (EOPNOTSUPP);
584
585 /*
586 * just fake out attribute setting
587 * it's not good to generate an error
588 * return, otherwise things like creat()
589 * will fail when they try to set the
590 * file length to 0. worse, this means
591 * that echo $note > /proc/$pid/note will fail.
592 */
593
594 return (0);
595}
596
597/*
598 * implement access checking.
599 *
600 * something very similar to this code is duplicated
601 * throughout the 4bsd kernel and should be moved
602 * into kern/vfs_subr.c sometime.
603 *
604 * actually, the check for super-user is slightly
605 * broken since it will allow read access to write-only
606 * objects. this doesn't cause any particular trouble
607 * but does mean that the i/o entry points need to check
608 * that the operation really does make sense.
ac424f9b
CP
609 *
610 * procfs_access(struct vnode *a_vp, int a_mode, struct ucred *a_cred,
611 * struct thread *a_td)
984263bc
MD
612 */
613static int
ac424f9b 614procfs_access(struct vop_access_args *ap)
984263bc
MD
615{
616 struct vattr *vap;
617 struct vattr vattr;
618 int error;
619
620 /*
621 * If you're the super-user,
622 * you always get access.
623 */
624 if (ap->a_cred->cr_uid == 0)
625 return (0);
626
627 vap = &vattr;
3b568787 628 error = VOP_GETATTR(ap->a_vp, vap, ap->a_td);
984263bc
MD
629 if (error)
630 return (error);
631
632 /*
633 * Access check is based on only one of owner, group, public.
634 * If not owner, then check group. If not a member of the
635 * group, then check public access.
636 */
637 if (ap->a_cred->cr_uid != vap->va_uid) {
638 gid_t *gp;
639 int i;
640
641 ap->a_mode >>= 3;
642 gp = ap->a_cred->cr_groups;
643 for (i = 0; i < ap->a_cred->cr_ngroups; i++, gp++)
644 if (vap->va_gid == *gp)
645 goto found;
646 ap->a_mode >>= 3;
647found:
648 ;
649 }
650
651 if ((vap->va_mode & ap->a_mode) == ap->a_mode)
652 return (0);
653
654 return (EACCES);
655}
656
657/*
658 * lookup. this is incredibly complicated in the
659 * general case, however for most pseudo-filesystems
660 * very little needs to be done.
661 *
662 * unless you want to get a migraine, just make sure your
663 * filesystem doesn't do any locking of its own. otherwise
664 * read and inwardly digest ufs_lookup().
ac424f9b
CP
665 *
666 * procfs_lookup(struct vnode *a_dvp, struct vnode **a_vpp,
667 * struct componentname *a_cnp)
984263bc
MD
668 */
669static int
ac424f9b 670procfs_lookup(struct vop_lookup_args *ap)
984263bc
MD
671{
672 struct componentname *cnp = ap->a_cnp;
673 struct vnode **vpp = ap->a_vpp;
674 struct vnode *dvp = ap->a_dvp;
675 char *pname = cnp->cn_nameptr;
676 /* struct proc *curp = cnp->cn_proc; */
677 struct proc_target *pt;
678 pid_t pid;
679 struct pfsnode *pfs;
680 struct proc *p;
681 int i;
682
683 *vpp = NULL;
684
2b69e610 685 if (cnp->cn_nameiop == NAMEI_DELETE || cnp->cn_nameiop == NAMEI_RENAME)
984263bc
MD
686 return (EROFS);
687
688 if (cnp->cn_namelen == 1 && *pname == '.') {
689 *vpp = dvp;
597aea93 690 vref(dvp);
41a01a4d 691 /* vn_lock(dvp, NULL, LK_EXCLUSIVE | LK_RETRY, curp); */
984263bc
MD
692 return (0);
693 }
694
695 pfs = VTOPFS(dvp);
696 switch (pfs->pfs_type) {
697 case Proot:
2b69e610 698 if (cnp->cn_flags & CNP_ISDOTDOT)
984263bc
MD
699 return (EIO);
700
701 if (CNEQ(cnp, "curproc", 7))
702 return (procfs_allocvp(dvp->v_mount, vpp, 0, Pcurproc));
703
704 pid = atopid(pname, cnp->cn_namelen);
705 if (pid == NO_PID)
706 break;
707
708 p = PFIND(pid);
709 if (p == NULL)
710 break;
711
28c57d20
HP
712 if (ps_showallprocs == 0 && ap->a_cnp->cn_cred->cr_uid != 0 &&
713 ap->a_cnp->cn_cred->cr_uid != p->p_ucred->cr_uid)
714 break;
715
984263bc
MD
716 return (procfs_allocvp(dvp->v_mount, vpp, pid, Pproc));
717
718 case Pproc:
2b69e610 719 if (cnp->cn_flags & CNP_ISDOTDOT)
984263bc
MD
720 return (procfs_root(dvp->v_mount, vpp));
721
722 p = PFIND(pfs->pfs_pid);
723 if (p == NULL)
724 break;
725
28c57d20
HP
726 if (ps_showallprocs == 0 && ap->a_cnp->cn_cred->cr_uid != 0 &&
727 ap->a_cnp->cn_cred->cr_uid != p->p_ucred->cr_uid)
728 break;
729
984263bc
MD
730 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
731 if (cnp->cn_namelen == pt->pt_namlen &&
732 bcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
733 (pt->pt_valid == NULL || (*pt->pt_valid)(p)))
734 goto found;
735 }
736 break;
737 found:
738 return (procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
739 pt->pt_pfstype));
740
741 default:
742 return (ENOTDIR);
743 }
744
2b69e610 745 return (cnp->cn_nameiop == NAMEI_LOOKUP ? ENOENT : EROFS);
984263bc
MD
746}
747
748/*
749 * Does this process have a text file?
750 */
751int
ac424f9b 752procfs_validfile(struct proc *p)
984263bc 753{
984263bc
MD
754 return (procfs_findtextvp(p) != NULLVP);
755}
756
757/*
758 * readdir() returns directory entries from pfsnode (vp).
759 *
760 * We generate just one directory entry at a time, as it would probably
761 * not pay off to buffer several entries locally to save uiomove calls.
ac424f9b
CP
762 *
763 * procfs_readdir(struct vnode *a_vp, struct uio *a_uio, struct ucred *a_cred,
764 * int *a_eofflag, int *a_ncookies, u_long **a_cookies)
984263bc
MD
765 */
766static int
ac424f9b 767procfs_readdir(struct vop_readdir_args *ap)
984263bc
MD
768{
769 struct uio *uio = ap->a_uio;
770 struct dirent d;
771 struct dirent *dp = &d;
772 struct pfsnode *pfs;
773 int count, error, i, off;
774 static u_int delen;
775
776 if (!delen) {
777
778 d.d_namlen = PROCFS_NAMELEN;
779 delen = GENERIC_DIRSIZ(&d);
780 }
781
782 pfs = VTOPFS(ap->a_vp);
783
784 off = (int)uio->uio_offset;
785 if (off != uio->uio_offset || off < 0 ||
786 off % delen != 0 || uio->uio_resid < delen)
787 return (EINVAL);
788
789 error = 0;
790 count = 0;
791 i = off / delen;
792
793 switch (pfs->pfs_type) {
794 /*
795 * this is for the process-specific sub-directories.
796 * all that is needed to is copy out all the entries
797 * from the procent[] table (top of this file).
798 */
799 case Pproc: {
800 struct proc *p;
801 struct proc_target *pt;
802
803 p = PFIND(pfs->pfs_pid);
804 if (p == NULL)
805 break;
41c20dac 806 if (!PRISON_CHECK(ap->a_cred, p->p_ucred))
984263bc
MD
807 break;
808
809 for (pt = &proc_targets[i];
810 uio->uio_resid >= delen && i < nproc_targets; pt++, i++) {
811 if (pt->pt_valid && (*pt->pt_valid)(p) == 0)
812 continue;
813
814 dp->d_reclen = delen;
815 dp->d_fileno = PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype);
816 dp->d_namlen = pt->pt_namlen;
817 bcopy(pt->pt_name, dp->d_name, pt->pt_namlen + 1);
818 dp->d_type = pt->pt_type;
819
820 if ((error = uiomove((caddr_t)dp, delen, uio)) != 0)
821 break;
822 }
823
824 break;
825 }
826
827 /*
828 * this is for the root of the procfs filesystem
829 * what is needed is a special entry for "curproc"
830 * followed by an entry for each process on allproc
831#ifdef PROCFS_ZOMBIE
832 * and zombproc.
833#endif
834 */
835
836 case Proot: {
837#ifdef PROCFS_ZOMBIE
838 int doingzomb = 0;
839#endif
840 int pcnt = 0;
841 volatile struct proc *p = allproc.lh_first;
842
843 for (; p && uio->uio_resid >= delen; i++, pcnt++) {
844 bzero((char *) dp, delen);
845 dp->d_reclen = delen;
846
847 switch (i) {
848 case 0: /* `.' */
849 case 1: /* `..' */
850 dp->d_fileno = PROCFS_FILENO(0, Proot);
851 dp->d_namlen = i + 1;
852 bcopy("..", dp->d_name, dp->d_namlen);
853 dp->d_name[i + 1] = '\0';
854 dp->d_type = DT_DIR;
855 break;
856
857 case 2:
858 dp->d_fileno = PROCFS_FILENO(0, Pcurproc);
859 dp->d_namlen = 7;
860 bcopy("curproc", dp->d_name, 8);
861 dp->d_type = DT_LNK;
862 break;
863
864 default:
865 while (pcnt < i) {
866 p = p->p_list.le_next;
867 if (!p)
868 goto done;
41c20dac 869 if (!PRISON_CHECK(ap->a_cred, p->p_ucred))
984263bc
MD
870 continue;
871 pcnt++;
872 }
41c20dac 873 while (!PRISON_CHECK(ap->a_cred, p->p_ucred)) {
984263bc
MD
874 p = p->p_list.le_next;
875 if (!p)
876 goto done;
877 }
28c57d20
HP
878 if (ps_showallprocs == 0 &&
879 ap->a_cred->cr_uid != 0 &&
880 ap->a_cred->cr_uid !=
881 p->p_ucred->cr_uid) {
882 p = p->p_list.le_next;
883 if (!p)
884 goto done;
885 break;
886 }
887
984263bc
MD
888 dp->d_fileno = PROCFS_FILENO(p->p_pid, Pproc);
889 dp->d_namlen = sprintf(dp->d_name, "%ld",
890 (long)p->p_pid);
891 dp->d_type = DT_DIR;
892 p = p->p_list.le_next;
893 break;
894 }
895
896 if ((error = uiomove((caddr_t)dp, delen, uio)) != 0)
897 break;
898 }
899 done:
900
901#ifdef PROCFS_ZOMBIE
902 if (p == NULL && doingzomb == 0) {
903 doingzomb = 1;
904 p = zombproc.lh_first;
905 goto again;
906 }
907#endif
908
909 break;
910
911 }
912
913 default:
914 error = ENOTDIR;
915 break;
916 }
917
918 uio->uio_offset = i * delen;
919
920 return (error);
921}
922
923/*
924 * readlink reads the link of `curproc' or `file'
925 */
926static int
ac424f9b 927procfs_readlink(struct vop_readlink_args *ap)
984263bc
MD
928{
929 char buf[16]; /* should be enough */
930 struct proc *procp;
931 struct vnode *vp = ap->a_vp;
932 struct pfsnode *pfs = VTOPFS(vp);
933 char *fullpath, *freepath;
934 int error, len;
935
936 switch (pfs->pfs_type) {
937 case Pcurproc:
938 if (pfs->pfs_fileno != PROCFS_FILENO(0, Pcurproc))
939 return (EINVAL);
940
941 len = snprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid);
942
943 return (uiomove(buf, len, ap->a_uio));
944 /*
945 * There _should_ be no way for an entire process to disappear
946 * from under us...
947 */
948 case Pfile:
949 procp = PFIND(pfs->pfs_pid);
41c20dac 950 if (procp == NULL || procp->p_ucred == NULL) {
984263bc
MD
951 printf("procfs_readlink: pid %d disappeared\n",
952 pfs->pfs_pid);
953 return (uiomove("unknown", sizeof("unknown") - 1,
954 ap->a_uio));
955 }
956 error = textvp_fullpath(procp, &fullpath, &freepath);
957 if (error != 0)
958 return (uiomove("unknown", sizeof("unknown") - 1,
959 ap->a_uio));
960 error = uiomove(fullpath, strlen(fullpath), ap->a_uio);
961 free(freepath, M_TEMP);
962 return (error);
963 default:
964 return (EINVAL);
965 }
966}
967
968/*
969 * convert decimal ascii to pid_t
970 */
971static pid_t
ac424f9b 972atopid(const char *b, u_int len)
984263bc
MD
973{
974 pid_t p = 0;
975
976 while (len--) {
977 char c = *b++;
978 if (c < '0' || c > '9')
979 return (NO_PID);
980 p = 10 * p + (c - '0');
981 if (p > PID_MAX)
982 return (NO_PID);
983 }
984
985 return (p);
986}
987
988/*
989 * procfs vnode operations.
990 */
991vop_t **procfs_vnodeop_p;
992static struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
993 { &vop_default_desc, (vop_t *) vop_defaultop },
994 { &vop_access_desc, (vop_t *) procfs_access },
995 { &vop_advlock_desc, (vop_t *) procfs_badop },
996 { &vop_bmap_desc, (vop_t *) procfs_bmap },
997 { &vop_close_desc, (vop_t *) procfs_close },
998 { &vop_create_desc, (vop_t *) procfs_badop },
999 { &vop_getattr_desc, (vop_t *) procfs_getattr },
1000 { &vop_inactive_desc, (vop_t *) procfs_inactive },
1001 { &vop_link_desc, (vop_t *) procfs_badop },
1002 { &vop_lookup_desc, (vop_t *) procfs_lookup },
1003 { &vop_mkdir_desc, (vop_t *) procfs_badop },
1004 { &vop_mknod_desc, (vop_t *) procfs_badop },
1005 { &vop_open_desc, (vop_t *) procfs_open },
1006 { &vop_pathconf_desc, (vop_t *) vop_stdpathconf },
1007 { &vop_print_desc, (vop_t *) procfs_print },
1008 { &vop_read_desc, (vop_t *) procfs_rw },
1009 { &vop_readdir_desc, (vop_t *) procfs_readdir },
1010 { &vop_readlink_desc, (vop_t *) procfs_readlink },
1011 { &vop_reclaim_desc, (vop_t *) procfs_reclaim },
1012 { &vop_remove_desc, (vop_t *) procfs_badop },
1013 { &vop_rename_desc, (vop_t *) procfs_badop },
1014 { &vop_rmdir_desc, (vop_t *) procfs_badop },
1015 { &vop_setattr_desc, (vop_t *) procfs_setattr },
1016 { &vop_symlink_desc, (vop_t *) procfs_badop },
1017 { &vop_write_desc, (vop_t *) procfs_rw },
1018 { &vop_ioctl_desc, (vop_t *) procfs_ioctl },
1019 { NULL, NULL }
1020};
1021static struct vnodeopv_desc procfs_vnodeop_opv_desc =
1022 { &procfs_vnodeop_p, procfs_vnodeop_entries };
1023
1024VNODEOP_SET(procfs_vnodeop_opv_desc);