Optimize lwkt_rwlock.c a bit
[dragonfly.git] / sys / vfs / procfs / procfs_vnops.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 1993, 1995 Jan-Simon Pendry
3 * Copyright (c) 1993, 1995
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Jan-Simon Pendry.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95
38 *
39 * $FreeBSD: src/sys/miscfs/procfs/procfs_vnops.c,v 1.76.2.7 2002/01/22 17:22:59 nectar Exp $
40 * $DragonFly: src/sys/vfs/procfs/procfs_vnops.c,v 1.2 2003/06/17 04:28:42 dillon Exp $
41 */
42
43/*
44 * procfs vnode interface
45 */
46
47#include <sys/param.h>
48#include <sys/systm.h>
49#include <sys/time.h>
50#include <sys/kernel.h>
51#include <sys/lock.h>
52#include <sys/fcntl.h>
53#include <sys/proc.h>
54#include <sys/signalvar.h>
55#include <sys/vnode.h>
56#include <sys/uio.h>
57#include <sys/mount.h>
58#include <sys/namei.h>
59#include <sys/dirent.h>
60#include <sys/malloc.h>
61#include <machine/reg.h>
62#include <vm/vm_zone.h>
63#include <miscfs/procfs/procfs.h>
64#include <sys/pioctl.h>
65
66static int procfs_access __P((struct vop_access_args *));
67static int procfs_badop __P((void));
68static int procfs_bmap __P((struct vop_bmap_args *));
69static int procfs_close __P((struct vop_close_args *));
70static int procfs_getattr __P((struct vop_getattr_args *));
71static int procfs_inactive __P((struct vop_inactive_args *));
72static int procfs_ioctl __P((struct vop_ioctl_args *));
73static int procfs_lookup __P((struct vop_lookup_args *));
74static int procfs_open __P((struct vop_open_args *));
75static int procfs_print __P((struct vop_print_args *));
76static int procfs_readdir __P((struct vop_readdir_args *));
77static int procfs_readlink __P((struct vop_readlink_args *));
78static int procfs_reclaim __P((struct vop_reclaim_args *));
79static int procfs_setattr __P((struct vop_setattr_args *));
80
81/*
82 * This is a list of the valid names in the
83 * process-specific sub-directories. It is
84 * used in procfs_lookup and procfs_readdir
85 */
86static struct proc_target {
87 u_char pt_type;
88 u_char pt_namlen;
89 char *pt_name;
90 pfstype pt_pfstype;
91 int (*pt_valid) __P((struct proc *p));
92} proc_targets[] = {
93#define N(s) sizeof(s)-1, s
94 /* name type validp */
95 { DT_DIR, N("."), Pproc, NULL },
96 { DT_DIR, N(".."), Proot, NULL },
97 { DT_REG, N("mem"), Pmem, NULL },
98 { DT_REG, N("regs"), Pregs, procfs_validregs },
99 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs },
100 { DT_REG, N("dbregs"), Pdbregs, procfs_validdbregs },
101 { DT_REG, N("ctl"), Pctl, NULL },
102 { DT_REG, N("status"), Pstatus, NULL },
103 { DT_REG, N("note"), Pnote, NULL },
104 { DT_REG, N("notepg"), Pnotepg, NULL },
105 { DT_REG, N("map"), Pmap, procfs_validmap },
106 { DT_REG, N("etype"), Ptype, procfs_validtype },
107 { DT_REG, N("cmdline"), Pcmdline, NULL },
108 { DT_REG, N("rlimit"), Prlimit, NULL },
109 { DT_LNK, N("file"), Pfile, NULL },
110#undef N
111};
112static const int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]);
113
114static pid_t atopid __P((const char *, u_int));
115
116/*
117 * set things up for doing i/o on
118 * the pfsnode (vp). (vp) is locked
119 * on entry, and should be left locked
120 * on exit.
121 *
122 * for procfs we don't need to do anything
123 * in particular for i/o. all that is done
124 * is to support exclusive open on process
125 * memory images.
126 */
127static int
128procfs_open(ap)
129 struct vop_open_args /* {
130 struct vnode *a_vp;
131 int a_mode;
132 struct ucred *a_cred;
133 struct proc *a_p;
134 } */ *ap;
135{
136 struct pfsnode *pfs = VTOPFS(ap->a_vp);
137 struct proc *p1, *p2;
138
139 p2 = PFIND(pfs->pfs_pid);
140 if (p2 == NULL)
141 return (ENOENT);
142 if (pfs->pfs_pid && !PRISON_CHECK(ap->a_p, p2))
143 return (ENOENT);
144
145 switch (pfs->pfs_type) {
146 case Pmem:
147 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
148 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE)))
149 return (EBUSY);
150
151 p1 = ap->a_p;
152 /* Can't trace a process that's currently exec'ing. */
153 if ((p2->p_flag & P_INEXEC) != 0)
154 return EAGAIN;
155 if (!CHECKIO(p1, p2) || p_trespass(p1, p2))
156 return (EPERM);
157
158 if (ap->a_mode & FWRITE)
159 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
160
161 return (0);
162
163 default:
164 break;
165 }
166
167 return (0);
168}
169
170/*
171 * close the pfsnode (vp) after doing i/o.
172 * (vp) is not locked on entry or exit.
173 *
174 * nothing to do for procfs other than undo
175 * any exclusive open flag (see _open above).
176 */
177static int
178procfs_close(ap)
179 struct vop_close_args /* {
180 struct vnode *a_vp;
181 int a_fflag;
182 struct ucred *a_cred;
183 struct proc *a_p;
184 } */ *ap;
185{
186 struct pfsnode *pfs = VTOPFS(ap->a_vp);
187 struct proc *p;
188
189 switch (pfs->pfs_type) {
190 case Pmem:
191 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
192 pfs->pfs_flags &= ~(FWRITE|O_EXCL);
193 /*
194 * This rather complicated-looking code is trying to
195 * determine if this was the last close on this particular
196 * vnode. While one would expect v_usecount to be 1 at
197 * that point, it seems that (according to John Dyson)
198 * the VM system will bump up the usecount. So: if the
199 * usecount is 2, and VOBJBUF is set, then this is really
200 * the last close. Otherwise, if the usecount is < 2
201 * then it is definitely the last close.
202 * If this is the last close, then it checks to see if
203 * the target process has PF_LINGER set in p_pfsflags,
204 * if this is *not* the case, then the process' stop flags
205 * are cleared, and the process is woken up. This is
206 * to help prevent the case where a process has been
207 * told to stop on an event, but then the requesting process
208 * has gone away or forgotten about it.
209 */
210 if ((ap->a_vp->v_usecount < 2)
211 && (p = pfind(pfs->pfs_pid))
212 && !(p->p_pfsflags & PF_LINGER)) {
213 p->p_stops = 0;
214 p->p_step = 0;
215 wakeup(&p->p_step);
216 }
217 break;
218 default:
219 break;
220 }
221
222 return (0);
223}
224
225/*
226 * do an ioctl operation on a pfsnode (vp).
227 * (vp) is not locked on entry or exit.
228 */
229static int
230procfs_ioctl(ap)
231 struct vop_ioctl_args *ap;
232{
233 struct pfsnode *pfs = VTOPFS(ap->a_vp);
234 struct proc *procp, *p;
235 int error;
236 int signo;
237 struct procfs_status *psp;
238 unsigned char flags;
239
240 p = ap->a_p;
241 procp = pfind(pfs->pfs_pid);
242 if (procp == NULL) {
243 return ENOTTY;
244 }
245
246 /* Can't trace a process that's currently exec'ing. */
247 if ((procp->p_flag & P_INEXEC) != 0)
248 return EAGAIN;
249 if (!CHECKIO(p, procp) || p_trespass(p, procp))
250 return EPERM;
251
252 switch (ap->a_command) {
253 case PIOCBIS:
254 procp->p_stops |= *(unsigned int*)ap->a_data;
255 break;
256 case PIOCBIC:
257 procp->p_stops &= ~*(unsigned int*)ap->a_data;
258 break;
259 case PIOCSFL:
260 /*
261 * NFLAGS is "non-suser_xxx flags" -- currently, only
262 * PFS_ISUGID ("ignore set u/g id");
263 */
264#define NFLAGS (PF_ISUGID)
265 flags = (unsigned char)*(unsigned int*)ap->a_data;
266 if (flags & NFLAGS && (error = suser(p)))
267 return error;
268 procp->p_pfsflags = flags;
269 break;
270 case PIOCGFL:
271 *(unsigned int*)ap->a_data = (unsigned int)procp->p_pfsflags;
272 break;
273 case PIOCSTATUS:
274 psp = (struct procfs_status *)ap->a_data;
275 psp->state = (procp->p_step == 0);
276 psp->flags = procp->p_pfsflags;
277 psp->events = procp->p_stops;
278 if (procp->p_step) {
279 psp->why = procp->p_stype;
280 psp->val = procp->p_xstat;
281 } else {
282 psp->why = psp->val = 0; /* Not defined values */
283 }
284 break;
285 case PIOCWAIT:
286 psp = (struct procfs_status *)ap->a_data;
287 if (procp->p_step == 0) {
288 error = tsleep(&procp->p_stype, PWAIT | PCATCH, "piocwait", 0);
289 if (error)
290 return error;
291 }
292 psp->state = 1; /* It stopped */
293 psp->flags = procp->p_pfsflags;
294 psp->events = procp->p_stops;
295 psp->why = procp->p_stype; /* why it stopped */
296 psp->val = procp->p_xstat; /* any extra info */
297 break;
298 case PIOCCONT: /* Restart a proc */
299 if (procp->p_step == 0)
300 return EINVAL; /* Can only start a stopped process */
301 if ((signo = *(int*)ap->a_data) != 0) {
302 if (signo >= NSIG || signo <= 0)
303 return EINVAL;
304 psignal(procp, signo);
305 }
306 procp->p_step = 0;
307 wakeup(&procp->p_step);
308 break;
309 default:
310 return (ENOTTY);
311 }
312 return 0;
313}
314
315/*
316 * do block mapping for pfsnode (vp).
317 * since we don't use the buffer cache
318 * for procfs this function should never
319 * be called. in any case, it's not clear
320 * what part of the kernel ever makes use
321 * of this function. for sanity, this is the
322 * usual no-op bmap, although returning
323 * (EIO) would be a reasonable alternative.
324 */
325static int
326procfs_bmap(ap)
327 struct vop_bmap_args /* {
328 struct vnode *a_vp;
329 daddr_t a_bn;
330 struct vnode **a_vpp;
331 daddr_t *a_bnp;
332 int *a_runp;
333 } */ *ap;
334{
335
336 if (ap->a_vpp != NULL)
337 *ap->a_vpp = ap->a_vp;
338 if (ap->a_bnp != NULL)
339 *ap->a_bnp = ap->a_bn;
340 if (ap->a_runp != NULL)
341 *ap->a_runp = 0;
342 return (0);
343}
344
345/*
346 * procfs_inactive is called when the pfsnode
347 * is vrele'd and the reference count goes
348 * to zero. (vp) will be on the vnode free
349 * list, so to get it back vget() must be
350 * used.
351 *
352 * (vp) is locked on entry, but must be unlocked on exit.
353 */
354static int
355procfs_inactive(ap)
356 struct vop_inactive_args /* {
357 struct vnode *a_vp;
358 } */ *ap;
359{
360 struct vnode *vp = ap->a_vp;
361
362 VOP_UNLOCK(vp, 0, ap->a_p);
363
364 return (0);
365}
366
367/*
368 * _reclaim is called when getnewvnode()
369 * wants to make use of an entry on the vnode
370 * free list. at this time the filesystem needs
371 * to free any private data and remove the node
372 * from any private lists.
373 */
374static int
375procfs_reclaim(ap)
376 struct vop_reclaim_args /* {
377 struct vnode *a_vp;
378 } */ *ap;
379{
380
381 return (procfs_freevp(ap->a_vp));
382}
383
384/*
385 * _print is used for debugging.
386 * just print a readable description
387 * of (vp).
388 */
389static int
390procfs_print(ap)
391 struct vop_print_args /* {
392 struct vnode *a_vp;
393 } */ *ap;
394{
395 struct pfsnode *pfs = VTOPFS(ap->a_vp);
396
397 printf("tag VT_PROCFS, type %d, pid %ld, mode %x, flags %lx\n",
398 pfs->pfs_type, (long)pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags);
399 return (0);
400}
401
402/*
403 * generic entry point for unsupported operations
404 */
405static int
406procfs_badop()
407{
408
409 return (EIO);
410}
411
412/*
413 * Invent attributes for pfsnode (vp) and store
414 * them in (vap).
415 * Directories lengths are returned as zero since
416 * any real length would require the genuine size
417 * to be computed, and nothing cares anyway.
418 *
419 * this is relatively minimal for procfs.
420 */
421static int
422procfs_getattr(ap)
423 struct vop_getattr_args /* {
424 struct vnode *a_vp;
425 struct vattr *a_vap;
426 struct ucred *a_cred;
427 struct proc *a_p;
428 } */ *ap;
429{
430 struct pfsnode *pfs = VTOPFS(ap->a_vp);
431 struct vattr *vap = ap->a_vap;
432 struct proc *procp;
433 int error;
434
435 /*
436 * First make sure that the process and its credentials
437 * still exist.
438 */
439 switch (pfs->pfs_type) {
440 case Proot:
441 case Pcurproc:
442 procp = 0;
443 break;
444
445 default:
446 procp = PFIND(pfs->pfs_pid);
447 if (procp == NULL || procp->p_cred == NULL ||
448 procp->p_ucred == NULL)
449 return (ENOENT);
450 }
451
452 error = 0;
453
454 /* start by zeroing out the attributes */
455 VATTR_NULL(vap);
456
457 /* next do all the common fields */
458 vap->va_type = ap->a_vp->v_type;
459 vap->va_mode = pfs->pfs_mode;
460 vap->va_fileid = pfs->pfs_fileno;
461 vap->va_flags = 0;
462 vap->va_blocksize = PAGE_SIZE;
463 vap->va_bytes = vap->va_size = 0;
464 vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
465
466 /*
467 * Make all times be current TOD.
468 * It would be possible to get the process start
469 * time from the p_stat structure, but there's
470 * no "file creation" time stamp anyway, and the
471 * p_stat structure is not addressible if u. gets
472 * swapped out for that process.
473 */
474 nanotime(&vap->va_ctime);
475 vap->va_atime = vap->va_mtime = vap->va_ctime;
476
477 /*
478 * If the process has exercised some setuid or setgid
479 * privilege, then rip away read/write permission so
480 * that only root can gain access.
481 */
482 switch (pfs->pfs_type) {
483 case Pctl:
484 case Pregs:
485 case Pfpregs:
486 case Pdbregs:
487 case Pmem:
488 if (procp->p_flag & P_SUGID)
489 vap->va_mode &= ~((VREAD|VWRITE)|
490 ((VREAD|VWRITE)>>3)|
491 ((VREAD|VWRITE)>>6));
492 break;
493 default:
494 break;
495 }
496
497 /*
498 * now do the object specific fields
499 *
500 * The size could be set from struct reg, but it's hardly
501 * worth the trouble, and it puts some (potentially) machine
502 * dependent data into this machine-independent code. If it
503 * becomes important then this function should break out into
504 * a per-file stat function in the corresponding .c file.
505 */
506
507 vap->va_nlink = 1;
508 if (procp) {
509 vap->va_uid = procp->p_ucred->cr_uid;
510 vap->va_gid = procp->p_ucred->cr_gid;
511 }
512
513 switch (pfs->pfs_type) {
514 case Proot:
515 /*
516 * Set nlink to 1 to tell fts(3) we don't actually know.
517 */
518 vap->va_nlink = 1;
519 vap->va_uid = 0;
520 vap->va_gid = 0;
521 vap->va_size = vap->va_bytes = DEV_BSIZE;
522 break;
523
524 case Pcurproc: {
525 char buf[16]; /* should be enough */
526 vap->va_uid = 0;
527 vap->va_gid = 0;
528 vap->va_size = vap->va_bytes =
529 snprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid);
530 break;
531 }
532
533 case Pproc:
534 vap->va_nlink = nproc_targets;
535 vap->va_size = vap->va_bytes = DEV_BSIZE;
536 break;
537
538 case Pfile: {
539 char *fullpath, *freepath;
540 error = textvp_fullpath(procp, &fullpath, &freepath);
541 if (error == 0) {
542 vap->va_size = strlen(fullpath);
543 free(freepath, M_TEMP);
544 } else {
545 vap->va_size = sizeof("unknown") - 1;
546 error = 0;
547 }
548 vap->va_bytes = vap->va_size;
549 break;
550 }
551
552 case Pmem:
553 /*
554 * If we denied owner access earlier, then we have to
555 * change the owner to root - otherwise 'ps' and friends
556 * will break even though they are setgid kmem. *SIGH*
557 */
558 if (procp->p_flag & P_SUGID)
559 vap->va_uid = 0;
560 else
561 vap->va_uid = procp->p_ucred->cr_uid;
562 break;
563
564 case Pregs:
565 vap->va_bytes = vap->va_size = sizeof(struct reg);
566 break;
567
568 case Pfpregs:
569 vap->va_bytes = vap->va_size = sizeof(struct fpreg);
570 break;
571
572 case Pdbregs:
573 vap->va_bytes = vap->va_size = sizeof(struct dbreg);
574 break;
575
576 case Ptype:
577 case Pmap:
578 case Pctl:
579 case Pstatus:
580 case Pnote:
581 case Pnotepg:
582 case Pcmdline:
583 case Prlimit:
584 break;
585
586 default:
587 panic("procfs_getattr");
588 }
589
590 return (error);
591}
592
593static int
594procfs_setattr(ap)
595 struct vop_setattr_args /* {
596 struct vnode *a_vp;
597 struct vattr *a_vap;
598 struct ucred *a_cred;
599 struct proc *a_p;
600 } */ *ap;
601{
602
603 if (ap->a_vap->va_flags != VNOVAL)
604 return (EOPNOTSUPP);
605
606 /*
607 * just fake out attribute setting
608 * it's not good to generate an error
609 * return, otherwise things like creat()
610 * will fail when they try to set the
611 * file length to 0. worse, this means
612 * that echo $note > /proc/$pid/note will fail.
613 */
614
615 return (0);
616}
617
618/*
619 * implement access checking.
620 *
621 * something very similar to this code is duplicated
622 * throughout the 4bsd kernel and should be moved
623 * into kern/vfs_subr.c sometime.
624 *
625 * actually, the check for super-user is slightly
626 * broken since it will allow read access to write-only
627 * objects. this doesn't cause any particular trouble
628 * but does mean that the i/o entry points need to check
629 * that the operation really does make sense.
630 */
631static int
632procfs_access(ap)
633 struct vop_access_args /* {
634 struct vnode *a_vp;
635 int a_mode;
636 struct ucred *a_cred;
637 struct proc *a_p;
638 } */ *ap;
639{
640 struct vattr *vap;
641 struct vattr vattr;
642 int error;
643
644 /*
645 * If you're the super-user,
646 * you always get access.
647 */
648 if (ap->a_cred->cr_uid == 0)
649 return (0);
650
651 vap = &vattr;
652 error = VOP_GETATTR(ap->a_vp, vap, ap->a_cred, ap->a_p);
653 if (error)
654 return (error);
655
656 /*
657 * Access check is based on only one of owner, group, public.
658 * If not owner, then check group. If not a member of the
659 * group, then check public access.
660 */
661 if (ap->a_cred->cr_uid != vap->va_uid) {
662 gid_t *gp;
663 int i;
664
665 ap->a_mode >>= 3;
666 gp = ap->a_cred->cr_groups;
667 for (i = 0; i < ap->a_cred->cr_ngroups; i++, gp++)
668 if (vap->va_gid == *gp)
669 goto found;
670 ap->a_mode >>= 3;
671found:
672 ;
673 }
674
675 if ((vap->va_mode & ap->a_mode) == ap->a_mode)
676 return (0);
677
678 return (EACCES);
679}
680
681/*
682 * lookup. this is incredibly complicated in the
683 * general case, however for most pseudo-filesystems
684 * very little needs to be done.
685 *
686 * unless you want to get a migraine, just make sure your
687 * filesystem doesn't do any locking of its own. otherwise
688 * read and inwardly digest ufs_lookup().
689 */
690static int
691procfs_lookup(ap)
692 struct vop_lookup_args /* {
693 struct vnode * a_dvp;
694 struct vnode ** a_vpp;
695 struct componentname * a_cnp;
696 } */ *ap;
697{
698 struct componentname *cnp = ap->a_cnp;
699 struct vnode **vpp = ap->a_vpp;
700 struct vnode *dvp = ap->a_dvp;
701 char *pname = cnp->cn_nameptr;
702 /* struct proc *curp = cnp->cn_proc; */
703 struct proc_target *pt;
704 pid_t pid;
705 struct pfsnode *pfs;
706 struct proc *p;
707 int i;
708
709 *vpp = NULL;
710
711 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
712 return (EROFS);
713
714 if (cnp->cn_namelen == 1 && *pname == '.') {
715 *vpp = dvp;
716 VREF(dvp);
717 /* vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, curp); */
718 return (0);
719 }
720
721 pfs = VTOPFS(dvp);
722 switch (pfs->pfs_type) {
723 case Proot:
724 if (cnp->cn_flags & ISDOTDOT)
725 return (EIO);
726
727 if (CNEQ(cnp, "curproc", 7))
728 return (procfs_allocvp(dvp->v_mount, vpp, 0, Pcurproc));
729
730 pid = atopid(pname, cnp->cn_namelen);
731 if (pid == NO_PID)
732 break;
733
734 p = PFIND(pid);
735 if (p == NULL)
736 break;
737
738 return (procfs_allocvp(dvp->v_mount, vpp, pid, Pproc));
739
740 case Pproc:
741 if (cnp->cn_flags & ISDOTDOT)
742 return (procfs_root(dvp->v_mount, vpp));
743
744 p = PFIND(pfs->pfs_pid);
745 if (p == NULL)
746 break;
747
748 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
749 if (cnp->cn_namelen == pt->pt_namlen &&
750 bcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
751 (pt->pt_valid == NULL || (*pt->pt_valid)(p)))
752 goto found;
753 }
754 break;
755 found:
756 return (procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
757 pt->pt_pfstype));
758
759 default:
760 return (ENOTDIR);
761 }
762
763 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS);
764}
765
766/*
767 * Does this process have a text file?
768 */
769int
770procfs_validfile(p)
771 struct proc *p;
772{
773
774 return (procfs_findtextvp(p) != NULLVP);
775}
776
777/*
778 * readdir() returns directory entries from pfsnode (vp).
779 *
780 * We generate just one directory entry at a time, as it would probably
781 * not pay off to buffer several entries locally to save uiomove calls.
782 */
783static int
784procfs_readdir(ap)
785 struct vop_readdir_args /* {
786 struct vnode *a_vp;
787 struct uio *a_uio;
788 struct ucred *a_cred;
789 int *a_eofflag;
790 int *a_ncookies;
791 u_long **a_cookies;
792 } */ *ap;
793{
794 struct uio *uio = ap->a_uio;
795 struct dirent d;
796 struct dirent *dp = &d;
797 struct pfsnode *pfs;
798 int count, error, i, off;
799 static u_int delen;
800
801 if (!delen) {
802
803 d.d_namlen = PROCFS_NAMELEN;
804 delen = GENERIC_DIRSIZ(&d);
805 }
806
807 pfs = VTOPFS(ap->a_vp);
808
809 off = (int)uio->uio_offset;
810 if (off != uio->uio_offset || off < 0 ||
811 off % delen != 0 || uio->uio_resid < delen)
812 return (EINVAL);
813
814 error = 0;
815 count = 0;
816 i = off / delen;
817
818 switch (pfs->pfs_type) {
819 /*
820 * this is for the process-specific sub-directories.
821 * all that is needed to is copy out all the entries
822 * from the procent[] table (top of this file).
823 */
824 case Pproc: {
825 struct proc *p;
826 struct proc_target *pt;
827
828 p = PFIND(pfs->pfs_pid);
829 if (p == NULL)
830 break;
831 if (!PRISON_CHECK(curproc, p))
832 break;
833
834 for (pt = &proc_targets[i];
835 uio->uio_resid >= delen && i < nproc_targets; pt++, i++) {
836 if (pt->pt_valid && (*pt->pt_valid)(p) == 0)
837 continue;
838
839 dp->d_reclen = delen;
840 dp->d_fileno = PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype);
841 dp->d_namlen = pt->pt_namlen;
842 bcopy(pt->pt_name, dp->d_name, pt->pt_namlen + 1);
843 dp->d_type = pt->pt_type;
844
845 if ((error = uiomove((caddr_t)dp, delen, uio)) != 0)
846 break;
847 }
848
849 break;
850 }
851
852 /*
853 * this is for the root of the procfs filesystem
854 * what is needed is a special entry for "curproc"
855 * followed by an entry for each process on allproc
856#ifdef PROCFS_ZOMBIE
857 * and zombproc.
858#endif
859 */
860
861 case Proot: {
862#ifdef PROCFS_ZOMBIE
863 int doingzomb = 0;
864#endif
865 int pcnt = 0;
866 volatile struct proc *p = allproc.lh_first;
867
868 for (; p && uio->uio_resid >= delen; i++, pcnt++) {
869 bzero((char *) dp, delen);
870 dp->d_reclen = delen;
871
872 switch (i) {
873 case 0: /* `.' */
874 case 1: /* `..' */
875 dp->d_fileno = PROCFS_FILENO(0, Proot);
876 dp->d_namlen = i + 1;
877 bcopy("..", dp->d_name, dp->d_namlen);
878 dp->d_name[i + 1] = '\0';
879 dp->d_type = DT_DIR;
880 break;
881
882 case 2:
883 dp->d_fileno = PROCFS_FILENO(0, Pcurproc);
884 dp->d_namlen = 7;
885 bcopy("curproc", dp->d_name, 8);
886 dp->d_type = DT_LNK;
887 break;
888
889 default:
890 while (pcnt < i) {
891 p = p->p_list.le_next;
892 if (!p)
893 goto done;
894 if (!PRISON_CHECK(curproc, p))
895 continue;
896 pcnt++;
897 }
898 while (!PRISON_CHECK(curproc, p)) {
899 p = p->p_list.le_next;
900 if (!p)
901 goto done;
902 }
903 dp->d_fileno = PROCFS_FILENO(p->p_pid, Pproc);
904 dp->d_namlen = sprintf(dp->d_name, "%ld",
905 (long)p->p_pid);
906 dp->d_type = DT_DIR;
907 p = p->p_list.le_next;
908 break;
909 }
910
911 if ((error = uiomove((caddr_t)dp, delen, uio)) != 0)
912 break;
913 }
914 done:
915
916#ifdef PROCFS_ZOMBIE
917 if (p == NULL && doingzomb == 0) {
918 doingzomb = 1;
919 p = zombproc.lh_first;
920 goto again;
921 }
922#endif
923
924 break;
925
926 }
927
928 default:
929 error = ENOTDIR;
930 break;
931 }
932
933 uio->uio_offset = i * delen;
934
935 return (error);
936}
937
938/*
939 * readlink reads the link of `curproc' or `file'
940 */
941static int
942procfs_readlink(ap)
943 struct vop_readlink_args *ap;
944{
945 char buf[16]; /* should be enough */
946 struct proc *procp;
947 struct vnode *vp = ap->a_vp;
948 struct pfsnode *pfs = VTOPFS(vp);
949 char *fullpath, *freepath;
950 int error, len;
951
952 switch (pfs->pfs_type) {
953 case Pcurproc:
954 if (pfs->pfs_fileno != PROCFS_FILENO(0, Pcurproc))
955 return (EINVAL);
956
957 len = snprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid);
958
959 return (uiomove(buf, len, ap->a_uio));
960 /*
961 * There _should_ be no way for an entire process to disappear
962 * from under us...
963 */
964 case Pfile:
965 procp = PFIND(pfs->pfs_pid);
966 if (procp == NULL || procp->p_cred == NULL ||
967 procp->p_ucred == NULL) {
968 printf("procfs_readlink: pid %d disappeared\n",
969 pfs->pfs_pid);
970 return (uiomove("unknown", sizeof("unknown") - 1,
971 ap->a_uio));
972 }
973 error = textvp_fullpath(procp, &fullpath, &freepath);
974 if (error != 0)
975 return (uiomove("unknown", sizeof("unknown") - 1,
976 ap->a_uio));
977 error = uiomove(fullpath, strlen(fullpath), ap->a_uio);
978 free(freepath, M_TEMP);
979 return (error);
980 default:
981 return (EINVAL);
982 }
983}
984
985/*
986 * convert decimal ascii to pid_t
987 */
988static pid_t
989atopid(b, len)
990 const char *b;
991 u_int len;
992{
993 pid_t p = 0;
994
995 while (len--) {
996 char c = *b++;
997 if (c < '0' || c > '9')
998 return (NO_PID);
999 p = 10 * p + (c - '0');
1000 if (p > PID_MAX)
1001 return (NO_PID);
1002 }
1003
1004 return (p);
1005}
1006
1007/*
1008 * procfs vnode operations.
1009 */
1010vop_t **procfs_vnodeop_p;
1011static struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
1012 { &vop_default_desc, (vop_t *) vop_defaultop },
1013 { &vop_access_desc, (vop_t *) procfs_access },
1014 { &vop_advlock_desc, (vop_t *) procfs_badop },
1015 { &vop_bmap_desc, (vop_t *) procfs_bmap },
1016 { &vop_close_desc, (vop_t *) procfs_close },
1017 { &vop_create_desc, (vop_t *) procfs_badop },
1018 { &vop_getattr_desc, (vop_t *) procfs_getattr },
1019 { &vop_inactive_desc, (vop_t *) procfs_inactive },
1020 { &vop_link_desc, (vop_t *) procfs_badop },
1021 { &vop_lookup_desc, (vop_t *) procfs_lookup },
1022 { &vop_mkdir_desc, (vop_t *) procfs_badop },
1023 { &vop_mknod_desc, (vop_t *) procfs_badop },
1024 { &vop_open_desc, (vop_t *) procfs_open },
1025 { &vop_pathconf_desc, (vop_t *) vop_stdpathconf },
1026 { &vop_print_desc, (vop_t *) procfs_print },
1027 { &vop_read_desc, (vop_t *) procfs_rw },
1028 { &vop_readdir_desc, (vop_t *) procfs_readdir },
1029 { &vop_readlink_desc, (vop_t *) procfs_readlink },
1030 { &vop_reclaim_desc, (vop_t *) procfs_reclaim },
1031 { &vop_remove_desc, (vop_t *) procfs_badop },
1032 { &vop_rename_desc, (vop_t *) procfs_badop },
1033 { &vop_rmdir_desc, (vop_t *) procfs_badop },
1034 { &vop_setattr_desc, (vop_t *) procfs_setattr },
1035 { &vop_symlink_desc, (vop_t *) procfs_badop },
1036 { &vop_write_desc, (vop_t *) procfs_rw },
1037 { &vop_ioctl_desc, (vop_t *) procfs_ioctl },
1038 { NULL, NULL }
1039};
1040static struct vnodeopv_desc procfs_vnodeop_opv_desc =
1041 { &procfs_vnodeop_p, procfs_vnodeop_entries };
1042
1043VNODEOP_SET(procfs_vnodeop_opv_desc);