Add notation on the alignment requirement for the twe driver.
[dragonfly.git] / sys / vfs / procfs / procfs_vnops.c
CommitLineData
984263bc
MD
1/*
2 * Copyright (c) 1993, 1995 Jan-Simon Pendry
3 * Copyright (c) 1993, 1995
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Jan-Simon Pendry.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95
38 *
39 * $FreeBSD: src/sys/miscfs/procfs/procfs_vnops.c,v 1.76.2.7 2002/01/22 17:22:59 nectar Exp $
28c57d20 40 * $DragonFly: src/sys/vfs/procfs/procfs_vnops.c,v 1.9 2003/09/01 01:14:55 hmp Exp $
984263bc
MD
41 */
42
43/*
44 * procfs vnode interface
45 */
46
47#include <sys/param.h>
48#include <sys/systm.h>
49#include <sys/time.h>
50#include <sys/kernel.h>
51#include <sys/lock.h>
52#include <sys/fcntl.h>
53#include <sys/proc.h>
54#include <sys/signalvar.h>
55#include <sys/vnode.h>
56#include <sys/uio.h>
57#include <sys/mount.h>
58#include <sys/namei.h>
59#include <sys/dirent.h>
60#include <sys/malloc.h>
61#include <machine/reg.h>
62#include <vm/vm_zone.h>
1f2de5d4 63#include <vfs/procfs/procfs.h>
984263bc
MD
64#include <sys/pioctl.h>
65
a6ee311a
RG
/*
 * Forward declarations for the vnode operations implemented in this file.
 */
static int	procfs_access(struct vop_access_args *ap);
static int	procfs_badop(void);
static int	procfs_bmap(struct vop_bmap_args *ap);
static int	procfs_close(struct vop_close_args *ap);
static int	procfs_getattr(struct vop_getattr_args *ap);
static int	procfs_inactive(struct vop_inactive_args *ap);
static int	procfs_ioctl(struct vop_ioctl_args *ap);
static int	procfs_lookup(struct vop_lookup_args *ap);
static int	procfs_open(struct vop_open_args *ap);
static int	procfs_print(struct vop_print_args *ap);
static int	procfs_readdir(struct vop_readdir_args *ap);
static int	procfs_readlink(struct vop_readlink_args *ap);
static int	procfs_reclaim(struct vop_reclaim_args *ap);
static int	procfs_setattr(struct vop_setattr_args *ap);
984263bc
MD
80
81/*
82 * This is a list of the valid names in the
83 * process-specific sub-directories. It is
84 * used in procfs_lookup and procfs_readdir
85 */
86static struct proc_target {
87 u_char pt_type;
88 u_char pt_namlen;
89 char *pt_name;
90 pfstype pt_pfstype;
a6ee311a 91 int (*pt_valid) (struct proc *p);
984263bc
MD
92} proc_targets[] = {
93#define N(s) sizeof(s)-1, s
94 /* name type validp */
95 { DT_DIR, N("."), Pproc, NULL },
96 { DT_DIR, N(".."), Proot, NULL },
97 { DT_REG, N("mem"), Pmem, NULL },
98 { DT_REG, N("regs"), Pregs, procfs_validregs },
99 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs },
100 { DT_REG, N("dbregs"), Pdbregs, procfs_validdbregs },
101 { DT_REG, N("ctl"), Pctl, NULL },
102 { DT_REG, N("status"), Pstatus, NULL },
103 { DT_REG, N("note"), Pnote, NULL },
104 { DT_REG, N("notepg"), Pnotepg, NULL },
105 { DT_REG, N("map"), Pmap, procfs_validmap },
106 { DT_REG, N("etype"), Ptype, procfs_validtype },
107 { DT_REG, N("cmdline"), Pcmdline, NULL },
108 { DT_REG, N("rlimit"), Prlimit, NULL },
109 { DT_LNK, N("file"), Pfile, NULL },
110#undef N
111};
112static const int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]);
113
a6ee311a 114static pid_t atopid (const char *, u_int);
984263bc
MD
115
116/*
117 * set things up for doing i/o on
118 * the pfsnode (vp). (vp) is locked
119 * on entry, and should be left locked
120 * on exit.
121 *
122 * for procfs we don't need to do anything
123 * in particular for i/o. all that is done
124 * is to support exclusive open on process
125 * memory images.
126 */
127static int
128procfs_open(ap)
129 struct vop_open_args /* {
130 struct vnode *a_vp;
131 int a_mode;
132 struct ucred *a_cred;
dadab5e9 133 struct thread *a_td;
984263bc
MD
134 } */ *ap;
135{
136 struct pfsnode *pfs = VTOPFS(ap->a_vp);
137 struct proc *p1, *p2;
138
139 p2 = PFIND(pfs->pfs_pid);
140 if (p2 == NULL)
141 return (ENOENT);
41c20dac 142 if (pfs->pfs_pid && !PRISON_CHECK(ap->a_cred, p2->p_ucred))
984263bc
MD
143 return (ENOENT);
144
145 switch (pfs->pfs_type) {
146 case Pmem:
147 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
148 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE)))
149 return (EBUSY);
150
dadab5e9
MD
151 p1 = ap->a_td->td_proc;
152 KKASSERT(p1);
984263bc
MD
153 /* Can't trace a process that's currently exec'ing. */
154 if ((p2->p_flag & P_INEXEC) != 0)
155 return EAGAIN;
41c20dac 156 if (!CHECKIO(p1, p2) || p_trespass(ap->a_cred, p2->p_ucred))
984263bc
MD
157 return (EPERM);
158
159 if (ap->a_mode & FWRITE)
160 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
161
162 return (0);
163
164 default:
165 break;
166 }
167
168 return (0);
169}
170
171/*
172 * close the pfsnode (vp) after doing i/o.
173 * (vp) is not locked on entry or exit.
174 *
175 * nothing to do for procfs other than undo
176 * any exclusive open flag (see _open above).
177 */
178static int
179procfs_close(ap)
180 struct vop_close_args /* {
181 struct vnode *a_vp;
182 int a_fflag;
183 struct ucred *a_cred;
dadab5e9 184 struct thread *a_td;
984263bc
MD
185 } */ *ap;
186{
187 struct pfsnode *pfs = VTOPFS(ap->a_vp);
188 struct proc *p;
189
190 switch (pfs->pfs_type) {
191 case Pmem:
192 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
193 pfs->pfs_flags &= ~(FWRITE|O_EXCL);
194 /*
195 * This rather complicated-looking code is trying to
196 * determine if this was the last close on this particular
197 * vnode. While one would expect v_usecount to be 1 at
198 * that point, it seems that (according to John Dyson)
199 * the VM system will bump up the usecount. So: if the
200 * usecount is 2, and VOBJBUF is set, then this is really
201 * the last close. Otherwise, if the usecount is < 2
202 * then it is definitely the last close.
203 * If this is the last close, then it checks to see if
204 * the target process has PF_LINGER set in p_pfsflags,
205 * if this is *not* the case, then the process' stop flags
206 * are cleared, and the process is woken up. This is
207 * to help prevent the case where a process has been
208 * told to stop on an event, but then the requesting process
209 * has gone away or forgotten about it.
210 */
211 if ((ap->a_vp->v_usecount < 2)
212 && (p = pfind(pfs->pfs_pid))
213 && !(p->p_pfsflags & PF_LINGER)) {
214 p->p_stops = 0;
215 p->p_step = 0;
216 wakeup(&p->p_step);
217 }
218 break;
219 default:
220 break;
221 }
222
223 return (0);
224}
225
226/*
227 * do an ioctl operation on a pfsnode (vp).
228 * (vp) is not locked on entry or exit.
229 */
230static int
231procfs_ioctl(ap)
232 struct vop_ioctl_args *ap;
233{
234 struct pfsnode *pfs = VTOPFS(ap->a_vp);
dadab5e9
MD
235 struct proc *procp;
236 struct proc *p;
984263bc
MD
237 int error;
238 int signo;
239 struct procfs_status *psp;
240 unsigned char flags;
241
984263bc 242 procp = pfind(pfs->pfs_pid);
dadab5e9 243 if (procp == NULL)
984263bc 244 return ENOTTY;
dadab5e9
MD
245 p = ap->a_td->td_proc;
246 if (p == NULL)
247 return EINVAL;
984263bc
MD
248
249 /* Can't trace a process that's currently exec'ing. */
250 if ((procp->p_flag & P_INEXEC) != 0)
251 return EAGAIN;
41c20dac 252 if (!CHECKIO(p, procp) || p_trespass(ap->a_cred, procp->p_ucred))
984263bc
MD
253 return EPERM;
254
255 switch (ap->a_command) {
256 case PIOCBIS:
257 procp->p_stops |= *(unsigned int*)ap->a_data;
258 break;
259 case PIOCBIC:
260 procp->p_stops &= ~*(unsigned int*)ap->a_data;
261 break;
262 case PIOCSFL:
263 /*
264 * NFLAGS is "non-suser_xxx flags" -- currently, only
265 * PFS_ISUGID ("ignore set u/g id");
266 */
267#define NFLAGS (PF_ISUGID)
268 flags = (unsigned char)*(unsigned int*)ap->a_data;
dadab5e9 269 if (flags & NFLAGS && (error = suser_cred(ap->a_cred, 0)))
984263bc
MD
270 return error;
271 procp->p_pfsflags = flags;
272 break;
273 case PIOCGFL:
274 *(unsigned int*)ap->a_data = (unsigned int)procp->p_pfsflags;
275 break;
276 case PIOCSTATUS:
277 psp = (struct procfs_status *)ap->a_data;
278 psp->state = (procp->p_step == 0);
279 psp->flags = procp->p_pfsflags;
280 psp->events = procp->p_stops;
281 if (procp->p_step) {
282 psp->why = procp->p_stype;
283 psp->val = procp->p_xstat;
284 } else {
285 psp->why = psp->val = 0; /* Not defined values */
286 }
287 break;
288 case PIOCWAIT:
289 psp = (struct procfs_status *)ap->a_data;
290 if (procp->p_step == 0) {
377d4740 291 error = tsleep(&procp->p_stype, PCATCH, "piocwait", 0);
984263bc
MD
292 if (error)
293 return error;
294 }
295 psp->state = 1; /* It stopped */
296 psp->flags = procp->p_pfsflags;
297 psp->events = procp->p_stops;
298 psp->why = procp->p_stype; /* why it stopped */
299 psp->val = procp->p_xstat; /* any extra info */
300 break;
301 case PIOCCONT: /* Restart a proc */
302 if (procp->p_step == 0)
303 return EINVAL; /* Can only start a stopped process */
304 if ((signo = *(int*)ap->a_data) != 0) {
305 if (signo >= NSIG || signo <= 0)
306 return EINVAL;
307 psignal(procp, signo);
308 }
309 procp->p_step = 0;
310 wakeup(&procp->p_step);
311 break;
312 default:
313 return (ENOTTY);
314 }
315 return 0;
316}
317
318/*
319 * do block mapping for pfsnode (vp).
320 * since we don't use the buffer cache
321 * for procfs this function should never
322 * be called. in any case, it's not clear
323 * what part of the kernel ever makes use
324 * of this function. for sanity, this is the
325 * usual no-op bmap, although returning
326 * (EIO) would be a reasonable alternative.
327 */
328static int
329procfs_bmap(ap)
330 struct vop_bmap_args /* {
331 struct vnode *a_vp;
332 daddr_t a_bn;
333 struct vnode **a_vpp;
334 daddr_t *a_bnp;
335 int *a_runp;
336 } */ *ap;
337{
338
339 if (ap->a_vpp != NULL)
340 *ap->a_vpp = ap->a_vp;
341 if (ap->a_bnp != NULL)
342 *ap->a_bnp = ap->a_bn;
343 if (ap->a_runp != NULL)
344 *ap->a_runp = 0;
345 return (0);
346}
347
348/*
349 * procfs_inactive is called when the pfsnode
350 * is vrele'd and the reference count goes
351 * to zero. (vp) will be on the vnode free
352 * list, so to get it back vget() must be
353 * used.
354 *
355 * (vp) is locked on entry, but must be unlocked on exit.
356 */
357static int
358procfs_inactive(ap)
359 struct vop_inactive_args /* {
360 struct vnode *a_vp;
dadab5e9 361 struct thread *a_td;
984263bc
MD
362 } */ *ap;
363{
364 struct vnode *vp = ap->a_vp;
365
dadab5e9 366 VOP_UNLOCK(vp, 0, ap->a_td);
984263bc
MD
367
368 return (0);
369}
370
371/*
372 * _reclaim is called when getnewvnode()
373 * wants to make use of an entry on the vnode
374 * free list. at this time the filesystem needs
375 * to free any private data and remove the node
376 * from any private lists.
377 */
378static int
379procfs_reclaim(ap)
380 struct vop_reclaim_args /* {
381 struct vnode *a_vp;
382 } */ *ap;
383{
384
385 return (procfs_freevp(ap->a_vp));
386}
387
388/*
389 * _print is used for debugging.
390 * just print a readable description
391 * of (vp).
392 */
393static int
394procfs_print(ap)
395 struct vop_print_args /* {
396 struct vnode *a_vp;
397 } */ *ap;
398{
399 struct pfsnode *pfs = VTOPFS(ap->a_vp);
400
401 printf("tag VT_PROCFS, type %d, pid %ld, mode %x, flags %lx\n",
402 pfs->pfs_type, (long)pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags);
403 return (0);
404}
405
/*
 * Generic entry point for unsupported operations: takes no notice of
 * any arguments and always fails with EIO.
 */
static int
procfs_badop(void)
{
	return (EIO);
}
415
416/*
417 * Invent attributes for pfsnode (vp) and store
418 * them in (vap).
419 * Directories lengths are returned as zero since
420 * any real length would require the genuine size
421 * to be computed, and nothing cares anyway.
422 *
423 * this is relatively minimal for procfs.
424 */
425static int
426procfs_getattr(ap)
427 struct vop_getattr_args /* {
428 struct vnode *a_vp;
429 struct vattr *a_vap;
430 struct ucred *a_cred;
dadab5e9 431 struct thread *a_td;
984263bc
MD
432 } */ *ap;
433{
434 struct pfsnode *pfs = VTOPFS(ap->a_vp);
435 struct vattr *vap = ap->a_vap;
436 struct proc *procp;
437 int error;
438
439 /*
440 * First make sure that the process and its credentials
441 * still exist.
442 */
443 switch (pfs->pfs_type) {
444 case Proot:
445 case Pcurproc:
446 procp = 0;
447 break;
448
449 default:
450 procp = PFIND(pfs->pfs_pid);
41c20dac 451 if (procp == NULL || procp->p_ucred == NULL)
984263bc
MD
452 return (ENOENT);
453 }
454
455 error = 0;
456
457 /* start by zeroing out the attributes */
458 VATTR_NULL(vap);
459
460 /* next do all the common fields */
461 vap->va_type = ap->a_vp->v_type;
462 vap->va_mode = pfs->pfs_mode;
463 vap->va_fileid = pfs->pfs_fileno;
464 vap->va_flags = 0;
465 vap->va_blocksize = PAGE_SIZE;
466 vap->va_bytes = vap->va_size = 0;
467 vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
468
469 /*
470 * Make all times be current TOD.
471 * It would be possible to get the process start
472 * time from the p_stat structure, but there's
473 * no "file creation" time stamp anyway, and the
474 * p_stat structure is not addressible if u. gets
475 * swapped out for that process.
476 */
477 nanotime(&vap->va_ctime);
478 vap->va_atime = vap->va_mtime = vap->va_ctime;
479
480 /*
481 * If the process has exercised some setuid or setgid
482 * privilege, then rip away read/write permission so
483 * that only root can gain access.
484 */
485 switch (pfs->pfs_type) {
486 case Pctl:
487 case Pregs:
488 case Pfpregs:
489 case Pdbregs:
490 case Pmem:
491 if (procp->p_flag & P_SUGID)
492 vap->va_mode &= ~((VREAD|VWRITE)|
493 ((VREAD|VWRITE)>>3)|
494 ((VREAD|VWRITE)>>6));
495 break;
496 default:
497 break;
498 }
499
500 /*
501 * now do the object specific fields
502 *
503 * The size could be set from struct reg, but it's hardly
504 * worth the trouble, and it puts some (potentially) machine
505 * dependent data into this machine-independent code. If it
506 * becomes important then this function should break out into
507 * a per-file stat function in the corresponding .c file.
508 */
509
510 vap->va_nlink = 1;
511 if (procp) {
512 vap->va_uid = procp->p_ucred->cr_uid;
513 vap->va_gid = procp->p_ucred->cr_gid;
514 }
515
516 switch (pfs->pfs_type) {
517 case Proot:
518 /*
519 * Set nlink to 1 to tell fts(3) we don't actually know.
520 */
521 vap->va_nlink = 1;
522 vap->va_uid = 0;
523 vap->va_gid = 0;
524 vap->va_size = vap->va_bytes = DEV_BSIZE;
525 break;
526
527 case Pcurproc: {
528 char buf[16]; /* should be enough */
529 vap->va_uid = 0;
530 vap->va_gid = 0;
531 vap->va_size = vap->va_bytes =
532 snprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid);
533 break;
534 }
535
536 case Pproc:
537 vap->va_nlink = nproc_targets;
538 vap->va_size = vap->va_bytes = DEV_BSIZE;
539 break;
540
541 case Pfile: {
542 char *fullpath, *freepath;
543 error = textvp_fullpath(procp, &fullpath, &freepath);
544 if (error == 0) {
545 vap->va_size = strlen(fullpath);
546 free(freepath, M_TEMP);
547 } else {
548 vap->va_size = sizeof("unknown") - 1;
549 error = 0;
550 }
551 vap->va_bytes = vap->va_size;
552 break;
553 }
554
555 case Pmem:
556 /*
557 * If we denied owner access earlier, then we have to
558 * change the owner to root - otherwise 'ps' and friends
559 * will break even though they are setgid kmem. *SIGH*
560 */
561 if (procp->p_flag & P_SUGID)
562 vap->va_uid = 0;
563 else
564 vap->va_uid = procp->p_ucred->cr_uid;
565 break;
566
567 case Pregs:
568 vap->va_bytes = vap->va_size = sizeof(struct reg);
569 break;
570
571 case Pfpregs:
572 vap->va_bytes = vap->va_size = sizeof(struct fpreg);
573 break;
574
575 case Pdbregs:
576 vap->va_bytes = vap->va_size = sizeof(struct dbreg);
577 break;
578
579 case Ptype:
580 case Pmap:
581 case Pctl:
582 case Pstatus:
583 case Pnote:
584 case Pnotepg:
585 case Pcmdline:
586 case Prlimit:
587 break;
588
589 default:
590 panic("procfs_getattr");
591 }
592
593 return (error);
594}
595
596static int
597procfs_setattr(ap)
598 struct vop_setattr_args /* {
599 struct vnode *a_vp;
600 struct vattr *a_vap;
601 struct ucred *a_cred;
dadab5e9 602 struct thread *a_td;
984263bc
MD
603 } */ *ap;
604{
605
606 if (ap->a_vap->va_flags != VNOVAL)
607 return (EOPNOTSUPP);
608
609 /*
610 * just fake out attribute setting
611 * it's not good to generate an error
612 * return, otherwise things like creat()
613 * will fail when they try to set the
614 * file length to 0. worse, this means
615 * that echo $note > /proc/$pid/note will fail.
616 */
617
618 return (0);
619}
620
621/*
622 * implement access checking.
623 *
624 * something very similar to this code is duplicated
625 * throughout the 4bsd kernel and should be moved
626 * into kern/vfs_subr.c sometime.
627 *
628 * actually, the check for super-user is slightly
629 * broken since it will allow read access to write-only
630 * objects. this doesn't cause any particular trouble
631 * but does mean that the i/o entry points need to check
632 * that the operation really does make sense.
633 */
634static int
635procfs_access(ap)
636 struct vop_access_args /* {
637 struct vnode *a_vp;
638 int a_mode;
639 struct ucred *a_cred;
dadab5e9 640 struct thread *a_td;
984263bc
MD
641 } */ *ap;
642{
643 struct vattr *vap;
644 struct vattr vattr;
645 int error;
646
647 /*
648 * If you're the super-user,
649 * you always get access.
650 */
651 if (ap->a_cred->cr_uid == 0)
652 return (0);
653
654 vap = &vattr;
3b568787 655 error = VOP_GETATTR(ap->a_vp, vap, ap->a_td);
984263bc
MD
656 if (error)
657 return (error);
658
659 /*
660 * Access check is based on only one of owner, group, public.
661 * If not owner, then check group. If not a member of the
662 * group, then check public access.
663 */
664 if (ap->a_cred->cr_uid != vap->va_uid) {
665 gid_t *gp;
666 int i;
667
668 ap->a_mode >>= 3;
669 gp = ap->a_cred->cr_groups;
670 for (i = 0; i < ap->a_cred->cr_ngroups; i++, gp++)
671 if (vap->va_gid == *gp)
672 goto found;
673 ap->a_mode >>= 3;
674found:
675 ;
676 }
677
678 if ((vap->va_mode & ap->a_mode) == ap->a_mode)
679 return (0);
680
681 return (EACCES);
682}
683
684/*
685 * lookup. this is incredibly complicated in the
686 * general case, however for most pseudo-filesystems
687 * very little needs to be done.
688 *
689 * unless you want to get a migraine, just make sure your
690 * filesystem doesn't do any locking of its own. otherwise
691 * read and inwardly digest ufs_lookup().
692 */
693static int
694procfs_lookup(ap)
695 struct vop_lookup_args /* {
696 struct vnode * a_dvp;
697 struct vnode ** a_vpp;
698 struct componentname * a_cnp;
699 } */ *ap;
700{
701 struct componentname *cnp = ap->a_cnp;
702 struct vnode **vpp = ap->a_vpp;
703 struct vnode *dvp = ap->a_dvp;
704 char *pname = cnp->cn_nameptr;
705 /* struct proc *curp = cnp->cn_proc; */
706 struct proc_target *pt;
707 pid_t pid;
708 struct pfsnode *pfs;
709 struct proc *p;
710 int i;
711
712 *vpp = NULL;
713
714 if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
715 return (EROFS);
716
717 if (cnp->cn_namelen == 1 && *pname == '.') {
718 *vpp = dvp;
719 VREF(dvp);
720 /* vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, curp); */
721 return (0);
722 }
723
724 pfs = VTOPFS(dvp);
725 switch (pfs->pfs_type) {
726 case Proot:
727 if (cnp->cn_flags & ISDOTDOT)
728 return (EIO);
729
730 if (CNEQ(cnp, "curproc", 7))
731 return (procfs_allocvp(dvp->v_mount, vpp, 0, Pcurproc));
732
733 pid = atopid(pname, cnp->cn_namelen);
734 if (pid == NO_PID)
735 break;
736
737 p = PFIND(pid);
738 if (p == NULL)
739 break;
740
28c57d20
HP
741 if (ps_showallprocs == 0 && ap->a_cnp->cn_cred->cr_uid != 0 &&
742 ap->a_cnp->cn_cred->cr_uid != p->p_ucred->cr_uid)
743 break;
744
984263bc
MD
745 return (procfs_allocvp(dvp->v_mount, vpp, pid, Pproc));
746
747 case Pproc:
748 if (cnp->cn_flags & ISDOTDOT)
749 return (procfs_root(dvp->v_mount, vpp));
750
751 p = PFIND(pfs->pfs_pid);
752 if (p == NULL)
753 break;
754
28c57d20
HP
755 if (ps_showallprocs == 0 && ap->a_cnp->cn_cred->cr_uid != 0 &&
756 ap->a_cnp->cn_cred->cr_uid != p->p_ucred->cr_uid)
757 break;
758
984263bc
MD
759 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
760 if (cnp->cn_namelen == pt->pt_namlen &&
761 bcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
762 (pt->pt_valid == NULL || (*pt->pt_valid)(p)))
763 goto found;
764 }
765 break;
766 found:
767 return (procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
768 pt->pt_pfstype));
769
770 default:
771 return (ENOTDIR);
772 }
773
774 return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS);
775}
776
777/*
778 * Does this process have a text file?
779 */
780int
781procfs_validfile(p)
782 struct proc *p;
783{
784
785 return (procfs_findtextvp(p) != NULLVP);
786}
787
788/*
789 * readdir() returns directory entries from pfsnode (vp).
790 *
791 * We generate just one directory entry at a time, as it would probably
792 * not pay off to buffer several entries locally to save uiomove calls.
793 */
794static int
795procfs_readdir(ap)
796 struct vop_readdir_args /* {
797 struct vnode *a_vp;
798 struct uio *a_uio;
799 struct ucred *a_cred;
800 int *a_eofflag;
801 int *a_ncookies;
802 u_long **a_cookies;
803 } */ *ap;
804{
805 struct uio *uio = ap->a_uio;
806 struct dirent d;
807 struct dirent *dp = &d;
808 struct pfsnode *pfs;
809 int count, error, i, off;
810 static u_int delen;
811
812 if (!delen) {
813
814 d.d_namlen = PROCFS_NAMELEN;
815 delen = GENERIC_DIRSIZ(&d);
816 }
817
818 pfs = VTOPFS(ap->a_vp);
819
820 off = (int)uio->uio_offset;
821 if (off != uio->uio_offset || off < 0 ||
822 off % delen != 0 || uio->uio_resid < delen)
823 return (EINVAL);
824
825 error = 0;
826 count = 0;
827 i = off / delen;
828
829 switch (pfs->pfs_type) {
830 /*
831 * this is for the process-specific sub-directories.
832 * all that is needed to is copy out all the entries
833 * from the procent[] table (top of this file).
834 */
835 case Pproc: {
836 struct proc *p;
837 struct proc_target *pt;
838
839 p = PFIND(pfs->pfs_pid);
840 if (p == NULL)
841 break;
41c20dac 842 if (!PRISON_CHECK(ap->a_cred, p->p_ucred))
984263bc
MD
843 break;
844
845 for (pt = &proc_targets[i];
846 uio->uio_resid >= delen && i < nproc_targets; pt++, i++) {
847 if (pt->pt_valid && (*pt->pt_valid)(p) == 0)
848 continue;
849
850 dp->d_reclen = delen;
851 dp->d_fileno = PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype);
852 dp->d_namlen = pt->pt_namlen;
853 bcopy(pt->pt_name, dp->d_name, pt->pt_namlen + 1);
854 dp->d_type = pt->pt_type;
855
856 if ((error = uiomove((caddr_t)dp, delen, uio)) != 0)
857 break;
858 }
859
860 break;
861 }
862
863 /*
864 * this is for the root of the procfs filesystem
865 * what is needed is a special entry for "curproc"
866 * followed by an entry for each process on allproc
867#ifdef PROCFS_ZOMBIE
868 * and zombproc.
869#endif
870 */
871
872 case Proot: {
873#ifdef PROCFS_ZOMBIE
874 int doingzomb = 0;
875#endif
876 int pcnt = 0;
877 volatile struct proc *p = allproc.lh_first;
878
879 for (; p && uio->uio_resid >= delen; i++, pcnt++) {
880 bzero((char *) dp, delen);
881 dp->d_reclen = delen;
882
883 switch (i) {
884 case 0: /* `.' */
885 case 1: /* `..' */
886 dp->d_fileno = PROCFS_FILENO(0, Proot);
887 dp->d_namlen = i + 1;
888 bcopy("..", dp->d_name, dp->d_namlen);
889 dp->d_name[i + 1] = '\0';
890 dp->d_type = DT_DIR;
891 break;
892
893 case 2:
894 dp->d_fileno = PROCFS_FILENO(0, Pcurproc);
895 dp->d_namlen = 7;
896 bcopy("curproc", dp->d_name, 8);
897 dp->d_type = DT_LNK;
898 break;
899
900 default:
901 while (pcnt < i) {
902 p = p->p_list.le_next;
903 if (!p)
904 goto done;
41c20dac 905 if (!PRISON_CHECK(ap->a_cred, p->p_ucred))
984263bc
MD
906 continue;
907 pcnt++;
908 }
41c20dac 909 while (!PRISON_CHECK(ap->a_cred, p->p_ucred)) {
984263bc
MD
910 p = p->p_list.le_next;
911 if (!p)
912 goto done;
913 }
28c57d20
HP
914 if (ps_showallprocs == 0 &&
915 ap->a_cred->cr_uid != 0 &&
916 ap->a_cred->cr_uid !=
917 p->p_ucred->cr_uid) {
918 p = p->p_list.le_next;
919 if (!p)
920 goto done;
921 break;
922 }
923
984263bc
MD
924 dp->d_fileno = PROCFS_FILENO(p->p_pid, Pproc);
925 dp->d_namlen = sprintf(dp->d_name, "%ld",
926 (long)p->p_pid);
927 dp->d_type = DT_DIR;
928 p = p->p_list.le_next;
929 break;
930 }
931
932 if ((error = uiomove((caddr_t)dp, delen, uio)) != 0)
933 break;
934 }
935 done:
936
937#ifdef PROCFS_ZOMBIE
938 if (p == NULL && doingzomb == 0) {
939 doingzomb = 1;
940 p = zombproc.lh_first;
941 goto again;
942 }
943#endif
944
945 break;
946
947 }
948
949 default:
950 error = ENOTDIR;
951 break;
952 }
953
954 uio->uio_offset = i * delen;
955
956 return (error);
957}
958
959/*
960 * readlink reads the link of `curproc' or `file'
961 */
962static int
963procfs_readlink(ap)
964 struct vop_readlink_args *ap;
965{
966 char buf[16]; /* should be enough */
967 struct proc *procp;
968 struct vnode *vp = ap->a_vp;
969 struct pfsnode *pfs = VTOPFS(vp);
970 char *fullpath, *freepath;
971 int error, len;
972
973 switch (pfs->pfs_type) {
974 case Pcurproc:
975 if (pfs->pfs_fileno != PROCFS_FILENO(0, Pcurproc))
976 return (EINVAL);
977
978 len = snprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid);
979
980 return (uiomove(buf, len, ap->a_uio));
981 /*
982 * There _should_ be no way for an entire process to disappear
983 * from under us...
984 */
985 case Pfile:
986 procp = PFIND(pfs->pfs_pid);
41c20dac 987 if (procp == NULL || procp->p_ucred == NULL) {
984263bc
MD
988 printf("procfs_readlink: pid %d disappeared\n",
989 pfs->pfs_pid);
990 return (uiomove("unknown", sizeof("unknown") - 1,
991 ap->a_uio));
992 }
993 error = textvp_fullpath(procp, &fullpath, &freepath);
994 if (error != 0)
995 return (uiomove("unknown", sizeof("unknown") - 1,
996 ap->a_uio));
997 error = uiomove(fullpath, strlen(fullpath), ap->a_uio);
998 free(freepath, M_TEMP);
999 return (error);
1000 default:
1001 return (EINVAL);
1002 }
1003}
1004
1005/*
1006 * convert decimal ascii to pid_t
1007 */
1008static pid_t
1009atopid(b, len)
1010 const char *b;
1011 u_int len;
1012{
1013 pid_t p = 0;
1014
1015 while (len--) {
1016 char c = *b++;
1017 if (c < '0' || c > '9')
1018 return (NO_PID);
1019 p = 10 * p + (c - '0');
1020 if (p > PID_MAX)
1021 return (NO_PID);
1022 }
1023
1024 return (p);
1025}
1026
1027/*
1028 * procfs vnode operations.
1029 */
1030vop_t **procfs_vnodeop_p;
1031static struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
1032 { &vop_default_desc, (vop_t *) vop_defaultop },
1033 { &vop_access_desc, (vop_t *) procfs_access },
1034 { &vop_advlock_desc, (vop_t *) procfs_badop },
1035 { &vop_bmap_desc, (vop_t *) procfs_bmap },
1036 { &vop_close_desc, (vop_t *) procfs_close },
1037 { &vop_create_desc, (vop_t *) procfs_badop },
1038 { &vop_getattr_desc, (vop_t *) procfs_getattr },
1039 { &vop_inactive_desc, (vop_t *) procfs_inactive },
1040 { &vop_link_desc, (vop_t *) procfs_badop },
1041 { &vop_lookup_desc, (vop_t *) procfs_lookup },
1042 { &vop_mkdir_desc, (vop_t *) procfs_badop },
1043 { &vop_mknod_desc, (vop_t *) procfs_badop },
1044 { &vop_open_desc, (vop_t *) procfs_open },
1045 { &vop_pathconf_desc, (vop_t *) vop_stdpathconf },
1046 { &vop_print_desc, (vop_t *) procfs_print },
1047 { &vop_read_desc, (vop_t *) procfs_rw },
1048 { &vop_readdir_desc, (vop_t *) procfs_readdir },
1049 { &vop_readlink_desc, (vop_t *) procfs_readlink },
1050 { &vop_reclaim_desc, (vop_t *) procfs_reclaim },
1051 { &vop_remove_desc, (vop_t *) procfs_badop },
1052 { &vop_rename_desc, (vop_t *) procfs_badop },
1053 { &vop_rmdir_desc, (vop_t *) procfs_badop },
1054 { &vop_setattr_desc, (vop_t *) procfs_setattr },
1055 { &vop_symlink_desc, (vop_t *) procfs_badop },
1056 { &vop_write_desc, (vop_t *) procfs_rw },
1057 { &vop_ioctl_desc, (vop_t *) procfs_ioctl },
1058 { NULL, NULL }
1059};
1060static struct vnodeopv_desc procfs_vnodeop_opv_desc =
1061 { &procfs_vnodeop_p, procfs_vnodeop_entries };
1062
1063VNODEOP_SET(procfs_vnodeop_opv_desc);