sys/vfs/procfs/procfs_vnops.c

   1 /*
   2  * Copyright (c) 1993, 1995 Jan-Simon Pendry
   3  * Copyright (c) 1993, 1995
   4  *      The Regents of the University of California.  All rights reserved.
   5  *
   6  * This code is derived from software contributed to Berkeley by
   7  * Jan-Simon Pendry.
   8  *
   9  * Redistribution and use in source and binary forms, with or without
  10  * modification, are permitted provided that the following conditions
  11  * are met:
  12  * 1. Redistributions of source code must retain the above copyright
  13  *    notice, this list of conditions and the following disclaimer.
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  *    notice, this list of conditions and the following disclaimer in the
  16  *    documentation and/or other materials provided with the distribution.
  17  * 3. All advertising materials mentioning features or use of this software
  18  *    must display the following acknowledgement:
  19  *      This product includes software developed by the University of
  20  *      California, Berkeley and its contributors.
  21  * 4. Neither the name of the University nor the names of its contributors
  22  *    may be used to endorse or promote products derived from this software
  23  *    without specific prior written permission.
  24  *
  25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  35  * SUCH DAMAGE.
  36  *
  37  *      @(#)procfs_vnops.c      8.18 (Berkeley) 5/21/95
  38  *
  39  * $FreeBSD: src/sys/miscfs/procfs/procfs_vnops.c,v 1.76.2.7 2002/01/22 17:22:59 nectar Exp $
  40  * $DragonFly: src/sys/vfs/procfs/procfs_vnops.c,v 1.38 2006/09/05 00:55:50 dillon Exp $
  41  */
  42
  43 /*
  44  * procfs vnode interface
  45  */
  46
  47 #include <sys/param.h>
  48 #include <sys/systm.h>
  49 #include <sys/time.h>
  50 #include <sys/kernel.h>
  51 #include <sys/lock.h>
  52 #include <sys/fcntl.h>
  53 #include <sys/proc.h>
  54 #include <sys/signalvar.h>
  55 #include <sys/vnode.h>
  56 #include <sys/uio.h>
  57 #include <sys/mount.h>
  58 #include <sys/namei.h>
  59 #include <sys/dirent.h>
  60 #include <sys/malloc.h>
  61 #include <machine/reg.h>
  62 #include <vm/vm_zone.h>
  63 #include <vfs/procfs/procfs.h>
  64 #include <sys/pioctl.h>
  65
  66 #include <machine/limits.h>
  67
  68 static int      procfs_access (struct vop_access_args *);
  69 static int      procfs_badop (struct vop_generic_args *);
  70 static int      procfs_bmap (struct vop_bmap_args *);
  71 static int      procfs_close (struct vop_close_args *);
  72 static int      procfs_getattr (struct vop_getattr_args *);
  73 static int      procfs_inactive (struct vop_inactive_args *);
  74 static int      procfs_ioctl (struct vop_ioctl_args *);
  75 static int      procfs_lookup (struct vop_old_lookup_args *);
  76 static int      procfs_open (struct vop_open_args *);
  77 static int      procfs_print (struct vop_print_args *);
  78 static int      procfs_readdir (struct vop_readdir_args *);
  79 static int      procfs_readlink (struct vop_readlink_args *);
  80 static int      procfs_reclaim (struct vop_reclaim_args *);
  81 static int      procfs_setattr (struct vop_setattr_args *);
  82
  83 static int      procfs_readdir_proc(struct vop_readdir_args *);
  84 static int      procfs_readdir_root(struct vop_readdir_args *);
  85
  86 /*
  87  * procfs vnode operations.
  88  */
  89 struct vop_ops procfs_vnode_vops = {
  90         .vop_default =          vop_defaultop,
  91         .vop_access =           procfs_access,
  92         .vop_advlock =          (void *)procfs_badop,
  93         .vop_bmap =             procfs_bmap,
  94         .vop_close =            procfs_close,
  95         .vop_old_create =       (void *)procfs_badop,
  96         .vop_getattr =          procfs_getattr,
  97         .vop_inactive =         procfs_inactive,
  98         .vop_old_link =         (void *)procfs_badop,
  99         .vop_old_lookup =       procfs_lookup,
 100         .vop_old_mkdir =        (void *)procfs_badop,
 101         .vop_old_mknod =        (void *)procfs_badop,
 102         .vop_open =             procfs_open,
 103         .vop_pathconf =         vop_stdpathconf,
 104         .vop_print =            procfs_print,
 105         .vop_read =             procfs_rw,
 106         .vop_readdir =          procfs_readdir,
 107         .vop_readlink =         procfs_readlink,
 108         .vop_reclaim =          procfs_reclaim,
 109         .vop_old_remove =       (void *)procfs_badop,
 110         .vop_old_rename =       (void *)procfs_badop,
 111         .vop_old_rmdir =        (void *)procfs_badop,
 112         .vop_setattr =          procfs_setattr,
 113         .vop_old_symlink =      (void *)procfs_badop,
 114         .vop_write =            (void *)procfs_rw,
 115         .vop_ioctl =            procfs_ioctl
 116 };
 117
 118
 119 /*
 120  * This is a list of the valid names in the
 121  * process-specific sub-directories.  It is
 122  * used in procfs_lookup and procfs_readdir
 123  */
 124 static struct proc_target {
 125         u_char  pt_type;
 126         u_char  pt_namlen;
 127         char    *pt_name;
 128         pfstype pt_pfstype;
 129         int     (*pt_valid) (struct proc *p);
 130 } proc_targets[] = {
 131 #define N(s) sizeof(s)-1, s
 132         /*        name          type            validp */
 133         { DT_DIR, N("."),       Pproc,          NULL },
 134         { DT_DIR, N(".."),      Proot,          NULL },
 135         { DT_REG, N("mem"),     Pmem,           NULL },
 136         { DT_REG, N("regs"),    Pregs,          procfs_validregs },
 137         { DT_REG, N("fpregs"),  Pfpregs,        procfs_validfpregs },
 138         { DT_REG, N("dbregs"),  Pdbregs,        procfs_validdbregs },
 139         { DT_REG, N("ctl"),     Pctl,           NULL },
 140         { DT_REG, N("status"),  Pstatus,        NULL },
 141         { DT_REG, N("note"),    Pnote,          NULL },
 142         { DT_REG, N("notepg"),  Pnotepg,        NULL },
 143         { DT_REG, N("map"),     Pmap,           procfs_validmap },
 144         { DT_REG, N("etype"),   Ptype,          procfs_validtype },
 145         { DT_REG, N("cmdline"), Pcmdline,       NULL },
 146         { DT_REG, N("rlimit"),  Prlimit,        NULL },
 147         { DT_LNK, N("file"),    Pfile,          NULL },
 148 #undef N
 149 };
 150 static const int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]);
 151
 152 static pid_t atopid (const char *, u_int);
 153
 154 /*
 155  * set things up for doing i/o on
 156  * the pfsnode (vp).  (vp) is locked
 157  * on entry, and should be left locked
 158  * on exit.
 159  *
 160  * for procfs we don't need to do anything
 161  * in particular for i/o.  all that is done
 162  * is to support exclusive open on process
 163  * memory images.
 164  *
 165  * procfs_open(struct vnode *a_vp, int a_mode, struct ucred *a_cred)
 166  */
 167 static int
 168 procfs_open(struct vop_open_args *ap)
 169 {
 170         struct pfsnode *pfs = VTOPFS(ap->a_vp);
 171         struct proc *p1, *p2;
 172
 173         p2 = PFIND(pfs->pfs_pid);
 174         if (p2 == NULL)
 175                 return (ENOENT);
 176         if (pfs->pfs_pid && !PRISON_CHECK(ap->a_cred, p2->p_ucred))
 177                 return (ENOENT);
 178
 179         switch (pfs->pfs_type) {
 180         case Pmem:
 181                 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
 182                     ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE)))
 183                         return (EBUSY);
 184
 185                 p1 = curproc;
 186                 KKASSERT(p1);
 187                 /* Can't trace a process that's currently exec'ing. */
 188                 if ((p2->p_flag & P_INEXEC) != 0)
 189                         return EAGAIN;
 190                 if (!CHECKIO(p1, p2) || p_trespass(ap->a_cred, p2->p_ucred))
 191                         return (EPERM);
 192
 193                 if (ap->a_mode & FWRITE)
 194                         pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
 195
 196                 break;
 197
 198         default:
 199                 break;
 200         }
 201
 202         return (vop_stdopen(ap));
 203 }
 204
 205 /*
 206  * close the pfsnode (vp) after doing i/o.
 207  * (vp) is not locked on entry or exit.
 208  *
 209  * nothing to do for procfs other than undo
 210  * any exclusive open flag (see _open above).
 211  *
 212  * procfs_close(struct vnode *a_vp, int a_fflag, struct ucred *a_cred)
 213  */
 214 static int
 215 procfs_close(struct vop_close_args *ap)
 216 {
 217         struct pfsnode *pfs = VTOPFS(ap->a_vp);
 218         struct proc *p;
 219
 220         switch (pfs->pfs_type) {
 221         case Pmem:
 222                 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
 223                         pfs->pfs_flags &= ~(FWRITE|O_EXCL);
 224                 /*
 225                  * This rather complicated-looking code is trying to
 226                  * determine if this was the last close on this particular
 227                  * vnode.  While one would expect v_usecount to be 1 at
 228                  * that point, it seems that (according to John Dyson)
 229                  * the VM system will bump up the usecount.  So:  if the
 230                  * usecount is 2, and VOBJBUF is set, then this is really
 231                  * the last close.  Otherwise, if the usecount is < 2
 232                  * then it is definitely the last close.
 233                  * If this is the last close, then it checks to see if
 234                  * the target process has PF_LINGER set in p_pfsflags,
 235                  * if this is *not* the case, then the process' stop flags
 236                  * are cleared, and the process is woken up.  This is
 237                  * to help prevent the case where a process has been
 238                  * told to stop on an event, but then the requesting process
 239                  * has gone away or forgotten about it.
 240                  */
 241                 if ((ap->a_vp->v_usecount < 2)
 242                     && (p = pfind(pfs->pfs_pid))
 243                     && !(p->p_pfsflags & PF_LINGER)) {
 244                         p->p_stops = 0;
 245                         p->p_step = 0;
 246                         wakeup(&p->p_step);
 247                 }
 248                 break;
 249         default:
 250                 break;
 251         }
 252
 253         return (vop_stdclose(ap));
 254 }
 255
 256 /*
 257  * do an ioctl operation on a pfsnode (vp).
 258  * (vp) is not locked on entry or exit.
 259  */
 260 static int
 261 procfs_ioctl(struct vop_ioctl_args *ap)
 262 {
 263         struct pfsnode *pfs = VTOPFS(ap->a_vp);
 264         struct proc *procp;
 265         struct proc *p;
 266         int error;
 267         int signo;
 268         struct procfs_status *psp;
 269         unsigned char flags;
 270
 271         procp = pfind(pfs->pfs_pid);
 272         if (procp == NULL)
 273                 return ENOTTY;
 274         p = curproc;
 275         if (p == NULL)
 276                 return EINVAL;
 277
 278         /* Can't trace a process that's currently exec'ing. */
 279         if ((procp->p_flag & P_INEXEC) != 0)
 280                 return EAGAIN;
 281         if (!CHECKIO(p, procp) || p_trespass(ap->a_cred, procp->p_ucred))
 282                 return EPERM;
 283
 284         switch (ap->a_command) {
 285         case PIOCBIS:
 286           procp->p_stops |= *(unsigned int*)ap->a_data;
 287           break;
 288         case PIOCBIC:
 289           procp->p_stops &= ~*(unsigned int*)ap->a_data;
 290           break;
 291         case PIOCSFL:
 292           /*
 293            * NFLAGS is "non-suser_xxx flags" -- currently, only
 294            * PFS_ISUGID ("ignore set u/g id");
 295            */
 296 #define NFLAGS  (PF_ISUGID)
 297           flags = (unsigned char)*(unsigned int*)ap->a_data;
 298           if (flags & NFLAGS && (error = suser_cred(ap->a_cred, 0)))
 299             return error;
 300           procp->p_pfsflags = flags;
 301           break;
 302         case PIOCGFL:
 303           *(unsigned int*)ap->a_data = (unsigned int)procp->p_pfsflags;
 304           break;
 305         case PIOCSTATUS:
 306           psp = (struct procfs_status *)ap->a_data;
 307           psp->state = (procp->p_step == 0);
 308           psp->flags = procp->p_pfsflags;
 309           psp->events = procp->p_stops;
 310           if (procp->p_step) {
 311             psp->why = procp->p_stype;
 312             psp->val = procp->p_xstat;
 313           } else {
 314             psp->why = psp->val = 0;    /* Not defined values */
 315           }
 316           break;
 317         case PIOCWAIT:
 318           psp = (struct procfs_status *)ap->a_data;
 319           if (procp->p_step == 0) {
 320             error = tsleep(&procp->p_stype, PCATCH, "piocwait", 0);
 321             if (error)
 322               return error;
 323           }
 324           psp->state = 1;       /* It stopped */
 325           psp->flags = procp->p_pfsflags;
 326           psp->events = procp->p_stops;
 327           psp->why = procp->p_stype;    /* why it stopped */
 328           psp->val = procp->p_xstat;    /* any extra info */
 329           break;
 330         case PIOCCONT:  /* Restart a proc */
 331           if (procp->p_step == 0)
 332             return EINVAL;      /* Can only start a stopped process */
 333           if ((signo = *(int*)ap->a_data) != 0) {
 334             if (signo >= NSIG || signo <= 0)
 335               return EINVAL;
 336             ksignal(procp, signo);
 337           }
 338           procp->p_step = 0;
 339           wakeup(&procp->p_step);
 340           break;
 341         default:
 342           return (ENOTTY);
 343         }
 344         return 0;
 345 }
 346
 347 /*
 348  * do block mapping for pfsnode (vp).
 349  * since we don't use the buffer cache
 350  * for procfs this function should never
 351  * be called.  in any case, it's not clear
 352  * what part of the kernel ever makes use
 353  * of this function.  for sanity, this is the
 354  * usual no-op bmap, although returning
 355  * (EIO) would be a reasonable alternative.
 356  *
 357  * procfs_bmap(struct vnode *a_vp, off_t a_loffset, struct vnode **a_vpp,
 358  *              off_t *a_doffsetp, int *a_runp)
 359  */
 360 static int
 361 procfs_bmap(struct vop_bmap_args *ap)
 362 {
 363         if (ap->a_vpp != NULL)
 364                 *ap->a_vpp = ap->a_vp;
 365         if (ap->a_doffsetp != NULL)
 366                 *ap->a_doffsetp = ap->a_loffset;
 367         if (ap->a_runp != NULL)
 368                 *ap->a_runp = 0;
 369         if (ap->a_runb != NULL)
 370                 *ap->a_runb = 0;
 371         return (0);
 372 }
 373
 374 /*
 375  * procfs_inactive is called when the pfsnode
 376  * is vrele'd and the reference count goes
 377  * to zero.  (vp) will be on the vnode free
 378  * list, so to get it back vget() must be
 379  * used.
 380  *
 381  * (vp) is locked on entry, but must be unlocked on exit.
 382  *
 383  * procfs_inactive(struct vnode *a_vp, struct thread *a_td)
 384  */
 385 static int
 386 procfs_inactive(struct vop_inactive_args *ap)
 387 {
 388         /*struct vnode *vp = ap->a_vp;*/
 389
 390         return (0);
 391 }
 392
 393 /*
 394  * _reclaim is called when getnewvnode()
 395  * wants to make use of an entry on the vnode
 396  * free list.  at this time the filesystem needs
 397  * to free any private data and remove the node
 398  * from any private lists.
 399  *
 400  * procfs_reclaim(struct vnode *a_vp)
 401  */
 402 static int
 403 procfs_reclaim(struct vop_reclaim_args *ap)
 404 {
 405         return (procfs_freevp(ap->a_vp));
 406 }
 407
 408 /*
 409  * _print is used for debugging.
 410  * just print a readable description
 411  * of (vp).
 412  *
 413  * procfs_print(struct vnode *a_vp)
 414  */
 415 static int
 416 procfs_print(struct vop_print_args *ap)
 417 {
 418         struct pfsnode *pfs = VTOPFS(ap->a_vp);
 419
 420         printf("tag VT_PROCFS, type %d, pid %ld, mode %x, flags %lx\n",
 421             pfs->pfs_type, (long)pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags);
 422         return (0);
 423 }
 424
 425 /*
 426  * generic entry point for unsupported operations
 427  */
 428 static int
 429 procfs_badop(struct vop_generic_args *ap)
 430 {
 431         return (EIO);
 432 }
 433
 434 /*
 435  * Invent attributes for pfsnode (vp) and store
 436  * them in (vap).
 437  * Directories lengths are returned as zero since
 438  * any real length would require the genuine size
 439  * to be computed, and nothing cares anyway.
 440  *
 441  * this is relatively minimal for procfs.
 442  *
 443  * procfs_getattr(struct vnode *a_vp, struct vattr *a_vap,
 444  *                struct ucred *a_cred, struct thread *a_td)
 445  */
 446 static int
 447 procfs_getattr(struct vop_getattr_args *ap)
 448 {
 449         struct pfsnode *pfs = VTOPFS(ap->a_vp);
 450         struct vattr *vap = ap->a_vap;
 451         struct proc *procp;
 452         int error;
 453
 454         /*
 455          * First make sure that the process and its credentials
 456          * still exist.
 457          */
 458         switch (pfs->pfs_type) {
 459         case Proot:
 460         case Pcurproc:
 461                 procp = 0;
 462                 break;
 463
 464         default:
 465                 procp = PFIND(pfs->pfs_pid);
 466                 if (procp == NULL || procp->p_ucred == NULL)
 467                         return (ENOENT);
 468         }
 469
 470         error = 0;
 471
 472         /* start by zeroing out the attributes */
 473         VATTR_NULL(vap);
 474
 475         /* next do all the common fields */
 476         vap->va_type = ap->a_vp->v_type;
 477         vap->va_mode = pfs->pfs_mode;
 478         vap->va_fileid = pfs->pfs_fileno;
 479         vap->va_flags = 0;
 480         vap->va_blocksize = PAGE_SIZE;
 481         vap->va_bytes = vap->va_size = 0;
 482         vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
 483
 484         /*
 485          * Make all times be current TOD.
 486          * It would be possible to get the process start
 487          * time from the p_stat structure, but there's
 488          * no "file creation" time stamp anyway, and the
 489          * p_stat structure is not addressible if u. gets
 490          * swapped out for that process.
 491          */
 492         nanotime(&vap->va_ctime);
 493         vap->va_atime = vap->va_mtime = vap->va_ctime;
 494
 495         /*
 496          * If the process has exercised some setuid or setgid
 497          * privilege, then rip away read/write permission so
 498          * that only root can gain access.
 499          */
 500         switch (pfs->pfs_type) {
 501         case Pctl:
 502         case Pregs:
 503         case Pfpregs:
 504         case Pdbregs:
 505         case Pmem:
 506                 if (procp->p_flag & P_SUGID)
 507                         vap->va_mode &= ~((VREAD|VWRITE)|
 508                                           ((VREAD|VWRITE)>>3)|
 509                                           ((VREAD|VWRITE)>>6));
 510                 break;
 511         default:
 512                 break;
 513         }
 514
 515         /*
 516          * now do the object specific fields
 517          *
 518          * The size could be set from struct reg, but it's hardly
 519          * worth the trouble, and it puts some (potentially) machine
 520          * dependent data into this machine-independent code.  If it
 521          * becomes important then this function should break out into
 522          * a per-file stat function in the corresponding .c file.
 523          */
 524
 525         vap->va_nlink = 1;
 526         if (procp) {
 527                 vap->va_uid = procp->p_ucred->cr_uid;
 528                 vap->va_gid = procp->p_ucred->cr_gid;
 529         }
 530
 531         switch (pfs->pfs_type) {
 532         case Proot:
 533                 /*
 534                  * Set nlink to 1 to tell fts(3) we don't actually know.
 535                  */
 536                 vap->va_nlink = 1;
 537                 vap->va_uid = 0;
 538                 vap->va_gid = 0;
 539                 vap->va_size = vap->va_bytes = DEV_BSIZE;
 540                 break;
 541
 542         case Pcurproc: {
 543                 char buf[16];           /* should be enough */
 544                 vap->va_uid = 0;
 545                 vap->va_gid = 0;
 546                 vap->va_size = vap->va_bytes =
 547                     snprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid);
 548                 break;
 549         }
 550
 551         case Pproc:
 552                 vap->va_nlink = nproc_targets;
 553                 vap->va_size = vap->va_bytes = DEV_BSIZE;
 554                 break;
 555
 556         case Pfile: {
 557                 char *fullpath, *freepath;
 558                 error = vn_fullpath(procp, NULL, &fullpath, &freepath);
 559                 if (error == 0) {
 560                         vap->va_size = strlen(fullpath);
 561                         kfree(freepath, M_TEMP);
 562                 } else {
 563                         vap->va_size = sizeof("unknown") - 1;
 564                         error = 0;
 565                 }
 566                 vap->va_bytes = vap->va_size;
 567                 break;
 568         }
 569
 570         case Pmem:
 571                 /*
 572                  * If we denied owner access earlier, then we have to
 573                  * change the owner to root - otherwise 'ps' and friends
 574                  * will break even though they are setgid kmem. *SIGH*
 575                  */
 576                 if (procp->p_flag & P_SUGID)
 577                         vap->va_uid = 0;
 578                 else
 579                         vap->va_uid = procp->p_ucred->cr_uid;
 580                 break;
 581
 582         case Pregs:
 583                 vap->va_bytes = vap->va_size = sizeof(struct reg);
 584                 break;
 585
 586         case Pfpregs:
 587                 vap->va_bytes = vap->va_size = sizeof(struct fpreg);
 588                 break;
 589
 590         case Pdbregs:
 591                 vap->va_bytes = vap->va_size = sizeof(struct dbreg);
 592                 break;
 593
 594         case Ptype:
 595         case Pmap:
 596         case Pctl:
 597         case Pstatus:
 598         case Pnote:
 599         case Pnotepg:
 600         case Pcmdline:
 601         case Prlimit:
 602                 break;
 603
 604         default:
 605                 panic("procfs_getattr");
 606         }
 607
 608         return (error);
 609 }
 610
 611 /*
 612  * procfs_setattr(struct vnode *a_vp, struct vattr *a_vap,
 613  *                struct ucred *a_cred, struct thread *a_td)
 614  */
 615 static int
 616 procfs_setattr(struct vop_setattr_args *ap)
 617 {
 618         if (ap->a_vap->va_flags != VNOVAL)
 619                 return (EOPNOTSUPP);
 620
 621         /*
 622          * just fake out attribute setting
 623          * it's not good to generate an error
 624          * return, otherwise things like creat()
 625          * will fail when they try to set the
 626          * file length to 0.  worse, this means
 627          * that echo $note > /proc/$pid/note will fail.
 628          */
 629
 630         return (0);
 631 }
 632
 633 /*
 634  * implement access checking.
 635  *
 636  * something very similar to this code is duplicated
 637  * throughout the 4bsd kernel and should be moved
 638  * into kern/vfs_subr.c sometime.
 639  *
 640  * actually, the check for super-user is slightly
 641  * broken since it will allow read access to write-only
 642  * objects.  this doesn't cause any particular trouble
 643  * but does mean that the i/o entry points need to check
 644  * that the operation really does make sense.
 645  *
 646  * procfs_access(struct vnode *a_vp, int a_mode, struct ucred *a_cred,
 647  *               struct thread *a_td)
 648  */
 649 static int
 650 procfs_access(struct vop_access_args *ap)
 651 {
 652         struct vattr *vap;
 653         struct vattr vattr;
 654         int error;
 655
 656         /*
 657          * If you're the super-user,
 658          * you always get access.
 659          */
 660         if (ap->a_cred->cr_uid == 0)
 661                 return (0);
 662
 663         vap = &vattr;
 664         error = VOP_GETATTR(ap->a_vp, vap);
 665         if (error)
 666                 return (error);
 667
 668         /*
 669          * Access check is based on only one of owner, group, public.
 670          * If not owner, then check group. If not a member of the
 671          * group, then check public access.
 672          */
 673         if (ap->a_cred->cr_uid != vap->va_uid) {
 674                 gid_t *gp;
 675                 int i;
 676
 677                 ap->a_mode >>= 3;
 678                 gp = ap->a_cred->cr_groups;
 679                 for (i = 0; i < ap->a_cred->cr_ngroups; i++, gp++)
 680                         if (vap->va_gid == *gp)
 681                                 goto found;
 682                 ap->a_mode >>= 3;
 683 found:
 684                 ;
 685         }
 686
 687         if ((vap->va_mode & ap->a_mode) == ap->a_mode)
 688                 return (0);
 689
 690         return (EACCES);
 691 }
 692
 693 /*
 694  * lookup.  this is incredibly complicated in the general case, however
 695  * for most pseudo-filesystems very little needs to be done.
 696  *
 697  * procfs_lookup(struct vnode *a_dvp, struct vnode **a_vpp,
 698  *               struct componentname *a_cnp)
 699  */
 700 static int
 701 procfs_lookup(struct vop_old_lookup_args *ap)
 702 {
 703         struct componentname *cnp = ap->a_cnp;
 704         struct vnode **vpp = ap->a_vpp;
 705         struct vnode *dvp = ap->a_dvp;
 706         char *pname = cnp->cn_nameptr;
 707         /* struct proc *curp = cnp->cn_proc; */
 708         struct proc_target *pt;
 709         pid_t pid;
 710         struct pfsnode *pfs;
 711         struct proc *p;
 712         int i;
 713         int error;
 714
 715         *vpp = NULL;
 716
 717         if (cnp->cn_nameiop == NAMEI_DELETE || cnp->cn_nameiop == NAMEI_RENAME)
 718                 return (EROFS);
 719
 720         error = 0;
 721         if (cnp->cn_namelen == 1 && *pname == '.') {
 722                 *vpp = dvp;
 723                 vref(*vpp);
 724                 goto out;
 725         }
 726
 727         pfs = VTOPFS(dvp);
 728         switch (pfs->pfs_type) {
 729         case Proot:
 730                 if (cnp->cn_flags & CNP_ISDOTDOT)
 731                         return (EIO);
 732
 733                 if (CNEQ(cnp, "curproc", 7)) {
 734                         error = procfs_allocvp(dvp->v_mount, vpp, 0, Pcurproc);
 735                         goto out;
 736                 }
 737
 738                 pid = atopid(pname, cnp->cn_namelen);
 739                 if (pid == NO_PID)
 740                         break;
 741
 742                 p = PFIND(pid);
 743                 if (p == NULL)
 744                         break;
 745
 746                 if (!PRISON_CHECK(ap->a_cnp->cn_cred, p->p_ucred))
 747                         break;
 748
 749                 if (ps_showallprocs == 0 && ap->a_cnp->cn_cred->cr_uid != 0 &&
 750                     ap->a_cnp->cn_cred->cr_uid != p->p_ucred->cr_uid)
 751                         break;
 752
 753                 error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc);
 754                 goto out;
 755
 756         case Pproc:
 757                 if (cnp->cn_flags & CNP_ISDOTDOT) {
 758                         error = procfs_root(dvp->v_mount, vpp);
 759                         goto out;
 760                 }
 761
 762                 p = PFIND(pfs->pfs_pid);
 763                 if (p == NULL)
 764                         break;
 765
 766                 if (!PRISON_CHECK(ap->a_cnp->cn_cred, p->p_ucred))
 767                         break;
 768
 769                 if (ps_showallprocs == 0 && ap->a_cnp->cn_cred->cr_uid != 0 &&
 770                     ap->a_cnp->cn_cred->cr_uid != p->p_ucred->cr_uid)
 771                         break;
 772
 773                 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
 774                         if (cnp->cn_namelen == pt->pt_namlen &&
 775                             bcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
 776                             (pt->pt_valid == NULL || (*pt->pt_valid)(p)))
 777                                 goto found;
 778                 }
 779                 break;
 780         found:
 781                 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
 782                                         pt->pt_pfstype);
 783                 goto out;
 784
 785         default:
 786                 error = ENOTDIR;
 787                 goto out;
 788         }
 789         if (cnp->cn_nameiop == NAMEI_LOOKUP)
 790                 error = ENOENT;
 791         else
 792                 error = EROFS;
 793         /*
 794          * If no error occured *vpp will hold a referenced locked vnode.
 795          * dvp was passed to us locked and *vpp must be returned locked.
 796          * If *vpp != dvp then we should unlock dvp if (1) this is not the
 797          * last component or (2) CNP_LOCKPARENT is not set.
 798          */
 799 out:
 800         if (error == 0 && *vpp != dvp) {
 801                 if ((cnp->cn_flags & CNP_LOCKPARENT) == 0) {
 802                         cnp->cn_flags |= CNP_PDIRUNLOCK;
 803                         vn_unlock(dvp);
 804                 }
 805         }
 806         return (error);
 807 }
 808
 809 /*
 810  * Does this process have a text file?
 811  */
 812 int
 813 procfs_validfile(struct proc *p)
 814 {
 815         return (procfs_findtextvp(p) != NULLVP);
 816 }
 817
 818 /*
 819  * readdir() returns directory entries from pfsnode (vp).
 820  *
 821  * We generate just one directory entry at a time, as it would probably
 822  * not pay off to buffer several entries locally to save uiomove calls.
 823  *
 824  * procfs_readdir(struct vnode *a_vp, struct uio *a_uio, struct ucred *a_cred,
 825  *                int *a_eofflag, int *a_ncookies, u_long **a_cookies)
 826  */
 827 static int
 828 procfs_readdir(struct vop_readdir_args *ap)
 829 {
 830         struct pfsnode *pfs;
 831         int error;
 832
 833         if (ap->a_uio->uio_offset < 0 || ap->a_uio->uio_offset > INT_MAX)
 834                 return (EINVAL);
 835         if ((error = vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY)) != 0)
 836                 return (error);
 837         pfs = VTOPFS(ap->a_vp);
 838
 839         switch (pfs->pfs_type) {
 840         case Pproc:
 841                 /*
 842                  * this is for the process-specific sub-directories.
 843                  * all that is needed to is copy out all the entries
 844                  * from the procent[] table (top of this file).
 845                  */
 846                 error = procfs_readdir_proc(ap);
 847                 break;
 848         case Proot:
 849                 /*
 850                  * this is for the root of the procfs filesystem
 851                  * what is needed is a special entry for "curproc"
 852                  * followed by an entry for each process on allproc
 853                  */
 854                 error = procfs_readdir_root(ap);
 855                 break;
 856         default:
 857                 error = ENOTDIR;
 858                 break;
 859         }
 860
 861         vn_unlock(ap->a_vp);
 862         return (error);
 863 }
 864
 865 static int
 866 procfs_readdir_proc(struct vop_readdir_args *ap)
 867 {
 868         struct pfsnode *pfs;
 869         int error, i, retval;
 870         struct proc *p;
 871         struct proc_target *pt;
 872         struct uio *uio = ap->a_uio;
 873
 874         pfs = VTOPFS(ap->a_vp);
 875         p = PFIND(pfs->pfs_pid);
 876         if (p == NULL)
 877                 return(0);
 878         if (!PRISON_CHECK(ap->a_cred, p->p_ucred))
 879                 return(0);
 880
 881         error = 0;
 882         i = (int)uio->uio_offset;
 883         if (i < 0)
 884                 return (EINVAL);
 885
 886         for (pt = &proc_targets[i];
 887              !error && uio->uio_resid > 0 && i < nproc_targets; pt++, i++) {
 888                 if (pt->pt_valid && (*pt->pt_valid)(p) == 0)
 889                         continue;
 890
 891                 retval = vop_write_dirent(&error, uio,
 892                     PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype), pt->pt_type,
 893                     pt->pt_namlen, pt->pt_name);
 894                 if (retval)
 895                         break;
 896         }
 897
 898         uio->uio_offset = (off_t)i;
 899
 900         return(0);
 901 }
 902
 903 struct procfs_readdir_root_info {
 904         int error;
 905         int i;
 906         int pcnt;
 907         struct uio *uio;
 908         struct ucred *cred;
 909 };
 910
 911 static int procfs_readdir_root_callback(struct proc *p, void *data);
 912
 913 static int
 914 procfs_readdir_root(struct vop_readdir_args *ap)
 915 {
 916         struct procfs_readdir_root_info info;
 917         struct uio *uio = ap->a_uio;
 918         int res;
 919
 920         info.error = 0;
 921         info.i = (int)uio->uio_offset;
 922
 923         if (info.i < 0)
 924                 return (EINVAL);
 925
 926         info.pcnt = 0;
 927         info.uio = uio;
 928         info.cred = ap->a_cred;
 929         while (info.pcnt < 3) {
 930                 res = procfs_readdir_root_callback(NULL, &info);
 931                 if (res < 0)
 932                         break;
 933         }
 934         if (res >= 0)
 935                 allproc_scan(procfs_readdir_root_callback, &info);
 936         uio->uio_offset = (off_t)info.i;
 937
 938         return (info.error);
 939 }
 940
 941 static int
 942 procfs_readdir_root_callback(struct proc *p, void *data)
 943 {
 944         struct procfs_readdir_root_info *info = data;
 945         struct uio *uio;
 946         int retval;
 947         ino_t d_ino;
 948         const char *d_name;
 949         char d_name_pid[20];
 950         size_t d_namlen;
 951         uint8_t d_type;
 952
 953         uio = info->uio;
 954
 955         if (uio->uio_resid <= 0 || info->error)
 956                 return(-1);
 957
 958         switch (info->pcnt) {
 959         case 0:         /* `.' */
 960                 d_ino = PROCFS_FILENO(0, Proot);
 961                 d_name = ".";
 962                 d_namlen = 1;
 963                 d_type = DT_DIR;
 964                 break;
 965         case 1:         /* `..' */
 966                 d_ino = PROCFS_FILENO(0, Proot);
 967                 d_name = "..";
 968                 d_namlen = 2;
 969                 d_type = DT_DIR;
 970                 break;
 971
 972         case 2:
 973                 d_ino = PROCFS_FILENO(0, Pcurproc);
 974                 d_namlen = 7;
 975                 d_name = "curproc";
 976                 d_type = DT_LNK;
 977                 break;
 978
 979
 980         default:
 981                 if (!PRISON_CHECK(info->cred, p->p_ucred))
 982                         return(0);
 983                 if (ps_showallprocs == 0 &&
 984                     info->cred->cr_uid != 0 &&
 985                     info->cred->cr_uid != p->p_ucred->cr_uid) {
 986                         return(0);
 987                 }
 988
 989                 /*
 990                  * Skip entries we have already returned (optimization)
 991                  */
 992                 if (info->pcnt < info->i) {
 993                         ++info->pcnt;
 994                         return(0);
 995                 }
 996
 997                 d_ino = PROCFS_FILENO(p->p_pid, Pproc);
 998                 d_namlen = snprintf(d_name_pid, sizeof(d_name_pid),
 999                     "%ld", (long)p->p_pid);
1000                 d_name = d_name_pid;
1001                 d_type = DT_DIR;
1002                 break;
1003         }
1004
1005         /*
1006          * Skip entries we have already returned (optimization)
1007          */
1008         if (info->pcnt < info->i) {
1009                 ++info->pcnt;
1010                 return(0);
1011         }
1012
1013         retval = vop_write_dirent(&info->error, uio,
1014                                   d_ino, d_type, d_namlen, d_name);
1015         if (retval)
1016                 return(-1);
1017         ++info->pcnt;
1018         ++info->i;
1019         return(0);
1020 }
1021
1022 /*
1023  * readlink reads the link of `curproc' or `file'
1024  */
1025 static int
1026 procfs_readlink(struct vop_readlink_args *ap)
1027 {
1028         char buf[16];           /* should be enough */
1029         struct proc *procp;
1030         struct vnode *vp = ap->a_vp;
1031         struct pfsnode *pfs = VTOPFS(vp);
1032         char *fullpath, *freepath;
1033         int error, len;
1034
1035         switch (pfs->pfs_type) {
1036         case Pcurproc:
1037                 if (pfs->pfs_fileno != PROCFS_FILENO(0, Pcurproc))
1038                         return (EINVAL);
1039
1040                 len = snprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid);
1041
1042                 return (uiomove(buf, len, ap->a_uio));
1043         /*
1044          * There _should_ be no way for an entire process to disappear
1045          * from under us...
1046          */
1047         case Pfile:
1048                 procp = PFIND(pfs->pfs_pid);
1049                 if (procp == NULL || procp->p_ucred == NULL) {
1050                         printf("procfs_readlink: pid %d disappeared\n",
1051                             pfs->pfs_pid);
1052                         return (uiomove("unknown", sizeof("unknown") - 1,
1053                             ap->a_uio));
1054                 }
1055                 error = vn_fullpath(procp, NULL, &fullpath, &freepath);
1056                 if (error != 0)
1057                         return (uiomove("unknown", sizeof("unknown") - 1,
1058                             ap->a_uio));
1059                 error = uiomove(fullpath, strlen(fullpath), ap->a_uio);
1060                 kfree(freepath, M_TEMP);
1061                 return (error);
1062         default:
1063                 return (EINVAL);
1064         }
1065 }
1066
1067 /*
1068  * convert decimal ascii to pid_t
1069  */
1070 static pid_t
1071 atopid(const char *b, u_int len)
1072 {
1073         pid_t p = 0;
1074
1075         while (len--) {
1076                 char c = *b++;
1077                 if (c < '0' || c > '9')
1078                         return (NO_PID);
1079                 p = 10 * p + (c - '0');
1080                 if (p > PID_MAX)
1081                         return (NO_PID);
1082         }
1083
1084         return (p);
1085 }
1086