sys/vfs/procfs/procfs_subr.c

   1 /*
   2  * Copyright (c) 1993 Jan-Simon Pendry
   3  * Copyright (c) 1993
   4  *      The Regents of the University of California.  All rights reserved.
   5  *
   6  * This code is derived from software contributed to Berkeley by
   7  * Jan-Simon Pendry.
   8  *
   9  * Redistribution and use in source and binary forms, with or without
  10  * modification, are permitted provided that the following conditions
  11  * are met:
  12  * 1. Redistributions of source code must retain the above copyright
  13  *    notice, this list of conditions and the following disclaimer.
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  *    notice, this list of conditions and the following disclaimer in the
  16  *    documentation and/or other materials provided with the distribution.
  17  * 3. Neither the name of the University nor the names of its contributors
  18  *    may be used to endorse or promote products derived from this software
  19  *    without specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  31  * SUCH DAMAGE.
  32  *
  33  *      @(#)procfs_subr.c       8.6 (Berkeley) 5/14/95
  34  *
  35  * $FreeBSD: src/sys/miscfs/procfs/procfs_subr.c,v 1.26.2.3 2002/02/18 21:28:04 des Exp $
  36  */
  37
  38 #include <sys/param.h>
  39 #include <sys/systm.h>
  40 #include <sys/sysctl.h>
  41 #include <sys/uio.h>
  42 #include <sys/proc.h>
  43 #include <sys/mount.h>
  44 #include <sys/vnode.h>
  45 #include <sys/malloc.h>
  46 #include <sys/spinlock.h>
  47
  48 #include <sys/spinlock2.h>
  49
  50 #include <vfs/procfs/procfs.h>
  51
  52 #define PFS_HSIZE       1031
  53
  54 struct pfshead {
  55         struct spinlock spin;
  56         struct pfsnode  *first;
  57 } __cachealign;
  58
  59 static struct pfshead   pfshead[PFS_HSIZE];
  60 static struct lock      procfslk = LOCK_INITIALIZER("pvplk", 0, 0);
  61
  62 MALLOC_DEFINE(M_PROCFS, "procfs", "procfs v_data");
  63
  64 #define PFSHASH(pid)    &pfshead[((pid) & ~PFS_DEAD) % PFS_HSIZE]
  65
  66 /*
  67  * Allocate a pfsnode/vnode pair.  If no error occurs the returned vnode
  68  * will be referenced and exclusively locked.
  69  *
  70  * The pid, pfs_type, and mount point uniquely identify a pfsnode.
  71  * The mount point is needed because someone might mount this filesystem
  72  * twice.
  73  *
  74  * All pfsnodes are maintained on a singly-linked list.  new nodes are
  75  * only allocated when they cannot be found on this list.  entries on
  76  * the list are removed when the vfs reclaim entry is called.
  77  *
  78  * A single lock is kept for the entire list.  this is needed because the
  79  * getnewvnode() function can block waiting for a vnode to become free,
  80  * in which case there may be more than one process trying to get the same
  81  * vnode.  this lock is only taken if we are going to call getnewvnode,
  82  * since the kernel itself is single-threaded.
  83  *
  84  * If an entry is found on the list, then call vget() to take a reference
  85  * and obtain the lock.  This will properly re-reference the vnode if it
  86  * had gotten onto the free list.
  87  */
  88 int
  89 procfs_allocvp(struct mount *mp, struct vnode **vpp, long pid, pfstype pfs_type)
  90 {
  91         struct pfsnode *pfs;
  92         struct vnode *vp;
  93         struct pfshead *ph;
  94         int error;
  95
  96         ph = PFSHASH(pid);
  97 loop:
  98         spin_lock(&ph->spin);
  99         for (pfs = ph->first; pfs; pfs = pfs->pfs_next) {
 100                 if (pfs->pfs_pid == pid && pfs->pfs_type == pfs_type &&
 101                     PFSTOV(pfs)->v_mount == mp) {
 102                         vp = PFSTOV(pfs);
 103                         vhold(vp);
 104                         spin_unlock(&ph->spin);
 105                         if (vget(vp, LK_EXCLUSIVE)) {
 106                                 vdrop(vp);
 107                                 goto loop;
 108                         }
 109                         vdrop(vp);
 110
 111                         /*
 112                          * Make sure the vnode is still in the cache after
 113                          * getting the interlock to avoid racing a free.
 114                          */
 115                         spin_lock(&ph->spin);
 116                         for (pfs = ph->first; pfs; pfs = pfs->pfs_next) {
 117                                 if (PFSTOV(pfs) == vp &&
 118                                     pfs->pfs_pid == pid &&
 119                                     pfs->pfs_type == pfs_type &&
 120                                     PFSTOV(pfs)->v_mount == mp) {
 121                                         break;
 122                                 }
 123                         }
 124                         if (pfs == NULL || PFSTOV(pfs) != vp) {
 125                                 spin_unlock(&ph->spin);
 126                                 vput(vp);
 127                                 goto loop;
 128
 129                         }
 130                         spin_unlock(&ph->spin);
 131                         *vpp = vp;
 132                         return (0);
 133                 }
 134         }
 135         spin_unlock(&ph->spin);
 136
 137         /*
 138          * otherwise lock the vp list while we call getnewvnode
 139          * since that can block.
 140          */
 141         if (lockmgr(&procfslk, LK_EXCLUSIVE|LK_SLEEPFAIL))
 142                 goto loop;
 143
 144         /*
 145          * Do the MALLOC before the getnewvnode since doing so afterward
 146          * might cause a bogus v_data pointer to get dereferenced
 147          * elsewhere if MALLOC should block.
 148          *
 149          * XXX this may not matter anymore since getnewvnode now returns
 150          * a VX locked vnode.
 151          */
 152         pfs = kmalloc(sizeof(struct pfsnode), M_PROCFS, M_WAITOK);
 153
 154         error = getnewvnode(VT_PROCFS, mp, vpp, 0, 0);
 155         if (error) {
 156                 kfree(pfs, M_PROCFS);
 157                 goto out;
 158         }
 159         vp = *vpp;
 160
 161         vp->v_data = pfs;
 162
 163         pfs->pfs_next = 0;
 164         pfs->pfs_pid = (pid_t) pid;
 165         pfs->pfs_type = pfs_type;
 166         pfs->pfs_vnode = vp;
 167         pfs->pfs_flags = 0;
 168         pfs->pfs_fileno = PROCFS_FILENO(pid, pfs_type);
 169         lockinit(&pfs->pfs_lock, "pfslk", 0, 0);
 170
 171         switch (pfs_type) {
 172         case Proot:     /* /proc = dr-xr-xr-x */
 173                 pfs->pfs_mode = (VREAD|VEXEC) |
 174                                 (VREAD|VEXEC) >> 3 |
 175                                 (VREAD|VEXEC) >> 6;
 176                 vp->v_type = VDIR;
 177                 vp->v_flag = VROOT;
 178                 break;
 179
 180         case Pcurproc:  /* /proc/curproc = lr--r--r-- */
 181                 pfs->pfs_mode = (VREAD) |
 182                                 (VREAD >> 3) |
 183                                 (VREAD >> 6);
 184                 vp->v_type = VLNK;
 185                 break;
 186
 187         case Pproc:
 188                 pfs->pfs_mode = (VREAD|VEXEC) |
 189                                 (VREAD|VEXEC) >> 3 |
 190                                 (VREAD|VEXEC) >> 6;
 191                 vp->v_type = VDIR;
 192                 break;
 193
 194         case Pfile:
 195                 pfs->pfs_mode = (VREAD|VEXEC) |
 196                                 (VREAD|VEXEC) >> 3 |
 197                                 (VREAD|VEXEC) >> 6;
 198                 vp->v_type = VLNK;
 199                 break;
 200
 201         case Pmem:
 202                 pfs->pfs_mode = (VREAD|VWRITE);
 203                 vp->v_type = VREG;
 204                 break;
 205
 206         case Pregs:
 207         case Pfpregs:
 208         case Pdbregs:
 209                 pfs->pfs_mode = (VREAD|VWRITE);
 210                 vp->v_type = VREG;
 211                 break;
 212
 213         case Pctl:
 214         case Pnote:
 215         case Pnotepg:
 216                 pfs->pfs_mode = (VWRITE);
 217                 vp->v_type = VREG;
 218                 break;
 219
 220         case Ptype:
 221         case Pmap:
 222         case Pstatus:
 223         case Pcmdline:
 224         case Prlimit:
 225                 pfs->pfs_mode = (VREAD) |
 226                                 (VREAD >> 3) |
 227                                 (VREAD >> 6);
 228                 vp->v_type = VREG;
 229                 break;
 230
 231         default:
 232                 panic("procfs_allocvp");
 233         }
 234
 235         /* add to procfs vnode list */
 236         spin_lock(&ph->spin);
 237         pfs->pfs_next = ph->first;
 238         ph->first = pfs;
 239         spin_unlock(&ph->spin);
 240         vx_downgrade(vp);
 241
 242 out:
 243         lockmgr(&procfslk, LK_RELEASE);
 244
 245         return (error);
 246 }
 247
 248 int
 249 procfs_freevp(struct vnode *vp)
 250 {
 251         struct pfshead *ph;
 252         struct pfsnode **pp;
 253         struct pfsnode *pfs;
 254
 255         pfs = VTOPFS(vp);
 256         vp->v_data = NULL;
 257         ph = PFSHASH(pfs->pfs_pid);
 258
 259         spin_lock(&ph->spin);
 260         pp = &ph->first;
 261         while (*pp != pfs) {
 262                 KKASSERT(*pp != NULL);
 263                 pp = &(*pp)->pfs_next;
 264         }
 265         *pp = pfs->pfs_next;
 266         spin_unlock(&ph->spin);
 267
 268         pfs->pfs_next = NULL;
 269         pfs->pfs_vnode = NULL;
 270         kfree(pfs, M_PROCFS);
 271
 272         return (0);
 273 }
 274
 275 /*
 276  * Try to find the calling pid. Note that pfind()
 277  * now references the proc structure to be returned
 278  * and needs to be released later with PRELE().
 279  */
 280 struct proc *
 281 pfs_pfind(pid_t pfs_pid)
 282 {
 283         struct proc *p = NULL;
 284
 285         if (pfs_pid == 0) {
 286                 p = &proc0;
 287                 PHOLD(p);
 288         } else {
 289                 p = pfind(pfs_pid);
 290         }
 291
 292         /*
 293          * Make sure the process is not in the middle of exiting (where
 294          * a lot of its structural members may wind up being NULL).  If it
 295          * is we give up on it.
 296          */
 297         if (p) {
 298                 lwkt_gettoken(&p->p_token);
 299                 if (p->p_flags & P_POSTEXIT) {
 300                         lwkt_reltoken(&p->p_token);
 301                         PRELE(p);
 302                         p = NULL;
 303                 }
 304         }
 305         return p;
 306 }
 307
 308 struct proc *
 309 pfs_zpfind(pid_t pfs_pid)
 310 {
 311         struct proc *p = NULL;
 312
 313         if (pfs_pid == 0) {
 314                 p = &proc0;
 315                 PHOLD(p);
 316         } else {
 317                 p = zpfind(pfs_pid);
 318         }
 319
 320         /*
 321          * Make sure the process is not in the middle of exiting (where
 322          * a lot of its structural members may wind up being NULL).  If it
 323          * is we give up on it.
 324          */
 325         if (p) {
 326                 lwkt_gettoken(&p->p_token);
 327                 if (p->p_flags & P_POSTEXIT) {
 328                         lwkt_reltoken(&p->p_token);
 329                         PRELE(p);
 330                         p = NULL;
 331                 }
 332         }
 333         return p;
 334 }
 335
 336 void
 337 pfs_pdone(struct proc *p)
 338 {
 339         if (p) {
 340                 lwkt_reltoken(&p->p_token);
 341                 PRELE(p);
 342         }
 343 }
 344
 345 int
 346 procfs_rw(struct vop_read_args *ap)
 347 {
 348         struct vnode *vp = ap->a_vp;
 349         struct uio *uio = ap->a_uio;
 350         struct thread *curtd = uio->uio_td;
 351         struct proc *curp;
 352         struct pfsnode *pfs = VTOPFS(vp);
 353         struct proc *p;
 354         struct lwp *lp;
 355         int rtval;
 356
 357         if (curtd == NULL)
 358                 return (EINVAL);
 359         if ((curp = curtd->td_proc) == NULL)    /* XXX */
 360                 return (EINVAL);
 361
 362         p = pfs_pfind(pfs->pfs_pid);
 363         if (p == NULL) {
 364                 rtval = EINVAL;
 365                 goto out;
 366         }
 367         if (p->p_pid == 1 && securelevel > 0 && uio->uio_rw == UIO_WRITE) {
 368                 rtval = EACCES;
 369                 goto out;
 370         }
 371
 372         /*
 373          * XXX lwp
 374          */
 375         lp = FIRST_LWP_IN_PROC(p);
 376         if (lp == NULL) {
 377                 rtval = EINVAL;
 378                 goto out;
 379         }
 380         LWPHOLD(lp);
 381
 382         lockmgr(&pfs->pfs_lock, LK_EXCLUSIVE);
 383
 384         switch (pfs->pfs_type) {
 385         case Pnote:
 386         case Pnotepg:
 387                 rtval = procfs_donote(curp, lp, pfs, uio);
 388                 break;
 389
 390         case Pregs:
 391                 rtval = procfs_doregs(curp, lp, pfs, uio);
 392                 break;
 393
 394         case Pfpregs:
 395                 rtval = procfs_dofpregs(curp, lp, pfs, uio);
 396                 break;
 397
 398         case Pdbregs:
 399                 rtval = procfs_dodbregs(curp, lp, pfs, uio);
 400                 break;
 401
 402         case Pctl:
 403                 rtval = procfs_doctl(curp, lp, pfs, uio);
 404                 break;
 405
 406         case Pstatus:
 407                 rtval = procfs_dostatus(curp, lp, pfs, uio);
 408                 break;
 409
 410         case Pmap:
 411                 rtval = procfs_domap(curp, lp, pfs, uio);
 412                 break;
 413
 414         case Pmem:
 415                 rtval = procfs_domem(curp, lp, pfs, uio);
 416                 break;
 417
 418         case Ptype:
 419                 rtval = procfs_dotype(curp, lp, pfs, uio);
 420                 break;
 421
 422         case Pcmdline:
 423                 rtval = procfs_docmdline(curp, lp, pfs, uio);
 424                 break;
 425
 426         case Prlimit:
 427                 rtval = procfs_dorlimit(curp, lp, pfs, uio);
 428                 break;
 429
 430         default:
 431                 rtval = EOPNOTSUPP;
 432                 break;
 433         }
 434         LWPRELE(lp);
 435
 436         lockmgr(&pfs->pfs_lock, LK_RELEASE);
 437 out:
 438         pfs_pdone(p);
 439
 440         return rtval;
 441 }
 442
 443 /*
 444  * Get a string from userland into (buf).  Strip a trailing
 445  * nl character (to allow easy access from the shell).
 446  * The buffer should be *buflenp + 1 chars long.  vfs_getuserstr
 447  * will automatically add a nul char at the end.
 448  *
 449  * Returns 0 on success or the following errors
 450  *
 451  * EINVAL:    file offset is non-zero.
 452  * EMSGSIZE:  message is longer than kernel buffer
 453  * EFAULT:    user i/o buffer is not addressable
 454  */
 455 int
 456 vfs_getuserstr(struct uio *uio, char *buf, int *buflenp)
 457 {
 458         int xlen;
 459         int error;
 460
 461         if (uio->uio_offset != 0)
 462                 return (EINVAL);
 463
 464         xlen = *buflenp;
 465
 466         /* must be able to read the whole string in one go */
 467         if (xlen < uio->uio_resid)
 468                 return (EMSGSIZE);
 469         xlen = uio->uio_resid;
 470
 471         if ((error = uiomove(buf, xlen, uio)) != 0)
 472                 return (error);
 473
 474         /* allow multiple writes without seeks */
 475         uio->uio_offset = 0;
 476
 477         /* cleanup string and remove trailing newline */
 478         buf[xlen] = '\0';
 479         xlen = strlen(buf);
 480         if (xlen > 0 && buf[xlen-1] == '\n')
 481                 buf[--xlen] = '\0';
 482         *buflenp = xlen;
 483
 484         return (0);
 485 }
 486
 487 vfs_namemap_t *
 488 vfs_findname(vfs_namemap_t *nm, char *buf, int buflen)
 489 {
 490
 491         for (; nm->nm_name; nm++)
 492                 if (bcmp(buf, nm->nm_name, buflen+1) == 0)
 493                         return (nm);
 494
 495         return (0);
 496 }
 497
 498 void
 499 procfs_exit(struct thread *td)
 500 {
 501         struct pfshead *ph;
 502         struct pfsnode *pfs;
 503         struct vnode *vp;
 504         pid_t pid;
 505
 506         KKASSERT(td->td_proc);
 507         pid = td->td_proc->p_pid;
 508
 509         /*
 510          * NOTE: We can't just vgone() the vnode any more, not while
 511          *       it may potentially still be active.  This will clean
 512          *       the vp and clear the mount and cause the new VOP subsystem
 513          *       to assert or panic when someone tries to do an operation
 514          *       on an open (exited) procfs descriptor.
 515          *
 516          * Prevent further operations on this pid by setting pfs_pid to -1.
 517          * Note that a pfs_pid of 0 is used for nodes which do not track
 518          * any particular pid.
 519          *
 520          * Use vx_get() to properly ref/lock a vp which may not have any
 521          * refs and which may or may not already be reclaimed.  vx_put()
 522          * will then properly deactivate it and cause it to be recycled.
 523          *
 524          * The hash table can also get ripped out from under us when
 525          * we block so take the easy way out and restart the scan.
 526          */
 527         for (;;) {
 528                 ph = PFSHASH(pid);
 529                 spin_lock(&ph->spin);
 530                 for (pfs = ph->first; pfs; pfs = pfs->pfs_next) {
 531                         if (pfs->pfs_pid == pid)
 532                                 break;
 533                 }
 534                 if (pfs == NULL) {
 535                         spin_unlock(&ph->spin);
 536                         break;
 537                 }
 538                 vp = PFSTOV(pfs);
 539                 vhold(vp);
 540                 spin_unlock(&ph->spin);
 541                 vx_get(vp);
 542                 pfs->pfs_pid |= PFS_DEAD; /* does not effect hash */
 543                 vx_put(vp);
 544                 vdrop(vp);
 545         }
 546 }