kernel - Major signal path adjustments to fix races, tsleep race fixes, +more
[dragonfly.git] / sys / vfs / procfs / procfs_vnops.c
index d7b26c0..0613945 100644 (file)
@@ -37,7 +37,6 @@
  *     @(#)procfs_vnops.c      8.18 (Berkeley) 5/21/95
  *
  * $FreeBSD: src/sys/miscfs/procfs/procfs_vnops.c,v 1.76.2.7 2002/01/22 17:22:59 nectar Exp $
- * $DragonFly: src/sys/vfs/procfs/procfs_vnops.c,v 1.46 2007/11/20 21:03:50 dillon Exp $
  */
 
 /*
@@ -64,6 +63,8 @@
 #include <vfs/procfs/procfs.h>
 #include <sys/pioctl.h>
 
+#include <sys/spinlock2.h>
+
 #include <machine/limits.h>
 
 static int     procfs_access (struct vop_access_args *);
@@ -148,7 +149,7 @@ static struct proc_target {
        { DT_LNK, N("file"),    Pfile,          NULL },
 #undef N
 };
-static const int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]);
+static const int nproc_targets = NELEM(proc_targets);
 
 static pid_t atopid (const char *, u_int);
 
@@ -171,26 +172,35 @@ procfs_open(struct vop_open_args *ap)
 {
        struct pfsnode *pfs = VTOPFS(ap->a_vp);
        struct proc *p1, *p2;
+       int error;
 
-       p2 = PFIND(pfs->pfs_pid);
+       p2 = pfs_pfind(pfs->pfs_pid);
        if (p2 == NULL)
                return (ENOENT);
-       if (pfs->pfs_pid && !PRISON_CHECK(ap->a_cred, p2->p_ucred))
-               return (ENOENT);
+       if (pfs->pfs_pid && !PRISON_CHECK(ap->a_cred, p2->p_ucred)) {
+               error = ENOENT;
+               goto done;
+       }
 
        switch (pfs->pfs_type) {
        case Pmem:
                if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
-                   ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE)))
-                       return (EBUSY);
+                   ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) {
+                       error = EBUSY;
+                       goto done;
+               }
 
                p1 = curproc;
                KKASSERT(p1);
                /* Can't trace a process that's currently exec'ing. */ 
-               if ((p2->p_flag & P_INEXEC) != 0)
-                       return EAGAIN;
-               if (!CHECKIO(p1, p2) || p_trespass(ap->a_cred, p2->p_ucred))
-                       return (EPERM);
+               if ((p2->p_flags & P_INEXEC) != 0) {
+                       error = EAGAIN;
+                       goto done;
+               }
+               if (!CHECKIO(p1, p2) || p_trespass(ap->a_cred, p2->p_ucred)) {
+                       error = EPERM;
+                       goto done;
+               }
 
                if (ap->a_mode & FWRITE)
                        pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
@@ -200,8 +210,10 @@ procfs_open(struct vop_open_args *ap)
        default:
                break;
        }
-
-       return (vop_stdopen(ap));
+       error = vop_stdopen(ap);
+done:
+       PRELE(p2);
+       return error;
 }
 
 /*
@@ -234,13 +246,18 @@ procfs_close(struct vop_close_args *ap)
                 * told to stop on an event, but then the requesting process
                 * has gone away or forgotten about it.
                 */
+               p = NULL;
                if ((ap->a_vp->v_opencount < 2)
                    && (p = pfind(pfs->pfs_pid))
                    && !(p->p_pfsflags & PF_LINGER)) {
+                       spin_lock(&p->p_spin);
                        p->p_stops = 0;
                        p->p_step = 0;
+                       spin_unlock(&p->p_spin);
                        wakeup(&p->p_step);
                }
+               if (p)
+                       PRELE(p);
                break;
        default:
                break;
@@ -268,14 +285,20 @@ procfs_ioctl(struct vop_ioctl_args *ap)
        if (procp == NULL)
                return ENOTTY;
        p = curproc;
-       if (p == NULL)
-               return EINVAL;
+       if (p == NULL) {
+               error = EINVAL;
+               goto done;
+       }
 
        /* Can't trace a process that's currently exec'ing. */ 
-       if ((procp->p_flag & P_INEXEC) != 0)
-               return EAGAIN;
-       if (!CHECKIO(p, procp) || p_trespass(ap->a_cred, procp->p_ucred))
-               return EPERM;
+       if ((procp->p_flags & P_INEXEC) != 0) {
+               error = EAGAIN;
+               goto done;
+       }
+       if (!CHECKIO(p, procp) || p_trespass(ap->a_cred, procp->p_ucred)) {
+               error = EPERM;
+               goto done;
+       }
 
        switch (ap->a_command) {
        case PIOCBIS:
@@ -292,31 +315,49 @@ procfs_ioctl(struct vop_ioctl_args *ap)
 #define NFLAGS (PF_ISUGID)
          flags = (unsigned char)*(unsigned int*)ap->a_data;
          if (flags & NFLAGS && (error = priv_check_cred(ap->a_cred, PRIV_ROOT, 0)))
-           return error;
+           goto done;
          procp->p_pfsflags = flags;
          break;
        case PIOCGFL:
          *(unsigned int*)ap->a_data = (unsigned int)procp->p_pfsflags;
          break;
        case PIOCSTATUS:
+         /*
+          * NOTE: syscall entry deals with stopevents and may run without
+          *       the MP lock.
+          */
          psp = (struct procfs_status *)ap->a_data;
-         psp->state = (procp->p_step == 0);
          psp->flags = procp->p_pfsflags;
          psp->events = procp->p_stops;
+         spin_lock(&procp->p_spin);
          if (procp->p_step) {
+           psp->state = 0;
            psp->why = procp->p_stype;
            psp->val = procp->p_xstat;
+           spin_unlock(&procp->p_spin);
          } else {
-           psp->why = psp->val = 0;    /* Not defined values */
+           psp->state = 1;
+           spin_unlock(&procp->p_spin);
+           psp->why = 0;       /* Not defined values */
+           psp->val = 0;       /* Not defined values */
          }
          break;
        case PIOCWAIT:
+         /*
+          * NOTE: syscall entry deals with stopevents and may run without
+          *       the MP lock.
+          */
          psp = (struct procfs_status *)ap->a_data;
-         if (procp->p_step == 0) {
-           error = tsleep(&procp->p_stype, PCATCH, "piocwait", 0);
+         spin_lock(&procp->p_spin);
+         while (procp->p_step == 0) {
+           tsleep_interlock(&procp->p_stype, PCATCH);
+           spin_unlock(&procp->p_spin);
+           error = tsleep(&procp->p_stype, PCATCH | PINTERLOCKED, "piocwait", 0);
            if (error)
-             return error;
+             goto done;
+           spin_lock(&procp->p_spin);
          }
+         spin_unlock(&procp->p_spin);
          psp->state = 1;       /* It stopped */
          psp->flags = procp->p_pfsflags;
          psp->events = procp->p_stops;
@@ -324,19 +365,32 @@ procfs_ioctl(struct vop_ioctl_args *ap)
          psp->val = procp->p_xstat;    /* any extra info */
          break;
        case PIOCCONT:  /* Restart a proc */
-         if (procp->p_step == 0)
-           return EINVAL;      /* Can only start a stopped process */
+         /*
+          * NOTE: syscall entry deals with stopevents and may run without
+          *       the MP lock.  However, the caller is presumably interlocked
+          *       by having waited.
+          */
+         if (procp->p_step == 0) {
+           error = EINVAL;     /* Can only start a stopped process */
+           goto done;
+         }
          if ((signo = *(int*)ap->a_data) != 0) {
-           if (signo >= NSIG || signo <= 0)
-             return EINVAL;
+           if (signo >= NSIG || signo <= 0) {
+             error = EINVAL;
+             goto done;
+           }
            ksignal(procp, signo);
          }
          procp->p_step = 0;
          wakeup(&procp->p_step);
          break;
        default:
-         return (ENOTTY);
+         error = ENOTTY;
+         goto done;
        }
+       error = 0;
+done:
+       PRELE(procp);
-       return 0;
+       return error;
 }
 
@@ -381,8 +435,10 @@ procfs_bmap(struct vop_bmap_args *ap)
 static int
 procfs_inactive(struct vop_inactive_args *ap)
 {
-       /*struct vnode *vp = ap->a_vp;*/
+       struct pfsnode *pfs = VTOPFS(ap->a_vp);
 
+       if (pfs->pfs_pid & PFS_DEAD)
+               vrecycle(ap->a_vp);
        return (0);
 }
 
@@ -453,13 +509,15 @@ procfs_getattr(struct vop_getattr_args *ap)
        switch (pfs->pfs_type) {
        case Proot:
        case Pcurproc:
-               procp = 0;
+               procp = NULL;
                break;
 
        default:
-               procp = PFIND(pfs->pfs_pid);
-               if (procp == NULL || procp->p_ucred == NULL)
-                       return (ENOENT);
+               procp = pfs_pfind(pfs->pfs_pid);
+               if (procp == NULL || procp->p_ucred == NULL) {
+                       error = ENOENT;
+                       goto done;
+               }
        }
 
        error = 0;
@@ -498,7 +556,7 @@ procfs_getattr(struct vop_getattr_args *ap)
        case Pfpregs:
        case Pdbregs:
        case Pmem:
-               if (procp->p_flag & P_SUGID)
+               if (procp->p_flags & P_SUGID)
                        vap->va_mode &= ~((VREAD|VWRITE)|
                                          ((VREAD|VWRITE)>>3)|
                                          ((VREAD|VWRITE)>>6));
@@ -550,7 +608,7 @@ procfs_getattr(struct vop_getattr_args *ap)
 
        case Pfile: {
                char *fullpath, *freepath;
-               error = vn_fullpath(procp, NULL, &fullpath, &freepath);
+               error = cache_fullpath(procp, &procp->p_textnch, &fullpath, &freepath, 0);
                if (error == 0) {
                        vap->va_size = strlen(fullpath);
                        kfree(freepath, M_TEMP);
@@ -568,7 +626,7 @@ procfs_getattr(struct vop_getattr_args *ap)
                 * change the owner to root - otherwise 'ps' and friends
                 * will break even though they are setgid kmem. *SIGH*
                 */
-               if (procp->p_flag & P_SUGID)
+               if (procp->p_flags & P_SUGID)
                        vap->va_uid = 0;
                else
                        vap->va_uid = procp->p_ucred->cr_uid;
@@ -599,7 +657,9 @@ procfs_getattr(struct vop_getattr_args *ap)
        default:
                panic("procfs_getattr");
        }
-
+done:
+       if (procp)
+               PRELE(procp);
        return (error);
 }
 
@@ -628,60 +688,19 @@ procfs_setattr(struct vop_setattr_args *ap)
 /*
  * implement access checking.
  *
- * something very similar to this code is duplicated
- * throughout the 4bsd kernel and should be moved
- * into kern/vfs_subr.c sometime.
- *
- * actually, the check for super-user is slightly
- * broken since it will allow read access to write-only
- * objects.  this doesn't cause any particular trouble
- * but does mean that the i/o entry points need to check
- * that the operation really does make sense.
- *
  * procfs_access(struct vnode *a_vp, int a_mode, struct ucred *a_cred)
  */
 static int
 procfs_access(struct vop_access_args *ap)
 {
-       struct vattr *vap;
        struct vattr vattr;
        int error;
 
-       /*
-        * If you're the super-user,
-        * you always get access.
-        */
-       if (ap->a_cred->cr_uid == 0)
-               return (0);
-
-       vap = &vattr;
-       error = VOP_GETATTR(ap->a_vp, vap);
-       if (error)
-               return (error);
-
-       /*
-        * Access check is based on only one of owner, group, public.
-        * If not owner, then check group. If not a member of the
-        * group, then check public access.
-        */
-       if (ap->a_cred->cr_uid != vap->va_uid) {
-               gid_t *gp;
-               int i;
-
-               ap->a_mode >>= 3;
-               gp = ap->a_cred->cr_groups;
-               for (i = 0; i < ap->a_cred->cr_ngroups; i++, gp++)
-                       if (vap->va_gid == *gp)
-                               goto found;
-               ap->a_mode >>= 3;
-found:
-               ;
-       }
-
-       if ((vap->va_mode & ap->a_mode) == ap->a_mode)
-               return (0);
-
-       return (EACCES);
+       error = VOP_GETATTR(ap->a_vp, &vattr);
+       if (!error)
+               error = vop_helper_access(ap, vattr.va_uid, vattr.va_gid, 
+                               vattr.va_mode, 0);
+       return (error);
 }
 
 /*
@@ -712,6 +731,7 @@ procfs_lookup(struct vop_old_lookup_args *ap)
        if (cnp->cn_nameiop == NAMEI_DELETE || cnp->cn_nameiop == NAMEI_RENAME)
                return (EROFS);
 
+       p = NULL;
        error = 0;
        if (cnp->cn_namelen == 1 && *pname == '.') {
                *vpp = dvp;
@@ -734,7 +754,7 @@ procfs_lookup(struct vop_old_lookup_args *ap)
                if (pid == NO_PID)
                        break;
 
-               p = PFIND(pid);
+               p = pfs_pfind(pid);
                if (p == NULL)
                        break;
 
@@ -754,7 +774,7 @@ procfs_lookup(struct vop_old_lookup_args *ap)
                        goto out;
                }
 
-               p = PFIND(pfs->pfs_pid);
+               p = pfs_pfind(pfs->pfs_pid);
                if (p == NULL)
                        break;
                /* XXX lwp */
@@ -800,6 +820,8 @@ out:
                        vn_unlock(dvp);
                }
        }
+       if (p)
+               PRELE(p);
        return (error);
 }
 
@@ -870,18 +892,22 @@ procfs_readdir_proc(struct vop_readdir_args *ap)
        struct uio *uio = ap->a_uio;
 
        pfs = VTOPFS(ap->a_vp);
-       p = PFIND(pfs->pfs_pid);
+       p = pfs_pfind(pfs->pfs_pid);
        if (p == NULL)
                return(0);
-       if (!PRISON_CHECK(ap->a_cred, p->p_ucred))
-               return(0);
-       /* XXX lwp */
+       if (!PRISON_CHECK(ap->a_cred, p->p_ucred)) {
+               error = 0;
+               goto done;
+       }
+       /* XXX lwp, not MPSAFE */
        lp = FIRST_LWP_IN_PROC(p);
 
        error = 0;
        i = (int)uio->uio_offset;
-       if (i < 0)
-               return (EINVAL);
+       if (i < 0) {
+               error = EINVAL;
+               goto done;
+       }
 
        for (pt = &proc_targets[i];
             !error && uio->uio_resid > 0 && i < nproc_targets; pt++, i++) {
@@ -896,8 +922,10 @@ procfs_readdir_proc(struct vop_readdir_args *ap)
        }
 
        uio->uio_offset = (off_t)i;
-
-       return(0);
+       error = 0;
+done:
+       PRELE(p);
+       return error;
 }
 
 struct procfs_readdir_root_info {
@@ -1045,19 +1073,26 @@ procfs_readlink(struct vop_readlink_args *ap)
         * from under us...
         */
        case Pfile:
-               procp = PFIND(pfs->pfs_pid);
+               procp = pfs_pfind(pfs->pfs_pid);
                if (procp == NULL || procp->p_ucred == NULL) {
                        kprintf("procfs_readlink: pid %d disappeared\n",
                            pfs->pfs_pid);
+                       if (procp)
+                               PRELE(procp);
                        return (uiomove("unknown", sizeof("unknown") - 1,
                            ap->a_uio));
                }
-               error = vn_fullpath(procp, NULL, &fullpath, &freepath);
-               if (error != 0)
+               error = cache_fullpath(procp, &procp->p_textnch, &fullpath, &freepath, 0);
+               if (error != 0) {
+                       if (procp)
+                               PRELE(procp);
                        return (uiomove("unknown", sizeof("unknown") - 1,
                            ap->a_uio));
+               }
                error = uiomove(fullpath, strlen(fullpath), ap->a_uio);
                kfree(freepath, M_TEMP);
+               if (procp)
+                       PRELE(procp);
                return (error);
        default:
                return (EINVAL);