kernel - Do not allow destroyed namecache entries to be re-resolved
[dragonfly.git] / sys / kern / vfs_cache.c
index f7a8866..d4899d3 100644 (file)
@@ -186,27 +186,24 @@ static void _cache_setunresolved(struct namecache *ncp);
 static void _cache_cleanneg(int count);
 static void _cache_cleanpos(int count);
 static void _cache_cleandefered(void);
+static void _cache_unlink(struct namecache *ncp);
 
 /*
  * The new name cache statistics
  */
 SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
-#define STATNODE(mode, name, var) \
-       SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
-#define STATNODE_INT(mode, name, var) \
-       SYSCTL_UINT(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
-static int numneg; STATNODE_INT(CTLFLAG_RD, numneg, &numneg);
-static int numcache; STATNODE_INT(CTLFLAG_RD, numcache, &numcache);
-static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
-static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
-static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
-static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
-static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
-static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
-static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
-static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
-static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
-static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);
+static int numneg;
+SYSCTL_INT(_vfs_cache, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0,
+    "Number of negative namecache entries");
+static int numcache; /* +1 in _cache_setvp(), -1 in _cache_setunresolved() */
+SYSCTL_INT(_vfs_cache, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0,
+    "Number of namecache entries");
+static u_long numcalls;
+SYSCTL_ULONG(_vfs_cache, OID_AUTO, numcalls, CTLFLAG_RD, &numcalls, 0,
+    "Number of namecache lookups");
+static u_long numchecks;
+SYSCTL_ULONG(_vfs_cache, OID_AUTO, numchecks, CTLFLAG_RD, &numchecks, 0,
+    "Number of checked entries in namecache lookups");
 
 struct nchstats nchstats[SMP_MAXCPU];
 /*
@@ -882,10 +879,10 @@ _cache_setvp(struct mount *mp, struct namecache *ncp, struct vnode *vp)
                 */
                if (!TAILQ_EMPTY(&ncp->nc_list))
                        vhold(vp);
-               spin_lock(&vp->v_spinlock);
+               spin_lock(&vp->v_spin);
                ncp->nc_vp = vp;
                TAILQ_INSERT_HEAD(&vp->v_namecache, ncp, nc_vnode);
-               spin_unlock(&vp->v_spinlock);
+               spin_unlock(&vp->v_spin);
                if (ncp->nc_exlocks)
                        vhold(vp);
 
@@ -905,6 +902,11 @@ _cache_setvp(struct mount *mp, struct namecache *ncp, struct vnode *vp)
                }
                atomic_add_int(&numcache, 1);
                ncp->nc_error = 0;
+               /* XXX: this is a hack to work-around the lack of a real pfs vfs
+                * implementation*/
+               if (mp != NULL)
+                       if (strncmp(mp->mnt_stat.f_fstypename, "null", 5) == 0)
+                               vp->v_pfsmp = mp;
        } else {
                /*
                 * When creating a negative cache hit we set the
@@ -920,7 +922,7 @@ _cache_setvp(struct mount *mp, struct namecache *ncp, struct vnode *vp)
                spin_unlock(&ncspin);
                ncp->nc_error = ENOENT;
                if (mp)
-                       ncp->nc_namecache_gen = mp->mnt_namecache_gen;
+                       VFS_NCPGEN_SET(mp, ncp);
        }
        ncp->nc_flag &= ~(NCF_UNRESOLVED | NCF_DEFEREDZAP);
 }
@@ -974,10 +976,10 @@ _cache_setunresolved(struct namecache *ncp)
                ncp->nc_error = ENOTCONN;
                if ((vp = ncp->nc_vp) != NULL) {
                        atomic_add_int(&numcache, -1);
-                       spin_lock(&vp->v_spinlock);
+                       spin_lock(&vp->v_spin);
                        ncp->nc_vp = NULL;
                        TAILQ_REMOVE(&vp->v_namecache, ncp, nc_vnode);
-                       spin_unlock(&vp->v_spinlock);
+                       spin_unlock(&vp->v_spin);
 
                        /*
                         * Any vp associated with an ncp with children is
@@ -1032,8 +1034,7 @@ _cache_auto_unresolve(struct mount *mp, struct namecache *ncp)
         * If a resolved negative cache hit is invalid due to
         * the mount's namecache generation being bumped, zap it.
         */
-       if (ncp->nc_vp == NULL &&
-           ncp->nc_namecache_gen != mp->mnt_namecache_gen) {
+       if (ncp->nc_vp == NULL && VFS_NCPGEN_TEST(mp, ncp)) {
                _cache_setunresolved(ncp);
                return;
        }
@@ -1263,7 +1264,7 @@ cache_inval_vp(struct vnode *vp, int flags)
        struct namecache *next;
 
 restart:
-       spin_lock(&vp->v_spinlock);
+       spin_lock(&vp->v_spin);
        ncp = TAILQ_FIRST(&vp->v_namecache);
        if (ncp)
                _cache_hold(ncp);
@@ -1271,7 +1272,7 @@ restart:
                /* loop entered with ncp held and vp spin-locked */
                if ((next = TAILQ_NEXT(ncp, nc_vnode)) != NULL)
                        _cache_hold(next);
-               spin_unlock(&vp->v_spinlock);
+               spin_unlock(&vp->v_spin);
                _cache_lock(ncp);
                if (ncp->nc_vp != vp) {
                        kprintf("Warning: cache_inval_vp: race-A detected on "
@@ -1284,16 +1285,16 @@ restart:
                _cache_inval(ncp, flags);
                _cache_put(ncp);                /* also releases reference */
                ncp = next;
-               spin_lock(&vp->v_spinlock);
+               spin_lock(&vp->v_spin);
                if (ncp && ncp->nc_vp != vp) {
-                       spin_unlock(&vp->v_spinlock);
+                       spin_unlock(&vp->v_spin);
                        kprintf("Warning: cache_inval_vp: race-B detected on "
                                "%s\n", ncp->nc_name);
                        _cache_drop(ncp);
                        goto restart;
                }
        }
-       spin_unlock(&vp->v_spinlock);
+       spin_unlock(&vp->v_spin);
        return(TAILQ_FIRST(&vp->v_namecache) != NULL);
 }
 
@@ -1312,7 +1313,7 @@ cache_inval_vp_nonblock(struct vnode *vp)
        struct namecache *ncp;
        struct namecache *next;
 
-       spin_lock(&vp->v_spinlock);
+       spin_lock(&vp->v_spin);
        ncp = TAILQ_FIRST(&vp->v_namecache);
        if (ncp)
                _cache_hold(ncp);
@@ -1320,7 +1321,7 @@ cache_inval_vp_nonblock(struct vnode *vp)
                /* loop entered with ncp held */
                if ((next = TAILQ_NEXT(ncp, nc_vnode)) != NULL)
                        _cache_hold(next);
-               spin_unlock(&vp->v_spinlock);
+               spin_unlock(&vp->v_spin);
                if (_cache_lock_nonblock(ncp)) {
                        _cache_drop(ncp);
                        if (next)
@@ -1338,16 +1339,16 @@ cache_inval_vp_nonblock(struct vnode *vp)
                _cache_inval(ncp, 0);
                _cache_put(ncp);                /* also releases reference */
                ncp = next;
-               spin_lock(&vp->v_spinlock);
+               spin_lock(&vp->v_spin);
                if (ncp && ncp->nc_vp != vp) {
-                       spin_unlock(&vp->v_spinlock);
+                       spin_unlock(&vp->v_spin);
                        kprintf("Warning: cache_inval_vp: race-B detected on "
                                "%s\n", ncp->nc_name);
                        _cache_drop(ncp);
                        goto done;
                }
        }
-       spin_unlock(&vp->v_spinlock);
+       spin_unlock(&vp->v_spin);
 done:
        return(TAILQ_FIRST(&vp->v_namecache) != NULL);
 }
@@ -1372,14 +1373,26 @@ cache_rename(struct nchandle *fnch, struct nchandle *tnch)
        struct nchash_head *nchpp;
        u_int32_t hash;
        char *oname;
+       char *nname;
+
+       if (tncp->nc_nlen) {
+               nname = kmalloc(tncp->nc_nlen + 1, M_VFSCACHE, M_WAITOK);
+               bcopy(tncp->nc_name, nname, tncp->nc_nlen);
+               nname[tncp->nc_nlen] = 0;
+       } else {
+               nname = NULL;
+       }
 
        /*
         * Rename fncp (unlink)
         */
        _cache_unlink_parent(fncp);
        oname = fncp->nc_name;
-       fncp->nc_name = tncp->nc_name;
+       fncp->nc_name = nname;
        fncp->nc_nlen = tncp->nc_nlen;
+       if (oname)
+               kfree(oname, M_VFSCACHE);
+
        tncp_par = tncp->nc_parent;
        _cache_hold(tncp_par);
        _cache_lock(tncp_par);
@@ -1400,13 +1413,24 @@ cache_rename(struct nchandle *fnch, struct nchandle *tnch)
        /*
         * Get rid of the overwritten tncp (unlink)
         */
-       _cache_setunresolved(tncp);
-       _cache_unlink_parent(tncp);
-       tncp->nc_name = NULL;
-       tncp->nc_nlen = 0;
+       _cache_unlink(tncp);
+}
 
-       if (oname)
-               kfree(oname, M_VFSCACHE);
+/*
+ * Perform actions consistent with unlinking a file: nch->ncp is flagged
+ * NCF_DESTROYED so it no longer shows up in searches, is not re-resolved
+ * again, and will be physically deleted when the vnode goes away.
+ */
+void
+cache_unlink(struct nchandle *nch)
+{
+       _cache_unlink(nch->ncp);
+}
+
+static void
+_cache_unlink(struct namecache *ncp)
+{
+       ncp->nc_flag |= NCF_DESTROYED;  /* resolve path now returns EINVAL */
+}
 
 /*
@@ -1613,11 +1637,11 @@ cache_fromdvp(struct vnode *dvp, struct ucred *cred, int makeit,
         * Handle the makeit == 0 degenerate case
         */
        if (makeit == 0) {
-               spin_lock(&dvp->v_spinlock);
+               spin_lock(&dvp->v_spin);
                nch->ncp = TAILQ_FIRST(&dvp->v_namecache);
                if (nch->ncp)
                        cache_hold(nch);
-               spin_unlock(&dvp->v_spinlock);
+               spin_unlock(&dvp->v_spin);
        }
 
        /*
@@ -1627,14 +1651,14 @@ cache_fromdvp(struct vnode *dvp, struct ucred *cred, int makeit,
                /*
                 * Break out if we successfully acquire a working ncp.
                 */
-               spin_lock(&dvp->v_spinlock);
+               spin_lock(&dvp->v_spin);
                nch->ncp = TAILQ_FIRST(&dvp->v_namecache);
                if (nch->ncp) {
                        cache_hold(nch);
-                       spin_unlock(&dvp->v_spinlock);
+                       spin_unlock(&dvp->v_spin);
                        break;
                }
-               spin_unlock(&dvp->v_spinlock);
+               spin_unlock(&dvp->v_spin);
 
                /*
                 * If dvp is the root of its filesystem it should already
@@ -1774,14 +1798,14 @@ cache_fromdvp_try(struct vnode *dvp, struct ucred *cred,
                        break;
                }
                vn_unlock(pvp);
-               spin_lock(&pvp->v_spinlock);
+               spin_lock(&pvp->v_spin);
                if ((nch.ncp = TAILQ_FIRST(&pvp->v_namecache)) != NULL) {
                        _cache_hold(nch.ncp);
-                       spin_unlock(&pvp->v_spinlock);
+                       spin_unlock(&pvp->v_spin);
                        vrele(pvp);
                        break;
                }
-               spin_unlock(&pvp->v_spinlock);
+               spin_unlock(&pvp->v_spin);
                if (pvp->v_flag & VROOT) {
                        nch.ncp = _cache_get(pvp->v_mount->mnt_ncmountpt.ncp);
                        error = cache_resolve_mp(nch.mount);
@@ -2499,6 +2523,7 @@ cache_findmount_callback(struct mount *mp, void *data)
            mp->mnt_ncmounton.ncp == info->nch_ncp
        ) {
            info->result = mp;
+           atomic_add_int(&mp->mnt_refs, 1);
            return(-1);
        }
        return(0);
@@ -2517,6 +2542,12 @@ cache_findmount(struct nchandle *nch)
        return(info.result);
 }
 
+void
+cache_dropmount(struct mount *mp)
+{
+       atomic_add_int(&mp->mnt_refs, -1);  /* drop ref from cache_findmount() */
+}
+
 /*
  * Resolve an unresolved namecache entry, generally by looking it up.
  * The passed ncp must be locked and refd. 
@@ -2564,6 +2595,20 @@ restart:
                        return (ncp->nc_error);
        }
 
+       /*
+        * If the ncp was destroyed it will never resolve again.  This
+        * can basically only happen when someone is chdir'd into an
+        * empty directory which is then rmdir'd.  We want to catch this
+        * here and not dive the VFS because the VFS might actually
+        * have a way to re-resolve the disconnected ncp, which will
+        * result in inconsistencies in the cdir/nch for proc->p_fd.
+        */
+       if (ncp->nc_flag & NCF_DESTROYED) {
+               kprintf("Warning: cache_resolve: ncp '%s' was unlinked\n",
+                       ncp->nc_name);
+               return(EINVAL);
+       }
+
        /*
         * Mount points need special handling because the parent does not
         * belong to the same filesystem as the ncp.
@@ -2804,6 +2849,7 @@ _cache_cleanpos(int count)
         */
        while (count) {
                rover_copy = ++rover;   /* MPSAFEENOUGH */
+               cpu_ccfence();
                nchpp = NCHHASH(rover_copy);
 
                spin_lock(&nchpp->spin);
@@ -3011,12 +3057,18 @@ static int disablecwd;
 SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0,
     "Disable getcwd");
 
-static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls);
-static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1);
-static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2);
-static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3);
-static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4);
-static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound);
+static u_long numcwdcalls;
+SYSCTL_ULONG(_vfs_cache, OID_AUTO, numcwdcalls, CTLFLAG_RD, &numcwdcalls, 0,
+    "Number of current directory resolution calls");
+static u_long numcwdfailnf;
+SYSCTL_ULONG(_vfs_cache, OID_AUTO, numcwdfailnf, CTLFLAG_RD, &numcwdfailnf, 0,
+    "Number of current directory failures due to lack of file");
+static u_long numcwdfailsz;
+SYSCTL_ULONG(_vfs_cache, OID_AUTO, numcwdfailsz, CTLFLAG_RD, &numcwdfailsz, 0,
+    "Number of current directory failures due to large result");
+static u_long numcwdfound;
+SYSCTL_ULONG(_vfs_cache, OID_AUTO, numcwdfound, CTLFLAG_RD, &numcwdfound, 0,
+    "Number of current directory resolution successes");
 
 /*
  * MPALMOSTSAFE
@@ -3092,7 +3144,7 @@ kern_getcwd(char *buf, size_t buflen, int *error)
                 */
                for (i = ncp->nc_nlen - 1; i >= 0; i--) {
                        if (bp == buf) {
-                               numcwdfail4++;
+                               numcwdfailsz++;
                                *error = ERANGE;
                                bp = NULL;
                                goto done;
@@ -3100,7 +3152,7 @@ kern_getcwd(char *buf, size_t buflen, int *error)
                        *--bp = ncp->nc_name[i];
                }
                if (bp == buf) {
-                       numcwdfail4++;
+                       numcwdfailsz++;
                        *error = ERANGE;
                        bp = NULL;
                        goto done;
@@ -3126,14 +3178,14 @@ kern_getcwd(char *buf, size_t buflen, int *error)
                ncp = nch.ncp;
        }
        if (ncp == NULL) {
-               numcwdfail2++;
+               numcwdfailnf++;
                *error = ENOENT;
                bp = NULL;
                goto done;
        }
        if (!slash_prefixed) {
                if (bp == buf) {
-                       numcwdfail4++;
+                       numcwdfailsz++;
                        *error = ERANGE;
                        bp = NULL;
                        goto done;
@@ -3153,22 +3205,27 @@ done:
  *
  * The passed nchp is referenced but not locked.
  */
-#undef STATNODE
-#define STATNODE(name)                                                 \
-       static u_int name;                                              \
-       SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")
-
 static int disablefullpath;
 SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW,
     &disablefullpath, 0,
     "Disable fullpath lookups");
 
-STATNODE(numfullpathcalls);
-STATNODE(numfullpathfail1);
-STATNODE(numfullpathfail2);
-STATNODE(numfullpathfail3);
-STATNODE(numfullpathfail4);
-STATNODE(numfullpathfound);
+static u_int numfullpathcalls;
+SYSCTL_UINT(_vfs_cache, OID_AUTO, numfullpathcalls, CTLFLAG_RD,
+    &numfullpathcalls, 0,
+    "Number of full path resolutions in progress");
+static u_int numfullpathfailnf;
+SYSCTL_UINT(_vfs_cache, OID_AUTO, numfullpathfailnf, CTLFLAG_RD,
+    &numfullpathfailnf, 0,
+    "Number of full path resolution failures due to lack of file");
+static u_int numfullpathfailsz;
+SYSCTL_UINT(_vfs_cache, OID_AUTO, numfullpathfailsz, CTLFLAG_RD,
+    &numfullpathfailsz, 0,
+    "Number of full path resolution failures due to insufficient memory");
+static u_int numfullpathfound;
+SYSCTL_UINT(_vfs_cache, OID_AUTO, numfullpathfound, CTLFLAG_RD,
+    &numfullpathfound, 0,
+    "Number of full path resolution successes");
 
 int
 cache_fullpath(struct proc *p, struct nchandle *nchp,
@@ -3235,7 +3292,7 @@ cache_fullpath(struct proc *p, struct nchandle *nchp,
                 */
                for (i = ncp->nc_nlen - 1; i >= 0; i--) {
                        if (bp == buf) {
-                               numfullpathfail4++;
+                               numfullpathfailsz++;
                                kfree(buf, M_TEMP);
                                error = ENOMEM;
                                goto done;
@@ -3243,7 +3300,7 @@ cache_fullpath(struct proc *p, struct nchandle *nchp,
                        *--bp = ncp->nc_name[i];
                }
                if (bp == buf) {
-                       numfullpathfail4++;
+                       numfullpathfailsz++;
                        kfree(buf, M_TEMP);
                        error = ENOMEM;
                        goto done;
@@ -3271,7 +3328,7 @@ cache_fullpath(struct proc *p, struct nchandle *nchp,
                ncp = nch.ncp;
        }
        if (ncp == NULL) {
-               numfullpathfail2++;
+               numfullpathfailnf++;
                kfree(buf, M_TEMP);
                error = ENOENT;
                goto done;
@@ -3279,7 +3336,7 @@ cache_fullpath(struct proc *p, struct nchandle *nchp,
 
        if (!slash_prefixed) {
                if (bp == buf) {
-                       numfullpathfail4++;
+                       numfullpathfailsz++;
                        kfree(buf, M_TEMP);
                        error = ENOMEM;
                        goto done;
@@ -3297,12 +3354,14 @@ done:
 }
 
 int
-vn_fullpath(struct proc *p, struct vnode *vn, char **retbuf, char **freebuf, int guess) 
+vn_fullpath(struct proc *p, struct vnode *vn, char **retbuf, char **freebuf,
+    int guess)
 {
        struct namecache *ncp;
        struct nchandle nch;
        int error;
 
+       *freebuf = NULL;
        atomic_add_int(&numfullpathcalls, 1);
        if (disablefullpath)
                return (ENODEV);
@@ -3315,17 +3374,17 @@ vn_fullpath(struct proc *p, struct vnode *vn, char **retbuf, char **freebuf, int
                if ((vn = p->p_textvp) == NULL)
                        return (EINVAL);
        }
-       spin_lock(&vn->v_spinlock);
+       spin_lock(&vn->v_spin);
        TAILQ_FOREACH(ncp, &vn->v_namecache, nc_vnode) {
                if (ncp->nc_nlen)
                        break;
        }
        if (ncp == NULL) {
-               spin_unlock(&vn->v_spinlock);
+               spin_unlock(&vn->v_spin);
                return (EINVAL);
        }
        _cache_hold(ncp);
-       spin_unlock(&vn->v_spinlock);
+       spin_unlock(&vn->v_spin);
 
        atomic_add_int(&numfullpathcalls, -1);
        nch.ncp = ncp;;