2 * Copyright (c) 2003,2004 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * Copyright (c) 1989, 1993, 1995
35 * The Regents of the University of California. All rights reserved.
37 * This code is derived from software contributed to Berkeley by
38 * Poul-Henning Kamp of the FreeBSD Project.
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. All advertising materials mentioning features or use of this software
49 * must display the following acknowledgement:
50 * This product includes software developed by the University of
51 * California, Berkeley and its contributors.
52 * 4. Neither the name of the University nor the names of its contributors
53 * may be used to endorse or promote products derived from this software
54 * without specific prior written permission.
56 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95
69 * $FreeBSD: src/sys/kern/vfs_cache.c,v 1.42.2.6 2001/10/05 20:07:03 dillon Exp $
70 * $DragonFly: src/sys/kern/vfs_cache.c,v 1.30 2004/10/01 07:08:23 dillon Exp $
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/kernel.h>
76 #include <sys/sysctl.h>
77 #include <sys/mount.h>
78 #include <sys/vnode.h>
79 #include <sys/malloc.h>
80 #include <sys/sysproto.h>
82 #include <sys/namei.h>
83 #include <sys/nlookup.h>
84 #include <sys/filedesc.h>
85 #include <sys/fnv_hash.h>
86 #include <sys/globaldata.h>
87 #include <sys/kern_syscall.h>
90 * Random lookups in the cache are accomplished with a hash table using
91 * a hash key of (nc_src_vp, name).
93 * Negative entries may exist and correspond to structures where nc_vp
94 * is NULL. In a negative entry, NCF_WHITEOUT will be set if the entry
95 * corresponds to a whited-out directory entry (versus simply not finding the
98 * Upon reaching the last segment of a path, if the reference is for DELETE,
99 * or NOCACHE is set (rewrite), and the name is located in the cache, it
104 * Structures associated with name caching.
/*
 * Map a hash value to its hash-chain head.  nchash is the bucket mask
 * returned by hashinit() at boot time.
 */
106 #define NCHHASH(hash) (&nchashtbl[(hash) & nchash])
109 MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
/* Hash table of namecache entries, keyed by (parent ncp, name). */
111 static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */
/* List of negative entries (nc_vp == NULL); they link here via nc_vnode. */
112 static struct namecache_list ncneglist; /* instead of vnode */
114 static u_long nchash; /* size of hash table */
115 SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
117 static u_long ncnegfactor = 16; /* ratio of negative entries */
118 SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
120 static u_long numneg; /* number of cache entries allocated */
121 SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
123 static u_long numcache; /* number of cache entries allocated */
124 SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
126 static u_long numunres; /* number of unresolved entries */
127 SYSCTL_ULONG(_debug, OID_AUTO, numunres, CTLFLAG_RD, &numunres, 0, "");
/* Export structure sizes for debugging convenience. */
129 SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
130 SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");
133 * The new name cache statistics
135 SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
/* Declare a u_long statistic exported under the vfs.cache sysctl node. */
136 #define STATNODE(mode, name, var) \
137 SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
138 STATNODE(CTLFLAG_RD, numneg, &numneg);
139 STATNODE(CTLFLAG_RD, numcache, &numcache);
140 static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
141 static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
142 static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
143 static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
144 static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
145 static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
146 static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
147 static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
148 static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
149 static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);
/* Per-cpu cache-effectiveness statistics; aggregated in user-land. */
151 struct nchstats nchstats[SMP_MAXCPU];
153 * Export VFS cache effectiveness statistics to user-land.
155 * The statistics are left for aggregation to user-land so
156 * neat things can be achieved, like observing per-CPU cache
/* sysctl handler: copy out each cpu's nchstats structure in sequence. */
160 sysctl_nchstats(SYSCTL_HANDLER_ARGS)
162 struct globaldata *gd;
166 for (i = 0; i < ncpus; ++i) {
167 gd = globaldata_find(i);
/* Abort on the first SYSCTL_OUT failure. */
168 if ((error = SYSCTL_OUT(req, (void *)&(*gd->gd_nchstats),
169 sizeof(struct nchstats))))
175 SYSCTL_PROC(_vfs_cache, OID_AUTO, nchstats, CTLTYPE_OPAQUE|CTLFLAG_RD,
176 0, 0, sysctl_nchstats, "S,nchstats", "VFS cache effectiveness statistics");
/* Forward declaration; cache_zap() is defined later in this file. */
178 static void cache_zap(struct namecache *ncp);
181 * cache_hold() and cache_drop() prevent the premature deletion of a
182 * namecache entry but do not prevent operations (such as zapping) on
183 * that namecache entry.
/* Inline worker: bump the reference count on an ncp. */
187 _cache_hold(struct namecache *ncp)
/*
 * Inline worker: drop a reference.  When this is the last reference on
 * an unresolved entry with no children, the entry is destroyed entirely.
 */
195 _cache_drop(struct namecache *ncp)
197 KKASSERT(ncp->nc_refs > 0);
198 if (ncp->nc_refs == 1 &&
199 (ncp->nc_flag & NCF_UNRESOLVED) &&
200 TAILQ_EMPTY(&ncp->nc_list)
209 * Link a new namecache entry to its parent. Be careful to avoid races
210 * if vhold() blocks in the future.
213 cache_link_parent(struct namecache *ncp, struct namecache *par)
215 KKASSERT(ncp->nc_parent == NULL);
216 ncp->nc_parent = par;
/* First child being added: the parent's vnode must be held. */
217 if (TAILQ_EMPTY(&par->nc_list)) {
218 TAILQ_INSERT_HEAD(&par->nc_list, ncp, nc_entry);
220 * Any vp associated with an ncp which has children must
221 * be held to prevent it from being recycled.
/* Parent already has children (and thus any hold is already in place). */
226 TAILQ_INSERT_HEAD(&par->nc_list, ncp, nc_entry);
231 * Remove the parent association from a namecache structure.
234 cache_unlink_parent(struct namecache *ncp)
236 struct namecache *par;
238 if ((par = ncp->nc_parent) != NULL) {
239 ncp->nc_parent = NULL;
/* Hold the parent across the removal so it cannot go away under us. */
240 par = cache_hold(par);
241 TAILQ_REMOVE(&par->nc_list, ncp, nc_entry);
/* Parent lost its last child: the hold on its vnode can be released. */
242 if (par->nc_vp && TAILQ_EMPTY(&par->nc_list))
249 * Allocate a new namecache structure.
251 static struct namecache *
254 struct namecache *ncp;
/* Zeroed allocation; every new entry starts life unresolved. */
256 ncp = malloc(sizeof(*ncp), M_VFSCACHE, M_WAITOK|M_ZERO);
257 ncp->nc_flag = NCF_UNRESOLVED;
258 ncp->nc_error = ENOTCONN; /* needs to be resolved */
259 TAILQ_INIT(&ncp->nc_list);
265 * Ref and deref a namecache structure.
/* Public wrapper around the inline _cache_hold(). */
268 cache_hold(struct namecache *ncp)
270 return(_cache_hold(ncp));
/* Public wrapper around the inline _cache_drop(). */
274 cache_drop(struct namecache *ncp)
280 * Namespace locking. The caller must already hold a reference to the
281 * namecache structure in order to lock/unlock it. This function prevents
282 * the namespace from being created or destroyed by accessors other than
285 * Note that holding a locked namecache structure prevents other threads
286 * from making namespace changes (e.g. deleting or creating), prevents
287 * vnode association state changes by other threads, and prevents the
288 * namecache entry from being resolved or unresolved by other threads.
290 * The lock owner has full authority to associate/disassociate vnodes
291 * and resolve/unresolve the locked ncp.
293 * In particular, if a vnode is associated with a locked cache entry
294 * that vnode will *NOT* be recycled. We accomplish this by vhold()ing the
295 * vnode. XXX we should find a more efficient way to prevent the vnode
296 * from being recycled, but remember that any given vnode may have multiple
297 * namecache associations (think hardlinks).
300 cache_lock(struct namecache *ncp)
305 KKASSERT(ncp->nc_refs != 0);
/* Uncontested: take ownership of the exclusive lock. */
310 if (ncp->nc_exlocks == 0) {
314 * The vp associated with a locked ncp must be held
315 * to prevent it from being recycled (which would
316 * cause the ncp to become unresolved).
318 * XXX loop on race for later MPSAFE work.
/* Recursive acquisition by the current lock owner. */
324 if (ncp->nc_locktd == td) {
/* Contested: request a wakeup from the owner and sleep on the ncp. */
328 ncp->nc_flag |= NCF_LOCKREQ;
/* Diagnose locks held for an unusually long time (>= 1 second). */
329 if (tsleep(ncp, 0, "clock", hz) == EWOULDBLOCK) {
332 printf("[diagnostic] cache_lock: blocked on %*.*s\n",
333 ncp->nc_nlen, ncp->nc_nlen,
340 printf("[diagnostic] cache_lock: unblocked %*.*s\n",
341 ncp->nc_nlen, ncp->nc_nlen, ncp->nc_name);
/*
 * Release the exclusive namespace lock on the ncp.  On the final
 * unlock the owner field is cleared and any waiters are notified.
 */
346 cache_unlock(struct namecache *ncp)
348 thread_t td = curthread;
350 KKASSERT(ncp->nc_refs > 0);
351 KKASSERT(ncp->nc_exlocks > 0);
352 KKASSERT(ncp->nc_locktd == td);
/* Recursive locks: only the outermost unlock releases ownership. */
353 if (--ncp->nc_exlocks == 0) {
356 ncp->nc_locktd = NULL;
/* Wake up any thread sleeping in cache_lock(). */
357 if (ncp->nc_flag & NCF_LOCKREQ) {
358 ncp->nc_flag &= ~NCF_LOCKREQ;
365 * ref-and-lock, unlock-and-deref functions.
/* Convenience: reference and lock an ncp in one call. */
368 cache_get(struct namecache *ncp)
/* Convenience: unlock and drop the reference in one call. */
376 cache_put(struct namecache *ncp)
383 * Resolve an unresolved ncp by associating a vnode with it. If the
384 * vnode is NULL, a negative cache entry is created.
386 * The ncp should be locked on entry and will remain locked on return.
389 cache_setvp(struct namecache *ncp, struct vnode *vp)
391 KKASSERT(ncp->nc_flag & NCF_UNRESOLVED);
395 * Any vp associated with an ncp which has children must
396 * be held. Any vp associated with a locked ncp must be held.
398 if (!TAILQ_EMPTY(&ncp->nc_list))
/* Link this ncp onto the vnode's list of namecache associations. */
400 TAILQ_INSERT_HEAD(&vp->v_namecache, ncp, nc_vnode);
405 * Set auxiliary flags
409 ncp->nc_flag |= NCF_ISDIR;
412 ncp->nc_flag |= NCF_ISSYMLINK;
413 /* XXX cache the contents of the symlink */
/* Negative entry: queue on ncneglist; lookups will report ENOENT. */
421 TAILQ_INSERT_TAIL(&ncneglist, ncp, nc_vnode);
423 ncp->nc_error = ENOENT;
/* Entry is now resolved — positively or negatively. */
425 ncp->nc_flag &= ~NCF_UNRESOLVED;
429 * Disassociate the vnode or negative-cache association and mark a
430 * namecache entry as unresolved again. Note that the ncp is still
431 * left in the hash table and still linked to its parent.
433 * The ncp should be locked on entry and will remain locked on return.
436 cache_setunresolved(struct namecache *ncp)
440 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
441 ncp->nc_flag |= NCF_UNRESOLVED;
/* Clear flags derived from the (now stale) vnode association. */
442 ncp->nc_flag &= ~(NCF_WHITEOUT|NCF_ISDIR|NCF_ISSYMLINK);
443 ncp->nc_error = ENOTCONN;
/* Positive entry: unlink from the vnode and undo any vnode holds. */
445 if ((vp = ncp->nc_vp) != NULL) {
447 ncp->nc_vp = NULL; /* safety */
448 TAILQ_REMOVE(&vp->v_namecache, ncp, nc_vnode);
451 * Any vp associated with an ncp with children is
452 * held by that ncp. Any vp associated with a locked
453 * ncp is held by that ncp. These conditions must be
454 * undone when the vp is cleared out from the ncp.
456 if (!TAILQ_EMPTY(&ncp->nc_list))
/* Negative entry: remove from the negative-hit list. */
461 TAILQ_REMOVE(&ncneglist, ncp, nc_vnode);
468 * vget the vnode associated with the namecache entry. Resolve the namecache
469 * entry if necessary and deal with namecache/vp races. The passed ncp must
470 * be referenced and may be locked. The ncp's ref/locking state is not
471 * affected by this call.
473 * lk_type may be LK_SHARED, LK_EXCLUSIVE. A ref'd, possibly locked
474 * (depending on the passed lk_type) will be returned in *vpp with an error
475 * of 0, or NULL will be returned in *vpp with a non-0 error code. The
476 * most typical error is ENOENT, meaning that the ncp represents a negative
477 * cache hit and there is no vnode to retrieve, but other errors can occur
480 * The main race we have to deal with are namecache zaps. The ncp itself
481 * will not disappear since it is referenced, and it turns out that the
482 * validity of the vp pointer can be checked simply by rechecking the
483 * contents of ncp->nc_vp.
486 cache_vget(struct namecache *ncp, struct ucred *cred,
487 int lk_type, struct vnode **vpp)
/* Resolve the entry through the filesystem first if necessary. */
494 if (ncp->nc_flag & NCF_UNRESOLVED) {
496 error = cache_resolve(ncp, cred);
501 if (error == 0 && (vp = ncp->nc_vp) != NULL) {
502 error = vget(vp, NULL, lk_type, curthread);
/* vget() may have blocked: recheck nc_vp to catch a cache_zap race. */
504 if (vp != ncp->nc_vp) /* handle cache_zap race */
507 } else if (vp != ncp->nc_vp) { /* handle cache_zap race */
/* Resolved, but no vnode: this is a negative cache hit. */
512 if (error == 0 && vp == NULL)
/*
 * Variant of cache_vget() that obtains a referenced but unlocked vnode
 * for the ncp.  Returns 0 with the vnode in *vpp, or a non-0 error
 * (typically ENOENT for a negative cache hit).
 */
519 cache_vref(struct namecache *ncp, struct ucred *cred, struct vnode **vpp)
/* Resolve the entry through the filesystem first if necessary. */
526 if (ncp->nc_flag & NCF_UNRESOLVED) {
528 error = cache_resolve(ncp, cred);
533 if (error == 0 && (vp = ncp->nc_vp) != NULL) {
535 if (vp != ncp->nc_vp) { /* handle cache_zap race */
/* Resolved, but no vnode: this is a negative cache hit. */
540 if (error == 0 && vp == NULL)
547 * Try to destroy a namecache entry. The entry is disassociated from its
548 * vnode or ncneglist and reverted to an UNRESOLVED state.
550 * Then, if there are no additional references to the ncp and we can
551 * successfully delete the children, the entry is also removed from the
552 * namecache hashlist / topology.
554 * References or undeletable children will prevent the entry from being
555 * removed from the topology. The entry may be revalidated (typically
556 * by cache_enter()) at a later time. Children remain because:
558 * + we have tried to delete a node rather than a leaf in the topology.
559 * + the presence of negative entries (we try to scrap these).
560 * + an entry or child has a non-zero ref count and cannot be scrapped.
562 * This function must be called with the ncp held and will drop the ref
563 * count during zapping.
566 cache_zap(struct namecache *ncp)
568 struct namecache *par;
571 * Disassociate the vnode or negative cache ref and set NCF_UNRESOLVED.
573 cache_setunresolved(ncp);
576 * Try to scrap the entry and possibly tail-recurse on its parent.
577 * We only scrap unref'd (other than our ref) unresolved entries,
578 * we do not scrap 'live' entries.
580 while (ncp->nc_flag & NCF_UNRESOLVED) {
582 * Someone other than us has a ref, stop.
584 if (ncp->nc_refs > 1)
588 * We have children, stop.
590 if (!TAILQ_EMPTY(&ncp->nc_list))
/* Remove the entry from the hash table before freeing it. */
593 if (ncp->nc_flag & NCF_HASHED) {
594 ncp->nc_flag &= ~NCF_HASHED;
595 LIST_REMOVE(ncp, nc_hash);
599 * Unlink from its parent and free, then loop on the
600 * parent. XXX temp hack, in stage-3 parent is never NULL
602 if ((par = ncp->nc_parent) != NULL) {
/* Hold the parent across the unlink so it survives the removal. */
603 par = cache_hold(par);
604 TAILQ_REMOVE(&par->nc_list, ncp, nc_entry);
/* Parent lost its last child: release the hold on its vnode. */
605 if (par->nc_vp && TAILQ_EMPTY(&par->nc_list))
609 ncp->nc_refs = -1; /* safety */
610 ncp->nc_parent = NULL; /* safety */
612 free(ncp->nc_name, M_VFSCACHE);
613 free(ncp, M_VFSCACHE);
615 if (par == NULL) /* temp hack */
616 return; /* temp hack */
623 * NEW NAMECACHE LOOKUP API
625 * Lookup an entry in the cache. A locked, referenced, non-NULL
626 * entry is *always* returned, even if the supplied component is illegal.
627 * The returned namecache entry should be returned to the system with
628 * cache_put() or cache_unlock() + cache_drop().
630 * namecache locks are recursive but care must be taken to avoid lock order
633 * Nobody else will be able to manipulate the associated namespace (e.g.
634 * create, delete, rename, rename-target) until the caller unlocks the
637 * The returned entry will be in one of three states: positive hit (non-null
638 * vnode), negative hit (null vnode), or unresolved (NCF_UNRESOLVED is set).
639 * Unresolved entries must be resolved through the filesystem to associate the
640 * vnode and/or determine whether a positive or negative hit has occurred.
642 * It is not necessary to lock a directory in order to lock namespace under
643 * that directory. In fact, it is explicitly not allowed to do that. A
644 * directory is typically only locked when being created, renamed, or
647 * The directory (par) may be unresolved, in which case any returned child
648 * will likely also be marked unresolved. Likely but not guaranteed. Since
649 * the filesystem VOP_NEWLOOKUP() requires a resolved directory vnode the
650 * caller is responsible for resolving the namecache chain top-down. This API
651 * specifically allows whole chains to be created in an unresolved state.
654 cache_nlookup(struct namecache *par, struct nlcomponent *nlc)
656 struct namecache *ncp;
657 struct namecache *new_ncp;
658 struct nchashhead *nchpp;
666 * Try to locate an existing entry
/* The hash covers the component name, then the parent pointer. */
668 hash = fnv_32_buf(nlc->nlc_nameptr, nlc->nlc_namelen, FNV1_32_INIT);
669 hash = fnv_32_buf(&par, sizeof(par), hash);
672 LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
676 * Zap entries that have timed out.
/* Signed delta comparison handles tick-counter wraparound. */
678 if (ncp->nc_timeout &&
679 (int)(ncp->nc_timeout - ticks) < 0
681 cache_zap(cache_hold(ncp));
686 * Break out if we find a matching entry. Note that
687 * UNRESOLVED entries may match.
689 if (ncp->nc_parent == par &&
690 ncp->nc_nlen == nlc->nlc_namelen &&
691 bcmp(ncp->nc_name, nlc->nlc_nameptr, ncp->nc_nlen) == 0
/* Hit on a retry pass: discard the spare entry we preallocated. */
694 free(new_ncp->nc_name, M_VFSCACHE);
695 free(new_ncp, M_VFSCACHE);
702 * We failed to locate an entry, create a new entry and add it to
703 * the cache. We have to relookup after possibly blocking in
706 if (new_ncp == NULL) {
707 new_ncp = cache_alloc();
708 new_ncp->nc_name = malloc(nlc->nlc_namelen,
709 M_VFSCACHE, M_WAITOK);
716 * Initialize as a new UNRESOLVED entry, lock (non-blocking),
717 * and link to the parent.
719 ncp->nc_nlen = nlc->nlc_namelen;
720 bcopy(nlc->nlc_nameptr, ncp->nc_name, nlc->nlc_namelen);
721 nchpp = NCHHASH(hash);
722 LIST_INSERT_HEAD(nchpp, ncp, nc_hash);
723 ncp->nc_flag |= NCF_HASHED;
725 cache_link_parent(ncp, par);
729 * Entry found. Cleanup any dangling new_ncp, ref and lock
738 * Resolve an unresolved namecache entry, generally by looking it up.
739 * The passed ncp must be locked.
741 * Theoretically since a vnode cannot be recycled while held, and since
742 * the nc_parent chain holds its vnode as long as children exist, the
743 * direct parent of the cache entry we are trying to resolve should
744 * have a valid vnode. If not then generate an error that we can
745 * determine is related to a resolver bug.
748 cache_resolve(struct namecache *ncp, struct ucred *cred)
750 struct namecache *par;
/* A missing or unresolved parent indicates a resolver bug: EXDEV. */
752 if ((par = ncp->nc_parent) == NULL) {
753 ncp->nc_error = EXDEV;
754 } else if (par->nc_vp == NULL) {
755 ncp->nc_error = EXDEV;
/* Ask the filesystem to resolve this entry via the parent's vnode ops. */
757 ncp->nc_error = vop_resolve(par->nc_vp->v_ops, ncp, cred);
759 return(ncp->nc_error);
763 * Lookup an entry in the cache.
765 * XXX OLD API ROUTINE! WHEN ALL VFSs HAVE BEEN CLEANED UP THIS PROCEDURE
768 * Lookup is called with dvp pointing to the directory to search,
769 * cnp pointing to the name of the entry being sought.
771 * If the lookup succeeds, the vnode is returned in *vpp, and a
772 * status of -1 is returned.
774 * If the lookup determines that the name does not exist (negative caching),
775 * a status of ENOENT is returned.
777 * If the lookup fails, a status of zero is returned.
779 * Matching UNRESOLVED entries are resolved.
781 * HACKS: we create dummy nodes for parents
784 cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
786 struct namecache *ncp;
787 struct namecache *par;
788 struct namecache *bpar;
790 globaldata_t gd = mycpu;
795 * Obtain the namecache entry associated with dvp, creating one if
796 * necessary. If we have to create one we have insufficient
797 * information to hash it or even supply the name, but we still
798 * need one so we can link it in.
800 * NOTE: in this stage of development, the passed 'par' is
801 * almost always NULL.
803 while ((par = TAILQ_FIRST(&dvp->v_namecache)) == NULL) {
/* Lost the race: another thread attached an entry first; free ours. */
805 if (TAILQ_FIRST(&dvp->v_namecache) != NULL)
806 free(par, M_VFSCACHE);
808 cache_setvp(par, dvp); /* XXX par not locked */
812 * Deal with "." and "..". In this stage of code development we leave
813 * the returned ncpp NULL. Note that if the namecache is disjoint,
814 * we won't find a vnode for "..".
816 if (cnp->cn_nameptr[0] == '.') {
817 if (cnp->cn_namelen == 1) {
820 numposhits++; /* include in total statistics */
823 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
825 numposhits++; /* include in total statistics */
826 if ((cnp->cn_flags & CNP_MAKEENTRY) == 0)
/* Disjoint namecache: no known parent vnode for "..". */
828 if (par->nc_parent == NULL ||
829 par->nc_parent->nc_vp == NULL) {
832 *vpp = par->nc_parent->nc_vp;
838 * Try to locate an existing entry
/* Hash over the component name, then the parent pointer. */
840 hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
842 hash = fnv_32_buf(&bpar, sizeof(bpar), hash);
844 LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
848 * Zap entries that have timed out.
/* Signed delta comparison handles tick-counter wraparound. */
850 if (ncp->nc_timeout &&
851 (int)(ncp->nc_timeout - ticks) < 0
853 cache_zap(cache_hold(ncp));
858 * Break out if we find a matching entry.
860 if (ncp->nc_parent == par &&
861 ncp->nc_nlen == cnp->cn_namelen &&
862 bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen) == 0
870 * We found an entry but it is unresolved, act the same as if we
871 * failed to locate the entry. cache_enter() will do the right
874 if (ncp && (ncp->nc_flag & NCF_UNRESOLVED)) {
880 * If we failed to locate an entry, return 0 (indicates failure).
883 if ((cnp->cn_flags & CNP_MAKEENTRY) == 0) {
888 gd->gd_nchstats->ncs_miss++;
893 * If we found an entry, but we don't want to have one, we zap it.
895 if ((cnp->cn_flags & CNP_MAKEENTRY) == 0) {
897 gd->gd_nchstats->ncs_badhits++;
903 * If the vnode is not NULL then return the positive match.
907 gd->gd_nchstats->ncs_goodhits++;
914 * If the vnode is NULL we found a negative match. If we want to
915 * create it, purge the negative match and return failure (as if
916 * we hadn't found a match in the first place).
918 if (cnp->cn_nameiop == NAMEI_CREATE) {
920 gd->gd_nchstats->ncs_badhits++;
928 * We found a "negative" match, ENOENT notifies client of this match.
929 * The nc_flag field records whether this is a whiteout. Since there
930 * is no vnode we can use the vnode tailq link field with ncneglist.
/* Move the negative hit to the tail of the LRU list (most recent). */
932 TAILQ_REMOVE(&ncneglist, ncp, nc_vnode);
933 TAILQ_INSERT_TAIL(&ncneglist, ncp, nc_vnode);
934 gd->gd_nchstats->ncs_neghits++;
935 if (ncp->nc_flag & NCF_WHITEOUT)
936 cnp->cn_flags |= CNP_ISWHITEOUT;
942 * Add an entry to the cache. (OLD API)
944 * XXX OLD API ROUTINE! WHEN ALL VFSs HAVE BEEN CLEANED UP THIS PROCEDURE
948 cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
950 struct namecache *par;
951 struct namecache *ncp;
952 struct namecache *new_ncp;
953 struct namecache *bpar;
954 struct nchashhead *nchpp;
958 * If the directory has no namecache entry we must associate one with
959 * it. The name of the entry is not known so it isn't hashed. This
960 * is a severe hack to support the old API.
962 while ((par = TAILQ_FIRST(&dvp->v_namecache)) == NULL) {
/* Lost the race: another thread attached an entry first; free ours. */
964 if (TAILQ_FIRST(&dvp->v_namecache) != NULL)
965 free(par, M_VFSCACHE);
967 cache_setvp(par, dvp);
972 * This may be a bit confusing. "." and ".." are 'virtual' entries.
973 * We do not actually create a namecache entry representing either.
974 * However, the ".." case is used to linkup a potentially disjoint
975 * directory with its parent, to disconnect a directory from its
976 * parent, or to change an existing linkage that may no longer be
977 * correct (as might occur when a subdirectory is renamed).
980 if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
984 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[0] == '.' &&
985 cnp->cn_nameptr[1] == '.'
/* ".." with no target vnode: disconnect dvp from its parent. */
989 cache_unlink_parent(par);
/* Ensure the target vnode has a namecache entry of its own. */
991 while ((ncp = TAILQ_FIRST(&vp->v_namecache)) == NULL) {
993 if (TAILQ_FIRST(&vp->v_namecache) != NULL)
994 free(ncp, M_VFSCACHE);
996 cache_setvp(ncp, vp);
999 * ncp is the new parent of par
/* Re-link dvp's entry under its (possibly new) parent entry. */
1003 cache_unlink_parent(par);
1004 cache_link_parent(par, ncp);
1012 * Locate other entries associated with this vnode and zap them,
1013 * because the purge code may not be able to find them due to
1014 * the topology not yet being consistent. This is a hack (this
1015 * whole routine is a hack, actually, so that makes this a hack
1020 TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
1021 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
1022 cache_zap(cache_hold(ncp));
1029 * Try to find a match in the hash table, allocate a new entry if
1030 * we can't. We have to retry the loop after any potential blocking
/* Hash over the component name, then the parent pointer. */
1033 hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
1035 hash = fnv_32_buf(&bpar, sizeof(bpar), hash);
1039 LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
1043 * Break out if we find a matching entry.
1045 if (ncp->nc_parent == par &&
1046 ncp->nc_nlen == cnp->cn_namelen &&
1047 bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen) == 0
/* No match: preallocate an entry, then retry the lookup loop. */
1054 if (new_ncp == NULL) {
1055 new_ncp = cache_alloc();
1056 new_ncp->nc_name = malloc(cnp->cn_namelen,
1057 M_VFSCACHE, M_WAITOK);
/* Consume the preallocated entry: name it, hash it, link it in. */
1062 ncp->nc_nlen = cnp->cn_namelen;
1063 bcopy(cnp->cn_nameptr, ncp->nc_name, cnp->cn_namelen);
1064 nchpp = NCHHASH(hash);
1065 LIST_INSERT_HEAD(nchpp, ncp, nc_hash);
1066 ncp->nc_flag |= NCF_HASHED;
1067 cache_link_parent(ncp, par);
1068 } else if (new_ncp) {
/* Match found on a retry pass: discard the unused spare. */
1069 free(new_ncp->nc_name, M_VFSCACHE);
1070 free(new_ncp, M_VFSCACHE);
/* (Re)resolve the entry with the supplied vnode (NULL => negative). */
1073 cache_setunresolved(ncp);
1074 cache_setvp(ncp, vp);
1079 if (cnp->cn_flags & CNP_CACHETIMEOUT) {
/* A computed timeout of 0 would mean 'no timeout'; force 1 tick. */
1080 if ((ncp->nc_timeout = ticks + cnp->cn_timeout) == 0)
1081 ncp->nc_timeout = 1;
1085 * If the target vnode is NULL if this is to be a negative cache
1089 ncp->nc_flag &= ~NCF_WHITEOUT;
1090 if (cnp->cn_flags & CNP_ISWHITEOUT)
1091 ncp->nc_flag |= NCF_WHITEOUT;
1096 * Don't cache too many negative hits
/* Reclaim the oldest negative entry (head of the LRU list). */
1098 if (numneg > MINNEG && numneg * ncnegfactor > numcache) {
1099 ncp = TAILQ_FIRST(&ncneglist);
1100 KKASSERT(ncp != NULL);
1101 cache_zap(cache_hold(ncp));
1106 * Name cache initialization, from vfsinit() when we are booting
1114 /* initialise per-cpu namecache effectiveness statistics. */
1115 for (i = 0; i < ncpus; ++i) {
1116 gd = globaldata_find(i);
1117 gd->gd_nchstats = &nchstats[i];
/* Initialize the negative-entry list and allocate the hash table. */
1120 TAILQ_INIT(&ncneglist);
1121 nchashtbl = hashinit(desiredvnodes*2, M_VFSCACHE, &nchash);
1125 * Called from start_init() to bootstrap the root filesystem. Returns
1126 * a referenced, unlocked namecache record.
1129 cache_allocroot(struct vnode *vp)
1131 struct namecache *ncp = cache_alloc();
/* Associate the root vnode and mark the entry as a mount-point root. */
1133 cache_setvp(ncp, vp);
1134 ncp->nc_flag |= NCF_MOUNTPT | NCF_ROOT;
1135 return(cache_hold(ncp));
1139 * vfs_cache_setroot()
1141 * Create an association between the root of our namecache and
1142 * the root vnode. This routine may be called several times during
1145 * If the caller intends to save the returned namecache pointer somewhere
1146 * it must cache_hold() it.
1149 vfs_cache_setroot(struct vnode *nvp, struct namecache *ncp)
/* oncp: previous root association, if any — NOTE(review): body not
 * fully visible here; confirm replacement semantics in the full source. */
1152 struct namecache *oncp;
1166 * Invalidate all namecache entries to a particular vnode as well as
1167 * any direct children of that vnode in the namecache. This is a
1168 * 'catch all' purge used by filesystems that do not know any better.
1170 * A new vnode v_id is generated. Note that no vnode will ever have a
1173 * Note that the linkage between the vnode and its namecache entries will
1174 * be removed, but the namecache entries themselves might stay put due to
1175 * active references from elsewhere in the system or due to the existence of
1176 * the children. The namecache topology is left intact even if we do not
1177 * know what the vnode association is. Such entries will be marked
1180 * XXX: Only time and the size of v_id prevents this from failing:
1181 * XXX: In theory we should hunt down all (struct vnode*, v_id)
1182 * XXX: soft references and nuke them, at least on the global
1183 * XXX: v_id wraparound. The period of resistance can be extended
1184 * XXX: by incrementing each vnodes v_id individually instead of
1185 * XXX: using the global v_id.
1188 cache_purge(struct vnode *vp)
1190 static u_long nextid;
1191 struct namecache *ncp;
1192 struct namecache *scan;
1195 * Disassociate the vnode from its namecache entries along with
1196 * (for historical reasons) any direct children.
1198 while ((ncp = TAILQ_FIRST(&vp->v_namecache)) != NULL) {
/* cache_zap() can modify nc_list; restart the scan after each zap. */
1201 restart: /* YYY hack, fix me */
1202 TAILQ_FOREACH(scan, &ncp->nc_list, nc_entry) {
1203 if ((scan->nc_flag & NCF_UNRESOLVED) == 0) {
1204 cache_zap(cache_hold(scan));
1212 * Calculate a new unique id for ".." handling
/* Never hand out 0 or the vnode's current id. */
1216 } while (nextid == vp->v_id || nextid == 0);
1221 * Flush all entries referencing a particular filesystem.
1223 * Since we need to check it anyway, we will flush all the invalid
1224 * entries at the same time.
1227 cache_purgevfs(struct mount *mp)
1229 struct nchashhead *nchpp;
1230 struct namecache *ncp, *nnp;
1233 * Scan hash tables for applicable entries.
/* Walk every hash bucket, from the last down to the first. */
1235 for (nchpp = &nchashtbl[nchash]; nchpp >= nchashtbl; nchpp--) {
1236 ncp = LIST_FIRST(nchpp);
/* Grab the next entry before ncp can be removed from the chain. */
1240 nnp = LIST_NEXT(ncp, nc_hash);
1243 if (ncp->nc_vp && ncp->nc_vp->v_mount == mp)
1255 * Test whether the vnode is at a leaf in the namecache tree.
1257 * Returns 0 if it is a leaf, -1 if it isn't.
1260 cache_leaf_test(struct vnode *vp)
1262 struct namecache *scan;
1263 struct namecache *ncp;
/* A resolved child under any of vp's entries makes vp a non-leaf. */
1265 TAILQ_FOREACH(scan, &vp->v_namecache, nc_vnode) {
1266 TAILQ_FOREACH(ncp, &scan->nc_list, nc_entry) {
1267 /* YYY && ncp->nc_vp->v_type == VDIR ? */
1268 if (ncp->nc_vp != NULL)
1276 * Perform canonical checks and cache lookup and pass on to filesystem
1277 * through the vop_cachedlookup only if needed.
1280 * struct vnode a_dvp;
1281 * struct vnode **a_vpp;
1282 * struct componentname *a_cnp;
1286 vfs_cache_lookup(struct vop_lookup_args *ap)
1288 struct vnode *dvp, *vp;
1291 struct vnode **vpp = ap->a_vpp;
1292 struct componentname *cnp = ap->a_cnp;
1293 struct ucred *cred = cnp->cn_cred;
1294 int flags = cnp->cn_flags;
1295 struct thread *td = cnp->cn_td;
1296 u_long vpid; /* capability number of vnode */
1300 lockparent = flags & CNP_LOCKPARENT;
/* Lookups only make sense in directories. */
1302 if (dvp->v_type != VDIR)
/* Refuse delete/rename of the last component on a read-only mount. */
1305 if ((flags & CNP_ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
1306 (cnp->cn_nameiop == NAMEI_DELETE || cnp->cn_nameiop == NAMEI_RENAME)) {
/* The caller needs search (execute) permission on the directory. */
1310 error = VOP_ACCESS(dvp, VEXEC, cred, td);
1315 error = cache_lookup(dvp, vpp, cnp);
/* Cache miss: hand the lookup to the filesystem proper. */
1318 return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
1320 if (error == ENOENT)
1325 cnp->cn_flags &= ~CNP_PDIRUNLOCK;
1326 if (dvp == vp) { /* lookup on "." */
/* "..": unlock the parent before locking the child to avoid deadlock. */
1329 } else if (flags & CNP_ISDOTDOT) {
1330 VOP_UNLOCK(dvp, NULL, 0, td);
1331 cnp->cn_flags |= CNP_PDIRUNLOCK;
1332 error = vget(vp, NULL, LK_EXCLUSIVE, td);
1333 if (!error && lockparent && (flags & CNP_ISLASTCN)) {
1334 if ((error = vn_lock(dvp, NULL, LK_EXCLUSIVE, td)) == 0)
1335 cnp->cn_flags &= ~CNP_PDIRUNLOCK;
1338 error = vget(vp, NULL, LK_EXCLUSIVE, td);
1339 if (!lockparent || error || !(flags & CNP_ISLASTCN)) {
1340 VOP_UNLOCK(dvp, NULL, 0, td);
1341 cnp->cn_flags |= CNP_PDIRUNLOCK;
1345 * Check that the capability number did not change
1346 * while we were waiting for the lock.
1349 if (vpid == vp->v_id)
1352 if (lockparent && dvp != vp && (flags & CNP_ISLASTCN)) {
1353 VOP_UNLOCK(dvp, NULL, 0, td);
1354 cnp->cn_flags |= CNP_PDIRUNLOCK;
/* The cached hit went stale: relock dvp and retry via the filesystem. */
1357 if (cnp->cn_flags & CNP_PDIRUNLOCK) {
1358 error = vn_lock(dvp, NULL, LK_EXCLUSIVE, td);
1361 cnp->cn_flags &= ~CNP_PDIRUNLOCK;
1363 return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
/* Tunable flag exported as debug.disablecwd (read/write at runtime). */
1366 static int disablecwd;
1367 SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, "");
/* getcwd statistics, exported read-only under vfs.cache. */
1369 static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls);
1370 static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1);
1371 static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2);
1372 static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3);
1373 static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4);
1374 static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound);
/*
 * __getcwd() system call: build the current directory path via
 * kern_getcwd() and copy it out to the user buffer.
 */
1377 __getcwd(struct __getcwd_args *uap)
1387 buflen = uap->buflen;
/* Clamp the user-supplied buffer length to MAXPATHLEN. */
1390 if (buflen > MAXPATHLEN)
1391 buflen = MAXPATHLEN;
1393 buf = malloc(buflen, M_TEMP, M_WAITOK);
1394 bp = kern_getcwd(buf, buflen, &error);
/* Copy out the path including its terminating NUL. */
1396 error = copyout(bp, uap->buf, strlen(bp) + 1);
/*
 * Build the current directory path backwards into buf by walking the
 * namecache parent chain from the process cwd toward the root.
 * Returns a pointer into buf at the start of the path; *error is set
 * to a non-zero value on failure.
 */
1402 kern_getcwd(char *buf, size_t buflen, int *error)
1404 struct proc *p = curproc;
1406 int i, slash_prefixed;
1407 struct filedesc *fdp;
1408 struct namecache *ncp;
/* Walk upward until we reach the process root or the system root. */
1417 for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) {
1418 if (vp->v_flag & VROOT) {
1419 if (vp->v_mount == NULL) { /* forced unmount */
/* Cross a mount point: continue from the covered vnode below it. */
1423 vp = vp->v_mount->mnt_vnodecovered;
/* Find an ncp for this vnode whose parent association is intact. */
1426 TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
1427 if (ncp->nc_parent && ncp->nc_parent->nc_vp &&
/* Prepend the component name, last character first. */
1437 for (i = ncp->nc_nlen - 1; i >= 0; i--) {
1443 *--bp = ncp->nc_name[i];
/* Continue the walk from the parent directory's vnode. */
1452 vp = ncp->nc_parent->nc_vp;
1454 if (!slash_prefixed) {
1468 * Thus begins the fullpath magic.
/* Redefinition of STATNODE for u_int statistics under vfs.cache. */
1472 #define STATNODE(name) \
1473 static u_int name; \
1474 SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")
/* Tunable flag exported as debug.disablefullpath (read/write). */
1476 static int disablefullpath;
1477 SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW,
1478 &disablefullpath, 0, "");
/* vn_fullpath statistics counters. */
1480 STATNODE(numfullpathcalls);
1481 STATNODE(numfullpathfail1);
1482 STATNODE(numfullpathfail2);
1483 STATNODE(numfullpathfail3);
1484 STATNODE(numfullpathfail4);
1485 STATNODE(numfullpathfound);
1488 vn_fullpath(struct proc *p, struct vnode *vn, char **retbuf, char **freebuf)
1491 int i, slash_prefixed;
1492 struct filedesc *fdp;
1493 struct namecache *ncp;
1497 if (disablefullpath)
1503 /* vn is NULL, client wants us to use p->p_textvp */
1505 if ((vn = p->p_textvp) == NULL)
1509 buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
1510 bp = buf + MAXPATHLEN - 1;
1514 for (vp = vn; vp != fdp->fd_rdir && vp != rootvnode;) {
1515 if (vp->v_flag & VROOT) {
1516 if (vp->v_mount == NULL) { /* forced unmount */
1520 vp = vp->v_mount->mnt_vnodecovered;
1523 TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
1524 if (ncp->nc_parent && ncp->nc_parent->nc_vp &&
1534 for (i = ncp->nc_nlen - 1; i >= 0; i--) {
1540 *--bp = ncp->nc_name[i];
1549 vp = ncp->nc_parent->nc_vp;
1551 if (!slash_prefixed) {