2 * Copyright (c) 2003,2004 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * Copyright (c) 1989, 1993, 1995
35 * The Regents of the University of California. All rights reserved.
37 * This code is derived from software contributed to Berkeley by
38 * Poul-Henning Kamp of the FreeBSD Project.
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. All advertising materials mentioning features or use of this software
49 * must display the following acknowledgement:
50 * This product includes software developed by the University of
51 * California, Berkeley and its contributors.
52 * 4. Neither the name of the University nor the names of its contributors
53 * may be used to endorse or promote products derived from this software
54 * without specific prior written permission.
56 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95
69 * $FreeBSD: src/sys/kern/vfs_cache.c,v 1.42.2.6 2001/10/05 20:07:03 dillon Exp $
70 * $DragonFly: src/sys/kern/vfs_cache.c,v 1.26 2004/09/26 01:24:52 dillon Exp $
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/kernel.h>
76 #include <sys/sysctl.h>
77 #include <sys/mount.h>
78 #include <sys/vnode.h>
79 #include <sys/malloc.h>
80 #include <sys/sysproto.h>
82 #include <sys/namei.h>
83 #include <sys/filedesc.h>
84 #include <sys/fnv_hash.h>
85 #include <sys/globaldata.h>
86 #include <sys/kern_syscall.h>
89 * Random lookups in the cache are accomplished with a hash table using
90 * a hash key of (nc_src_vp, name).
92 * Negative entries may exist and correspond to structures where nc_vp
93 * is NULL. In a negative entry, NCF_WHITEOUT will be set if the entry
94 * corresponds to a whited-out directory entry (verses simply not finding the
97 * Upon reaching the last segment of a path, if the reference is for DELETE,
98 * or NOCACHE is set (rewrite), and the name is located in the cache, it
103 * Structures associated with name cacheing.
/*
 * Hash-chain selector: mask an FNV-1 hash into an nchashtbl bucket.
 * nchash is (table size - 1), set by hashinit() at boot.
 */
105 #define NCHHASH(hash) (&nchashtbl[(hash) & nchash])
108 MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
/* Global name-cache state: hash table, negative-entry LRU, static root. */
110 static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */
111 static struct namecache_list ncneglist; /* instead of vnode */
112 static struct namecache rootnamecache; /* Dummy node */
/* Debug and tuning knobs exported under the debug.* sysctl tree. */
114 static int nczapcheck; /* panic on bad release */
115 SYSCTL_INT(_debug, OID_AUTO, nczapcheck, CTLFLAG_RW, &nczapcheck, 0, "");
117 static u_long nchash; /* size of hash table */
118 SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
120 static u_long ncnegfactor = 16; /* ratio of negative entries */
121 SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
/* NOTE(review): comment says "cache entries" but numneg counts negative entries. */
123 static u_long numneg; /* number of cache entries allocated */
124 SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
126 static u_long numcache; /* number of cache entries allocated */
127 SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
129 static u_long numunres; /* number of unresolved entries */
130 SYSCTL_ULONG(_debug, OID_AUTO, numunres, CTLFLAG_RD, &numunres, 0, "");
/* Read-only structure sizes, handy when debugging from userland. */
132 SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
133 SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");
136 * The new name cache statistics
138 SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
/*
 * STATNODE(mode, name, var) - declare a u_long statistics counter under
 * vfs.cache.* via sysctl.  Used for the legacy aggregate counters below.
 */
139 #define STATNODE(mode, name, var) \
140 SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
141 STATNODE(CTLFLAG_RD, numneg, &numneg);
142 STATNODE(CTLFLAG_RD, numcache, &numcache);
/* Legacy lookup-path counters; hit/miss/zap accounting. */
143 static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
144 static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
145 static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
146 static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
147 static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
148 static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
149 static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
150 static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
151 static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
152 static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);
/* Per-cpu effectiveness statistics, one slot per possible cpu. */
154 struct nchstats nchstats[SMP_MAXCPU];
156 * Export VFS cache effectiveness statistics to user-land.
158 * The statistics are left for aggregation to user-land so
159 * neat things can be achieved, like observing per-CPU cache
/*
 * sysctl handler: copy each cpu's nchstats structure out to userland,
 * one struct per cpu, so aggregation/per-cpu analysis is done there.
 * NOTE(review): fragmentary extract -- declarations, braces and the
 * error-return path of the original body are elided here.
 */
163 sysctl_nchstats(SYSCTL_HANDLER_ARGS)
165 struct globaldata *gd;
169 for (i = 0; i < ncpus; ++i) {
170 gd = globaldata_find(i);
171 if ((error = SYSCTL_OUT(req, (void *)&(*gd->gd_nchstats),
172 sizeof(struct nchstats))))
/* Export as an opaque array of struct nchstats ("S,nchstats"). */
178 SYSCTL_PROC(_vfs_cache, OID_AUTO, nchstats, CTLTYPE_OPAQUE|CTLFLAG_RD,
179 0, 0, sysctl_nchstats, "S,nchstats", "VFS cache effectiveness statistics");
181 static void cache_zap(struct namecache *ncp);
184 * cache_hold() and cache_drop() prevent the premature deletion of a
185 * namecache entry but do not prevent operations (such as zapping) on
186 * that namecache entry.
/*
 * Reference counting primitives.  _cache_hold() bumps nc_refs (body
 * elided in this extract); _cache_drop() releases a reference and, when
 * this is the last ref on an unresolved, childless entry, zaps it.
 * cache_hold()/cache_drop() are the exported wrappers.
 */
190 _cache_hold(struct namecache *ncp)
198 _cache_drop(struct namecache *ncp)
200 KKASSERT(ncp->nc_refs > 0);
/* Last ref + unresolved + no children -> candidate for destruction. */
201 if (ncp->nc_refs == 1 &&
202 (ncp->nc_flag & NCF_UNRESOLVED) &&
203 TAILQ_EMPTY(&ncp->nc_list)
212 cache_hold(struct namecache *ncp)
214 return(_cache_hold(ncp));
218 cache_drop(struct namecache *ncp)
224 * Namespace locking. The caller must already hold a reference to the
225 * namecache structure in order to lock/unlock it.
227 * Note that holding a locked namecache structure does not prevent the
228 * underlying vnode from being destroyed and the namecache state moving
229 * to an unresolved state. XXX MP
/*
 * cache_lock() - acquire the (recursive) exclusive namespace lock on a
 * namecache entry.  The caller must already hold a reference.  If the
 * lock is free it is taken; if already owned by curthread the exlock
 * count recurses; otherwise we set NCF_LOCKREQ and tsleep() on the ncp,
 * printing diagnostics if the sleep times out after hz ticks.
 * NOTE(review): fragmentary extract -- the acquisition statements and
 * loop structure of the original body are elided here.
 */
232 cache_lock(struct namecache *ncp)
234 thread_t td = curthread;
237 KKASSERT(ncp->nc_refs != 0);
239 if (ncp->nc_exlocks == 0) {
244 if (ncp->nc_locktd == td) {
248 ncp->nc_flag |= NCF_LOCKREQ;
249 if (tsleep(ncp, 0, "clock", hz) == EWOULDBLOCK) {
/* Timed out waiting: report which name we are blocked on. */
252 printf("cache_lock: blocked on %*.*s\n",
253 ncp->nc_nlen, ncp->nc_nlen,
259 printf("cache_lock: unblocked %*.*s\n",
260 ncp->nc_nlen, ncp->nc_nlen, ncp->nc_name);
/*
 * cache_unlock() - release one recursion level of the exclusive lock.
 * On the final release the owner is cleared and any waiter flagged via
 * NCF_LOCKREQ is woken (wakeup elided in this extract).
 */
265 cache_unlock(struct namecache *ncp)
267 thread_t td = curthread;
269 KKASSERT(ncp->nc_refs > 0);
270 KKASSERT(ncp->nc_exlocks > 0);
271 KKASSERT(ncp->nc_locktd == td);
272 if (--ncp->nc_exlocks == 0) {
273 ncp->nc_locktd = NULL;
274 if (ncp->nc_flag & NCF_LOCKREQ) {
275 ncp->nc_flag &= ~NCF_LOCKREQ;
282 * Unlock and release a namecache entry.
/* Convenience wrapper; body (cache_unlock + _cache_drop) elided here. */
285 cache_put(struct namecache *ncp)
/*
 * cache_link_parent() - attach ncp under parent 'par'.  The entry must
 * not already have a parent.  The 0->1 children transition branch is
 * present but its body (presumably a hold on the parent's vnode -- TODO
 * confirm, lines elided) is missing from this extract.
 */
292 cache_link_parent(struct namecache *ncp, struct namecache *par)
294 KKASSERT(ncp->nc_parent == NULL);
295 ncp->nc_parent = par;
296 if (TAILQ_EMPTY(&par->nc_list)) {
300 TAILQ_INSERT_HEAD(&par->nc_list, ncp, nc_entry);
/*
 * cache_unlink_parent() - detach ncp from its parent, if any.  A
 * temporary hold is taken on the parent while unlinking; the 1->0
 * children action and the matching drop are elided in this extract.
 */
304 cache_unlink_parent(struct namecache *ncp)
306 struct namecache *par;
308 if ((par = ncp->nc_parent) != NULL) {
309 ncp->nc_parent = NULL;
310 par = cache_hold(par);
311 TAILQ_REMOVE(&par->nc_list, ncp, nc_entry);
312 if (par->nc_vp && TAILQ_EMPTY(&par->nc_list))
/*
 * cache_alloc() - allocate a zeroed namecache entry associated with vp.
 * Depending on whether vp is NULL the entry is linked either onto the
 * vnode's v_namecache list or onto the negative-entry list (the branch
 * structure is elided in this extract).
 */
318 static struct namecache *
319 cache_alloc(struct vnode *vp)
321 struct namecache *ncp;
323 ncp = malloc(sizeof(*ncp), M_VFSCACHE, M_WAITOK|M_ZERO);
324 TAILQ_INIT(&ncp->nc_list);
327 TAILQ_INSERT_HEAD(&vp->v_namecache, ncp, nc_vnode);
330 TAILQ_INSERT_TAIL(&ncneglist, ncp, nc_vnode);
/*
 * cache_alloc_unresolved() - allocate a zeroed namecache entry in the
 * UNRESOLVED state; it is not linked to any vnode yet.  The vp argument
 * appears unused in the visible fragment -- remainder of body elided.
 */
337 static struct namecache *
338 cache_alloc_unresolved(struct vnode *vp)
340 struct namecache *ncp;
342 ncp = malloc(sizeof(*ncp), M_VFSCACHE, M_WAITOK|M_ZERO);
343 TAILQ_INIT(&ncp->nc_list);
344 ncp->nc_flag = NCF_UNRESOLVED;
350 * Try to destroy a namecache entry. The entry is disassociated from its
351 * vnode or ncneglist and reverted to an UNRESOLVED state.
353 * Then, if there are no additional references to the ncp and we can
354 * successfully delete the children, the entry is also removed from the
355 * namecache hashlist / topology.
357 * References or undeletable children will prevent the entry from being
358 * removed from the topology. The entry may be revalidated (typically
359 * by cache_enter()) at a later time. Children remain because:
361 * + we have tried to delete a node rather then a leaf in the topology.
362 * + the presence of negative entries (we try to scrap these).
363 * + an entry or child has a non-zero ref count and cannot be scrapped.
365 * This function must be called with the ncp held and will drop the ref
366 * count during zapping.
/*
 * cache_zap() - revert an entry to UNRESOLVED, disassociating it from
 * its vnode (or from ncneglist for a negative entry), then attempt to
 * scrap it and tail-recurse up through now-empty parents.  Called with
 * the ncp held; the hold is consumed during zapping.  Entries with
 * extra refs or children survive in the topology and may be
 * revalidated later by cache_enter().
 * NOTE(review): fragmentary extract -- counter updates, vnode drops and
 * several closing braces of the original body are elided.
 */
369 cache_zap(struct namecache *ncp)
371 struct namecache *par;
375 * Disassociate the vnode or negative cache ref and set NCF_UNRESOLVED.
377 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
378 ncp->nc_flag |= NCF_UNRESOLVED;
380 if ((vp = ncp->nc_vp) != NULL) {
382 ncp->nc_vp = NULL; /* safety */
383 TAILQ_REMOVE(&vp->v_namecache, ncp, nc_vnode);
384 if (!TAILQ_EMPTY(&ncp->nc_list))
/* Negative entry: unhook from the negative-entry LRU instead. */
387 TAILQ_REMOVE(&ncneglist, ncp, nc_vnode);
393 * Try to scrap the entry and possibly tail-recurse on its parent.
394 * We only scrap unref'd (other then our ref) unresolved entries,
395 * we do not scrap 'live' entries.
397 while (ncp->nc_flag & NCF_UNRESOLVED) {
399 * Someone other then us has a ref, stop.
401 if (ncp->nc_refs > 1)
405 * We have children, stop.
407 if (!TAILQ_EMPTY(&ncp->nc_list))
411 * Ok, we can completely destroy and free this entry. Sanity
412 * check it against our static rootnamecache structure,
413 * then remove it from the hash.
415 KKASSERT(ncp != &rootnamecache);
417 if (ncp->nc_flag & NCF_HASHED) {
418 ncp->nc_flag &= ~NCF_HASHED;
419 LIST_REMOVE(ncp, nc_hash);
423 * Unlink from its parent and free, then loop on the
424 * parent. XXX temp hack, in stage-3 parent is never NULL
426 if ((par = ncp->nc_parent) != NULL) {
427 par = cache_hold(par);
428 TAILQ_REMOVE(&par->nc_list, ncp, nc_entry);
429 if (par->nc_vp && TAILQ_EMPTY(&par->nc_list))
/* Poison before free so stale pointers trip asserts. */
433 ncp->nc_refs = -1; /* safety */
434 ncp->nc_parent = NULL; /* safety */
436 free(ncp->nc_name, M_VFSCACHE);
437 free(ncp, M_VFSCACHE);
439 if (par == NULL) /* temp hack */
440 return; /* temp hack */
447 * NEW NAMECACHE LOOKUP API
449 * Lookup an entry in the cache. A locked, referenced, non-NULL
450 * entry is *always* returned, even if the supplied component is illegal.
451 * The returned namecache entry should be returned to the system with
452 * cache_put() or cache_unlock() + cache_drop().
454 * namecache locks are recursive but care must be taken to avoid lock order
457 * Nobody else will be able to manipulate the associated namespace (e.g.
458 * create, delete, rename, rename-target) until the caller unlocks the
461 * The returned entry will be in one of three states: positive hit (non-null
462 * vnode), negative hit (null vnode), or unresolved (NCF_UNRESOLVED is set).
463 * Unresolved entries must be resolved through the filesystem to associate the
464 * vnode and/or determine whether a positive or negative hit has occured.
466 * It is not necessary to lock a directory in order to lock namespace under
467 * that directory. In fact, it is explicitly not allowed to do that. A
468 * directory is typically only locked when being created, renamed, or
471 * The directory (par) may be unresolved, in which case any returned child
472 * will likely also be marked unresolved. Likely but not guarenteed. Since
473 * the filesystem VOP_NEWLOOKUP() requires a resolved directory vnode the
474 * caller is responsible for resolving the namecache chain top-down. This API
475 * specifically allows whole chains to be created in an unresolved state.
478 cache_nclookup(struct namecache *par, struct componentname *cnp)
485 * Lookup an entry in the cache
487 * Lookup is called with dvp pointing to the directory to search,
488 * cnp pointing to the name of the entry being sought.
490 * If the lookup succeeds, the vnode is returned in *vpp, and a
491 * status of -1 is returned.
493 * If the lookup determines that the name does not exist (negative cacheing),
494 * a status of ENOENT is returned.
496 * If the lookup fails, a status of zero is returned.
498 * Note that UNRESOLVED entries are ignored. They are not negative cache
/*
 * cache_lookup() - legacy lookup API.  dvp is the directory to search,
 * cnp names the component.  Returns -1 with *vpp set on a positive hit,
 * ENOENT on a cached negative hit, 0 on a miss.  UNRESOLVED entries are
 * ignored.  "." and ".." are handled specially and never create
 * entries.  NOTE(review): fragmentary extract -- returns, brace
 * structure and several statistic updates are elided throughout.
 */
502 cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
504 struct namecache *ncp;
505 struct namecache *par;
507 globaldata_t gd = mycpu;
512 * Obtain the namecache entry associated with dvp, creating one if
513 * necessary. If we have to create one we have insufficient
514 * information to hash it or even supply the name, but we still
515 * need one so we can link it in.
517 * NOTE: in this stage of development, the passed 'par' is
518 * almost always NULL.
520 if ((par = TAILQ_FIRST(&dvp->v_namecache)) == NULL)
521 par = cache_alloc(dvp);
524 * Deal with "." and "..". In this stage of code development we leave
525 * the returned ncpp NULL. Note that if the namecache is disjoint,
526 * we won't find a vnode for "..".
528 if (cnp->cn_nameptr[0] == '.') {
529 if (cnp->cn_namelen == 1) {
532 numposhits++; /* include in total statistics */
535 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
537 numposhits++; /* include in total statistics */
538 if ((cnp->cn_flags & CNP_MAKEENTRY) == 0)
/* ".." fails if the parent link or its vnode is unknown (disjoint). */
540 if (par->nc_parent == NULL ||
541 par->nc_parent->nc_vp == NULL) {
544 *vpp = par->nc_parent->nc_vp;
550 * Try to locate an existing entry
/* Hash key is FNV-1 over (name, parent-ncp-pointer). */
552 hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
553 hash = fnv_32_buf(&par, sizeof(par), hash);
555 printf("DVP %p/%p %08x %*.*s\n", dvp, par, hash, (int)cnp->cn_namelen, (int)cnp->cn_namelen, cnp->cn_nameptr);
557 LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
559 if (nczapcheck > 1) {
560 printf("TEST ncp par=%p %*.*s\n",
561 ncp->nc_parent, ncp->nc_nlen, ncp->nc_nlen,
566 * Zap entries that have timed out.
568 if (ncp->nc_timeout &&
569 (int)(ncp->nc_timeout - ticks) < 0
573 cache_zap(cache_hold(ncp));
578 * Break out if we find a matching entry. UNRESOLVED entries
579 * never match (they are in the middle of being destroyed).
581 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0 &&
582 ncp->nc_parent == par &&
583 ncp->nc_nlen == cnp->cn_namelen &&
584 bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen) == 0
594 * If we failed to locate an entry, return 0 (indicates failure).
597 if ((cnp->cn_flags & CNP_MAKEENTRY) == 0) {
602 gd->gd_nchstats->ncs_miss++;
604 printf("MISS %p/%p %*.*s/%*.*s\n", dvp, par,
605 par->nc_nlen, par->nc_nlen, (par->nc_name ? par->nc_name : ""),
606 (int)cnp->cn_namelen, (int)cnp->cn_namelen, cnp->cn_nameptr);
612 * If we found an entry, but we don't want to have one, we zap it.
614 if ((cnp->cn_flags & CNP_MAKEENTRY) == 0) {
616 gd->gd_nchstats->ncs_badhits++;
622 * If the vnode is not NULL then return the positive match.
626 gd->gd_nchstats->ncs_goodhits++;
633 * If the vnode is NULL we found a negative match. If we want to
634 * create it, purge the negative match and return failure (as if
635 * we hadn't found a match in the first place).
637 if (cnp->cn_nameiop == NAMEI_CREATE) {
639 gd->gd_nchstats->ncs_badhits++;
647 * We found a "negative" match, ENOENT notifies client of this match.
648 * The nc_flag field records whether this is a whiteout. Since there
649 * is no vnode we can use the vnode tailq link field with ncneglist.
/* Move the negative hit to the LRU tail so it is evicted last. */
651 TAILQ_REMOVE(&ncneglist, ncp, nc_vnode);
652 TAILQ_INSERT_TAIL(&ncneglist, ncp, nc_vnode);
653 gd->gd_nchstats->ncs_neghits++;
654 if (ncp->nc_flag & NCF_WHITEOUT)
655 cnp->cn_flags |= CNP_ISWHITEOUT;
661 * Generate a special linkage between the mount point and the root of the
662 * mounted filesystem in order to maintain the namecache topology across
663 * a mount point. The special linkage has a 0-length name component
664 * and sets NCF_MOUNTPT.
/*
 * cache_mount() - create a special 0-length-name NCF_MOUNTPT linkage
 * between the covered vnode (dvp) and the root of the mounted
 * filesystem (tvp) so the namecache topology spans the mount point.
 * If an equivalent linkage already exists nothing is done.
 * NOTE(review): fragmentary extract -- the early-return on a match and
 * some intermediate statements are elided.
 */
667 cache_mount(struct vnode *dvp, struct vnode *tvp)
669 struct namecache *ncp;
670 struct namecache *par;
671 struct nchashhead *nchpp;
675 * If a linkage already exists we do not have to do anything.
/* Key: empty name hashed with the covered vnode's v_id. */
677 hash = fnv_32_buf("", 0, FNV1_32_INIT);
678 hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
679 LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
681 if (ncp->nc_vp == tvp &&
684 ncp->nc_parent->nc_vp == dvp
690 if ((par = TAILQ_FIRST(&dvp->v_namecache)) == NULL)
691 par = cache_alloc(dvp);
694 * Otherwise create a new linkage.
696 ncp = cache_alloc(tvp);
697 ncp->nc_flag = NCF_MOUNTPT;
698 cache_link_parent(ncp, par);
/* Hash recomputed identically for insertion of the new linkage. */
703 hash = fnv_32_buf("", 0, FNV1_32_INIT);
704 hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
705 nchpp = NCHHASH(hash);
706 LIST_INSERT_HEAD(nchpp, ncp, nc_hash);
708 ncp->nc_flag |= NCF_HASHED;
712 * Add an entry to the cache.
/*
 * cache_enter() - add an entry for (dvp, name) -> vp to the cache,
 * handling the virtual "." and ".." cases, negative entries (vp ==
 * NULL, optionally whiteout), optional timeout, and negative-entry
 * pressure relief.  NOTE(review): fragmentary extract -- returns,
 * braces, numcache/numneg accounting and several statements of the
 * original body are elided.
 */
715 cache_enter(struct vnode *dvp, struct namecache *par, struct vnode *vp, struct componentname *cnp)
717 struct namecache *ncp;
718 struct namecache *bpar;
719 struct nchashhead *nchpp;
724 * If the directory has no namecache entry we must associate one with
725 * it. The name of the entry is not known so it isn't hashed.
728 if ((par = TAILQ_FIRST(&dvp->v_namecache)) == NULL)
729 par = cache_alloc(dvp);
733 * This may be a bit confusing. "." and ".." are 'virtual' entries.
734 * We do not actually create a namecache entry representing either.
735 * However, the ".." case is used to linkup a potentially disjoint
736 * directory with its parent, to disconnect a directory from its
737 * parent, or to change an existing linkage that may no longer be
738 * correct (as might occur when a subdirectory is renamed).
741 if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')
743 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[0] == '.' &&
744 cnp->cn_nameptr[1] == '.'
748 cache_unlink_parent(par);
750 if ((ncp = TAILQ_FIRST(&vp->v_namecache)) == NULL)
751 ncp = cache_alloc(vp);
/* ".." re-link: the target's entry becomes the parent of dvp's entry. */
754 cache_unlink_parent(par);
755 cache_link_parent(par, ncp); /* ncp is parent of par */
762 * Locate other entries associated with this vnode and zap them,
763 * because the purge code may not be able to find them due to
764 * the topology not yet being consistent. This is a temporary
769 TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
770 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
771 cache_zap(cache_hold(ncp));
/* Hash over (name, parent pointer), matching cache_lookup(). */
777 hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
779 hash = fnv_32_buf(&bpar, sizeof(bpar), hash);
782 printf("ENTER %p/%p %08x '%*.*s' %p ", dvp, par, hash, (int)cnp->cn_namelen, (int)cnp->cn_namelen, cnp->cn_nameptr, vp);
785 name = malloc(cnp->cn_namelen, M_TEMP ? 0 : 0, 0) /* NOTE(review): see original line below */
786 ncp = cache_alloc(vp);
793 if (cnp->cn_flags & CNP_CACHETIMEOUT) {
794 if ((ncp->nc_timeout = ticks + cnp->cn_timeout) == 0)
799 * Linkup the parent pointer, bump the parent vnode's hold
800 * count when we go from 0->1 children.
802 cache_link_parent(ncp, par);
805 * Add to the hash table
808 ncp->nc_nlen = cnp->cn_namelen;
809 bcopy(cnp->cn_nameptr, ncp->nc_name, cnp->cn_namelen);
810 nchpp = NCHHASH(hash);
811 LIST_INSERT_HEAD(nchpp, ncp, nc_hash);
812 ncp->nc_flag |= NCF_HASHED;
815 * If the target vnode is NULL if this is to be a negative cache
819 ncp->nc_flag &= ~NCF_WHITEOUT;
820 if (cnp->cn_flags & CNP_ISWHITEOUT)
821 ncp->nc_flag |= NCF_WHITEOUT;
825 * Don't cache too many negative hits
827 if (numneg > MINNEG && numneg * ncnegfactor > numcache) {
828 ncp = TAILQ_FIRST(&ncneglist);
829 KKASSERT(ncp != NULL);
830 cache_zap(cache_hold(ncp));
835 * Name cache initialization, from vfsinit() when we are booting
837 * rootnamecache is initialized such that it cannot be recursively deleted.
/*
 * Name cache initialization (from vfsinit() at boot) -- the function
 * signature line is elided in this extract.  Sets up per-cpu stats
 * pointers, the negative list, the hash table, and the static
 * rootnamecache, which keeps a permanent ref so it can never be freed.
 */
845 /* initialise per-cpu namecache effectiveness statistics. */
846 for (i = 0; i < ncpus; ++i) {
847 gd = globaldata_find(i);
848 gd->gd_nchstats = &nchstats[i];
851 TAILQ_INIT(&ncneglist);
/* Hash table sized relative to desiredvnodes; nchash gets the mask. */
852 nchashtbl = hashinit(desiredvnodes*2, M_VFSCACHE, &nchash);
853 TAILQ_INIT(&rootnamecache.nc_list);
854 rootnamecache.nc_flag |= NCF_HASHED | NCF_ROOT | NCF_UNRESOLVED;
855 rootnamecache.nc_refs = 1;
859 * vfs_cache_setroot()
861 * Create an association between the root of our namecache and
862 * the root vnode. This routine may be called several times during
/*
 * vfs_cache_setroot() - associate the static rootnamecache with the
 * root vnode nvp.  May be called several times during boot; the old
 * association (if any) is zapped first.  The permanent ref asserted
 * below prevents cache_zap() from freeing the static structure.
 * NOTE(review): fragmentary extract -- the nvp == NULL (negative)
 * branch structure is partially elided.
 */
866 vfs_cache_setroot(struct vnode *nvp)
868 KKASSERT(rootnamecache.nc_refs > 0); /* don't accidently free */
869 cache_zap(cache_hold(&rootnamecache));
871 rootnamecache.nc_vp = nvp;
872 rootnamecache.nc_flag &= ~NCF_UNRESOLVED;
875 if (!TAILQ_EMPTY(&rootnamecache.nc_list))
877 TAILQ_INSERT_HEAD(&nvp->v_namecache, &rootnamecache, nc_vnode);
880 TAILQ_INSERT_TAIL(&ncneglist, &rootnamecache, nc_vnode);
881 rootnamecache.nc_flag &= ~NCF_WHITEOUT;
886 * Invalidate all namecache entries to a particular vnode as well as
887 * any direct children of that vnode in the namecache. This is a
888 * 'catch all' purge used by filesystems that do not know any better.
890 * A new vnode v_id is generated. Note that no vnode will ever have a
893 * Note that the linkage between the vnode and its namecache entries will
894 * be removed, but the namecache entries themselves might stay put due to
895 * active references from elsewhere in the system or due to the existance of
896 * the children. The namecache topology is left intact even if we do not
897 * know what the vnode association is. Such entries will be marked
900 * XXX: Only time and the size of v_id prevents this from failing:
901 * XXX: In theory we should hunt down all (struct vnode*, v_id)
902 * XXX: soft references and nuke them, at least on the global
903 * XXX: v_id wraparound. The period of resistance can be extended
904 * XXX: by incrementing each vnodes v_id individually instead of
905 * XXX: using the global v_id.
/*
 * cache_purge() - invalidate all namecache entries for vp and (for
 * historical reasons) its direct children, then assign vp a fresh,
 * never-zero v_id for ".." handling.  Entry structures may survive in
 * the topology if referenced elsewhere.  NOTE(review): fragmentary
 * extract -- the zap of ncp itself and the nextid increment are elided.
 */
908 cache_purge(struct vnode *vp)
910 static u_long nextid;
911 struct namecache *ncp;
912 struct namecache *scan;
915 * Disassociate the vnode from its namecache entries along with
916 * (for historical reasons) any direct children.
918 while ((ncp = TAILQ_FIRST(&vp->v_namecache)) != NULL) {
/* Zapping a child mutates nc_list, so the scan must restart. */
921 restart: /* YYY hack, fix me */
922 TAILQ_FOREACH(scan, &ncp->nc_list, nc_entry) {
923 if ((scan->nc_flag & NCF_UNRESOLVED) == 0) {
924 cache_zap(cache_hold(scan));
932 * Calculate a new unique id for ".." handling
/* Never reuse vp's current id and never produce 0. */
936 } while (nextid == vp->v_id || nextid == 0);
941 * Flush all entries referencing a particular filesystem.
943 * Since we need to check it anyway, we will flush all the invalid
944 * entries at the same time.
/*
 * cache_purgevfs() - flush every cache entry belonging to mount mp by
 * walking all hash chains from the last bucket down to the first.
 * NOTE(review): fragmentary extract -- the inner while loop and the zap
 * call on matching entries are elided.
 */
947 cache_purgevfs(struct mount *mp)
949 struct nchashhead *nchpp;
950 struct namecache *ncp, *nnp;
953 * Scan hash tables for applicable entries.
955 for (nchpp = &nchashtbl[nchash]; nchpp >= nchashtbl; nchpp--) {
956 ncp = LIST_FIRST(nchpp);
/* Grab the successor first: ncp may be removed from the chain. */
960 nnp = LIST_NEXT(ncp, nc_hash);
963 if (ncp->nc_vp && ncp->nc_vp->v_mount == mp)
975 * Test whether the vnode is at a leaf in the nameicache tree.
977 * Returns 0 if it is a leaf, -1 if it isn't.
/*
 * cache_leaf_test() - return 0 if vp is a leaf in the namecache tree
 * (no child entry of any of its namecache entries has a vnode), -1
 * otherwise.  NOTE(review): the return statements themselves are
 * elided in this extract.
 */
980 cache_leaf_test(struct vnode *vp)
982 struct namecache *scan;
983 struct namecache *ncp;
985 TAILQ_FOREACH(scan, &vp->v_namecache, nc_vnode) {
986 TAILQ_FOREACH(ncp, &scan->nc_list, nc_entry) {
987 /* YYY && ncp->nc_vp->v_type == VDIR ? */
988 if (ncp->nc_vp != NULL)
996 * Perform canonical checks and cache lookup and pass on to filesystem
997 * through the vop_cachedlookup only if needed.
1000 * struct vnode a_dvp;
1001 * struct namecache *a_ncp;
1002 * struct vnode **a_vpp;
1003 * struct namecache **a_ncpp;
1004 * struct componentname *a_cnp;
/*
 * vfs_cache_lookup() - generic VOP_LOOKUP front-end: canonical checks
 * and cache_lookup(), falling through to VOP_CACHEDLOOKUP on a miss.
 * On a hit it re-acquires locks per CNP_LOCKPARENT/CNP_ISDOTDOT rules
 * and revalidates the vnode via its v_id capability after any sleep.
 * NOTE(review): fragmentary extract -- error paths, the vpid capture
 * and several closing braces are elided.
 */
1008 vfs_cache_lookup(struct vop_lookup_args *ap)
1010 struct vnode *dvp, *vp;
1013 struct vnode **vpp = ap->a_vpp;
1014 struct componentname *cnp = ap->a_cnp;
1015 struct ucred *cred = cnp->cn_cred;
1016 int flags = cnp->cn_flags;
1017 struct thread *td = cnp->cn_td;
1018 u_long vpid; /* capability number of vnode */
1022 lockparent = flags & CNP_LOCKPARENT;
1024 if (dvp->v_type != VDIR)
/* Disallow delete/rename of the last component on a read-only fs. */
1027 if ((flags & CNP_ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
1028 (cnp->cn_nameiop == NAMEI_DELETE || cnp->cn_nameiop == NAMEI_RENAME)) {
1032 error = VOP_ACCESS(dvp, VEXEC, cred, td);
1037 error = cache_lookup(dvp, vpp, cnp);
1040 return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
1042 if (error == ENOENT)
1047 cnp->cn_flags &= ~CNP_PDIRUNLOCK;
1048 if (dvp == vp) { /* lookup on "." */
1051 } else if (flags & CNP_ISDOTDOT) {
/* ".." : unlock parent first to preserve lock order, then vget child. */
1052 VOP_UNLOCK(dvp, NULL, 0, td);
1053 cnp->cn_flags |= CNP_PDIRUNLOCK;
1054 error = vget(vp, NULL, LK_EXCLUSIVE, td);
1055 if (!error && lockparent && (flags & CNP_ISLASTCN)) {
1056 if ((error = vn_lock(dvp, NULL, LK_EXCLUSIVE, td)) == 0)
1057 cnp->cn_flags &= ~CNP_PDIRUNLOCK;
1060 error = vget(vp, NULL, LK_EXCLUSIVE, td);
1061 if (!lockparent || error || !(flags & CNP_ISLASTCN)) {
1062 VOP_UNLOCK(dvp, NULL, 0, td);
1063 cnp->cn_flags |= CNP_PDIRUNLOCK;
1067 * Check that the capability number did not change
1068 * while we were waiting for the lock.
1071 if (vpid == vp->v_id)
1074 if (lockparent && dvp != vp && (flags & CNP_ISLASTCN)) {
1075 VOP_UNLOCK(dvp, NULL, 0, td);
1076 cnp->cn_flags |= CNP_PDIRUNLOCK;
/* Stale hit: relock the parent if needed and fall back to the fs. */
1079 if (cnp->cn_flags & CNP_PDIRUNLOCK) {
1080 error = vn_lock(dvp, NULL, LK_EXCLUSIVE, td);
1083 cnp->cn_flags &= ~CNP_PDIRUNLOCK;
1085 return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
1088 static int disablecwd;
1089 SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, "");
1091 static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls);
1092 static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1);
1093 static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2);
1094 static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3);
1095 static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4);
1096 static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound);
/*
 * __getcwd() - getcwd(2) syscall backend: clamp the user buffer length
 * to MAXPATHLEN, build the path with kern_getcwd(), and copy the
 * NUL-terminated result out to userland.  NOTE(review): fragmentary
 * extract -- the disablecwd check, minimum-length check, error
 * handling and free() are elided.
 */
1099 __getcwd(struct __getcwd_args *uap)
1109 buflen = uap->buflen;
1112 if (buflen > MAXPATHLEN)
1113 buflen = MAXPATHLEN;
1115 buf = malloc(buflen, M_TEMP, M_WAITOK);
1116 bp = kern_getcwd(buf, buflen, &error);
1118 error = copyout(bp, uap->buf, strlen(bp) + 1);
/*
 * kern_getcwd() - build the current working directory path backwards
 * into buf by walking namecache parent links from fd_cdir up to
 * fd_rdir/rootvnode, crossing mount points via mnt_vnodecovered.
 * Returns a pointer into buf (path built from the tail end).
 * NOTE(review): fragmentary extract -- initialization of bp, error
 * counters, the '/' insertion and the function tail are elided.
 */
1124 kern_getcwd(char *buf, size_t buflen, int *error)
1126 struct proc *p = curproc;
1128 int i, slash_prefixed;
1129 struct filedesc *fdp;
1130 struct namecache *ncp;
1139 for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) {
1140 if (vp->v_flag & VROOT) {
1141 if (vp->v_mount == NULL) { /* forced unmount */
/* Cross the mount point downward to the covered vnode. */
1145 vp = vp->v_mount->mnt_vnodecovered;
1148 TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
1149 if (ncp->nc_parent && ncp->nc_parent->nc_vp &&
/* Copy the component name backwards into the buffer. */
1159 for (i = ncp->nc_nlen - 1; i >= 0; i--) {
1165 *--bp = ncp->nc_name[i];
1174 vp = ncp->nc_parent->nc_vp;
1176 if (!slash_prefixed) {
1190 * Thus begins the fullpath magic.
/*
 * Second STATNODE variant for the vn_fullpath counters: u_int with
 * CTLFLAG_RD implied.  NOTE(review): the original file #undef's the
 * earlier STATNODE before this redefinition -- that line appears to be
 * elided in this extract; verify against the full source.
 */
1194 #define STATNODE(name) \
1195 static u_int name; \
1196 SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")
1198 static int disablefullpath;
1199 SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW,
1200 &disablefullpath, 0, "");
1202 STATNODE(numfullpathcalls);
1203 STATNODE(numfullpathfail1);
1204 STATNODE(numfullpathfail2);
1205 STATNODE(numfullpathfail3);
1206 STATNODE(numfullpathfail4);
1207 STATNODE(numfullpathfound);
1210 vn_fullpath(struct proc *p, struct vnode *vn, char **retbuf, char **freebuf)
1213 int i, slash_prefixed;
1214 struct filedesc *fdp;
1215 struct namecache *ncp;
1219 if (disablefullpath)
1225 /* vn is NULL, client wants us to use p->p_textvp */
1227 if ((vn = p->p_textvp) == NULL)
1231 buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
1232 bp = buf + MAXPATHLEN - 1;
1236 for (vp = vn; vp != fdp->fd_rdir && vp != rootvnode;) {
1237 if (vp->v_flag & VROOT) {
1238 if (vp->v_mount == NULL) { /* forced unmount */
1242 vp = vp->v_mount->mnt_vnodecovered;
1245 TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
1246 if (ncp->nc_parent && ncp->nc_parent->nc_vp &&
1256 for (i = ncp->nc_nlen - 1; i >= 0; i--) {
1262 *--bp = ncp->nc_name[i];
1271 vp = ncp->nc_parent->nc_vp;
1273 if (!slash_prefixed) {