/*
 * Copyright (c) 2004,2013 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * External lock/ref-related vnode functions
 *
 * vs_state transition locking requirements:
 *
 *	INACTIVE -> CACHED|DYING	vx_lock(excl) + vfs_spin
 *	DYING    -> CACHED		vx_lock(excl)
 *	ACTIVE   -> INACTIVE		(none)       + v_spin + vfs_spin
 *	INACTIVE -> ACTIVE		vn_lock(any) + v_spin + vfs_spin
 *	CACHED   -> ACTIVE		vn_lock(any) + v_spin + vfs_spin
 *
 * NOTE: Switching to/from ACTIVE/INACTIVE requires v_spin and vfs_spin,
 *
 *	 Switching into ACTIVE also requires a vref and vnode lock, however
 *	 the vnode lock is allowed to be SHARED.
 *
 *	 Switching into a CACHED or DYING state requires an exclusive vnode
 *	 lock or vx_lock (which is almost the same thing).
 */
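
/*
 * Illustrative sketch (assumption, not part of the original file): the lock
 * ordering implied by the table above for pulling a vnode back to VS_ACTIVE.
 * example_reactivate() is a hypothetical name; the real work is done by
 * vget()/_vactivate() further below.
 */
#if 0
static void
example_reactivate(struct vnode *vp)
{
	vn_lock(vp, LK_SHARED | LK_RETRY);	/* any vnode lock suffices */
	spin_lock(&vp->v_spin);
	spin_lock(&vfs_spin);
	/* ... move vp from the inactive to the active list, set VS_ACTIVE ... */
	spin_unlock(&vfs_spin);
	spin_unlock(&vp->v_spin);
	vn_unlock(vp);
}
#endif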
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/sysctl.h>

#include <machine/limits.h>

#include <vm/vm_object.h>

#include <sys/thread2.h>
static void vnode_terminate(struct vnode *vp);

static MALLOC_DEFINE(M_VNODE, "vnodes", "vnode structures");
/*
 * The vnode free list holds inactive vnodes.  Aged inactive vnodes
 * are inserted prior to the mid point, and otherwise inserted
 */
TAILQ_HEAD(freelst, vnode);
static struct freelst	vnode_active_list;
static struct freelst	vnode_inactive_list;
static struct vnode	vnode_active_rover;
static struct vnode	vnode_inactive_mid1;
static struct vnode	vnode_inactive_mid2;
static struct vnode	vnode_inactive_rover;
static struct spinlock	vfs_spin = SPINLOCK_INITIALIZER(vfs_spin);
static enum { ROVER_MID1, ROVER_MID2 } rover_state = ROVER_MID2;
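
/*
 * Illustrative sketch (assumption, not part of the original file): the
 * inactive list is partitioned into three sections by the mid1/mid2 marker
 * vnodes above.  This hypothetical helper counts the real vnodes in each
 * section while holding vfs_spin.
 */
#if 0
static void
example_count_inactive_sections(int counts[3])
{
	struct vnode *vp;
	int section = 0;

	counts[0] = counts[1] = counts[2] = 0;
	spin_lock(&vfs_spin);
	TAILQ_FOREACH(vp, &vnode_inactive_list, v_list) {
		if (vp == &vnode_inactive_mid1)
			section = 1;
		else if (vp == &vnode_inactive_mid2)
			section = 2;
		else if (vp != &vnode_inactive_rover)
			counts[section]++;
	}
	spin_unlock(&vfs_spin);
}
#endif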
int activevnodes = 0;
SYSCTL_INT(_debug, OID_AUTO, activevnodes, CTLFLAG_RD,
	    &activevnodes, 0, "Number of active nodes");
int cachedvnodes = 0;
SYSCTL_INT(_debug, OID_AUTO, cachedvnodes, CTLFLAG_RD,
	    &cachedvnodes, 0, "Number of total cached nodes");
int inactivevnodes = 0;
SYSCTL_INT(_debug, OID_AUTO, inactivevnodes, CTLFLAG_RD,
	    &inactivevnodes, 0, "Number of inactive nodes");
static int wantfreevnodes = 25;
SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW,
	    &wantfreevnodes, 0, "Desired number of free vnodes");
static int batchfreevnodes = 5;
SYSCTL_INT(_debug, OID_AUTO, batchfreevnodes, CTLFLAG_RW,
	    &batchfreevnodes, 0, "Number of vnodes to free at once");

static ulong trackvnode;
SYSCTL_ULONG(_debug, OID_AUTO, trackvnode, CTLFLAG_RW,
/*
 * Called from vfsinit()
 */
	TAILQ_INIT(&vnode_inactive_list);
	TAILQ_INIT(&vnode_active_list);
	TAILQ_INSERT_TAIL(&vnode_active_list, &vnode_active_rover, v_list);
	TAILQ_INSERT_TAIL(&vnode_inactive_list, &vnode_inactive_mid1, v_list);
	TAILQ_INSERT_TAIL(&vnode_inactive_list, &vnode_inactive_mid2, v_list);
	TAILQ_INSERT_TAIL(&vnode_inactive_list, &vnode_inactive_rover, v_list);
	spin_init(&vfs_spin);
	kmalloc_raise_limit(M_VNODE, 0);	/* unlimited */
_vsetflags(struct vnode *vp, int flags)
	atomic_set_int(&vp->v_flag, flags);

_vclrflags(struct vnode *vp, int flags)
	atomic_clear_int(&vp->v_flag, flags);

vsetflags(struct vnode *vp, int flags)
	_vsetflags(vp, flags);

vclrflags(struct vnode *vp, int flags)
	_vclrflags(vp, flags);
/*
 * Remove the vnode from the inactive list.
 *
 * _vactivate() may only be called while the vnode lock or VX lock is held.
 * The vnode spinlock need not be held.
 */
_vactivate(struct vnode *vp)
	if ((ulong)vp == trackvnode)
		kprintf("_vactivate %p %08x\n", vp, vp->v_flag);
	spin_lock(&vfs_spin);
	KKASSERT(vp->v_state == VS_INACTIVE || vp->v_state == VS_CACHED);
	if (vp->v_state == VS_INACTIVE) {
		TAILQ_REMOVE(&vnode_inactive_list, vp, v_list);
	TAILQ_INSERT_TAIL(&vnode_active_list, vp, v_list);
	vp->v_state = VS_ACTIVE;
	spin_unlock(&vfs_spin);
/*
 * Put a vnode on the inactive list.  The vnode must not currently reside on
 * any list (must be VS_CACHED).  Vnode should be VINACTIVE.
 *
 * Caller must hold v_spin
 */
_vinactive(struct vnode *vp)
	if ((ulong)vp == trackvnode) {
		kprintf("_vinactive %p %08x\n", vp, vp->v_flag);
	spin_lock(&vfs_spin);
	KKASSERT(vp->v_state == VS_CACHED);

	/*
	 * Distinguish between basically dead vnodes, vnodes with cached
	 * data, and vnodes without cached data.  A rover will shift the
	 * vnodes around as their cache status is lost.
	 */
	if (vp->v_flag & VRECLAIMED) {
		TAILQ_INSERT_HEAD(&vnode_inactive_list, vp, v_list);
	} else if (vp->v_object && vp->v_object->resident_page_count) {
		TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_list);
	} else if (vp->v_object && vp->v_object->swblock_count) {
		TAILQ_INSERT_BEFORE(&vnode_inactive_mid2, vp, v_list);
		TAILQ_INSERT_BEFORE(&vnode_inactive_mid1, vp, v_list);
	vp->v_state = VS_INACTIVE;
	spin_unlock(&vfs_spin);
_vinactive_tail(struct vnode *vp)
	if ((ulong)vp == trackvnode)
		kprintf("_vinactive_tail %p %08x\n", vp, vp->v_flag);
	spin_lock(&vfs_spin);
	KKASSERT(vp->v_state == VS_CACHED);
	TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_list);
	vp->v_state = VS_INACTIVE;
	spin_unlock(&vfs_spin);
/*
 * Return a C boolean if we should put the vnode on the inactive list
 * (VS_INACTIVE) or leave it alone.
 *
 * This routine is only valid if the vnode is already either VS_INACTIVE or
 * VS_CACHED, or if it can become VS_INACTIVE or VS_CACHED via
 *
 * WARNING! We used to indicate FALSE if the vnode had an object with
 *	    resident pages but we no longer do that because it makes
 *	    managing kern.maxvnodes difficult.  Instead we rely on vfree()
 *	    to place the vnode properly on the list.
 *
 * WARNING! This function is typically called with v_spin held.
 */
static __inline boolean_t
vshouldfree(struct vnode *vp)
	return (vp->v_auxrefs == 0);
/*
 * Add a ref to an active vnode.  This function should never be called
 * with an inactive vnode (use vget() instead), but might be called
 */
vref(struct vnode *vp)
	KASSERT((VREFCNT(vp) > 0 && vp->v_state != VS_INACTIVE),
		("vref: bad refcnt %08x %d", vp->v_refcnt, vp->v_state));
	atomic_add_int(&vp->v_refcnt, 1);
/*
 * Release a ref on an active or inactive vnode.
 *
 * If VREF_FINALIZE is set this will deactivate the vnode on the 1->0
 * transition, otherwise we leave the vnode in the active list and
 * do a lockless transition to 0, which is very important for the
 */
vrele(struct vnode *vp)
	int count = vp->v_refcnt;

	KKASSERT((count & VREF_MASK) > 0);

	if ((count & VREF_MASK) > 1) {
		if (atomic_cmpset_int(&vp->v_refcnt, count, count - 1))

	/*
	 * 1->0 transition case must handle possible finalization.
	 * When finalizing we transition 1->0x40000000.  Note that
	 * cachedvnodes is only adjusted on transitions to ->0.
	 */
	if (count & VREF_FINALIZE) {
		if (atomic_cmpset_int(&vp->v_refcnt,
				      count, VREF_TERMINATE)) {
	if (atomic_cmpset_int(&vp->v_refcnt, count, 0)) {
		atomic_add_int(&cachedvnodes, 1);
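
/*
 * Illustrative sketch (assumption, not part of the original file): the
 * typical primary-reference pattern.  vref() is only legal on a vnode known
 * to be active; otherwise vget() must be used.  example_use_active_vnode()
 * is a hypothetical name.
 */
#if 0
static void
example_use_active_vnode(struct vnode *vp)
{
	vref(vp);	/* add a ref to an already-active vnode */
	/* ... use vp ... */
	vrele(vp);	/* lockless 1->0 unless VREF_FINALIZE is set */
}
#endif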
/*
 * Add an auxiliary data structure reference to the vnode.  Auxiliary
 * references do not change the state of the vnode or prevent it
 * from being deactivated, reclaimed, or placed on or removed from
 *
 * An auxiliary reference DOES prevent the vnode from being destroyed,
 * allowing you to vx_lock() it, test state, etc.
 *
 * An auxiliary reference DOES NOT move a vnode out of the VS_INACTIVE
 * state once it has entered it.
 *
 * WARNING! vhold() must not acquire v_spin.  The spinlock may or may not
 *	    already be held by the caller.  vdrop() will clean up the
 */
vhold(struct vnode *vp)
	atomic_add_int(&vp->v_auxrefs, 1);
/*
 * Remove an auxiliary reference from the vnode.
 *
 * vdrop must check for the case where a vnode is held past its reclamation.
 * We use v_spin to interlock VS_CACHED -> VS_INACTIVE transitions.
 */
vdrop(struct vnode *vp)
	int count = vp->v_auxrefs;

	if (atomic_cmpset_int(&vp->v_auxrefs, count, count - 1))

	/*
	 * 1->0 transition case must check for reclaimed vnodes that
	 * are expected to be placed on the inactive list.
	 *
	 * v_spin is required for the 1->0 transition.
	 *
	 * 1->0 and 0->1 transitions are allowed to race.  The
	 * vp simply remains on the inactive list.
	 */
	spin_lock(&vp->v_spin);
	if (atomic_cmpset_int(&vp->v_auxrefs, 1, 0)) {
		if (vp->v_state == VS_CACHED && vshouldfree(vp))
		spin_unlock(&vp->v_spin);
	spin_unlock(&vp->v_spin);
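
/*
 * Illustrative sketch (assumption, not part of the original file): using an
 * auxiliary reference to keep a vnode from being destroyed while probing it
 * with a non-blocking VX lock.  example_probe_vnode() is a hypothetical name.
 */
#if 0
static int
example_probe_vnode(struct vnode *vp)
{
	int busy;

	vhold(vp);			/* prevents destruction only */
	busy = vx_lock_nonblock(vp);
	if (busy == 0) {
		/* ... inspect vp->v_state, vp->v_flag, etc. ... */
		vx_unlock(vp);
	}
	vdrop(vp);
	return (busy);
}
#endif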
/*
 * This function is called with vp vx_lock'd when the last active reference
 * on the vnode is released, typically via vrele().  v_refcnt will be set
 *
 * Additional vrefs are allowed to race but will not result in a reentrant
 * call to vnode_terminate() due to VREF_TERMINATE.
 *
 * NOTE: v_mount may be NULL due to assignment to dead_vnode_vops
 *
 * NOTE: The vnode may be marked inactive with dirty buffers
 *	 or dirty pages in its cached VM object still present.
 *
 * NOTE: VS_FREE should not be set on entry (the vnode was expected to
 *	 previously be active).  We lose control of the vnode the instant
 *	 it is placed on the free list.
 *
 *	 The VX lock is required when transitioning to VS_CACHED but is
 *	 not sufficient for the vshouldfree() interlocked test or when
 *	 transitioning away from VS_CACHED.  v_spin is also required for
 */
vnode_terminate(struct vnode *vp)
	KKASSERT(vp->v_state == VS_ACTIVE);

	if ((vp->v_flag & VINACTIVE) == 0) {
		_vsetflags(vp, VINACTIVE);
		/* might deactivate page */

	spin_lock(&vp->v_spin);
	if (vp->v_state == VS_ACTIVE) {
		spin_lock(&vfs_spin);
		KKASSERT(vp->v_state == VS_ACTIVE);
		TAILQ_REMOVE(&vnode_active_list, vp, v_list);
		vp->v_state = VS_CACHED;
		spin_unlock(&vfs_spin);
	if (vp->v_state == VS_CACHED && vshouldfree(vp))
	spin_unlock(&vp->v_spin);
/****************************************************************
 *			VX LOCKING FUNCTIONS			*
 ****************************************************************
 *
 * These functions lock vnodes for reclamation and deactivation related
 * activities.  The caller must already be holding some sort of reference
 */
vx_lock(struct vnode *vp)
	lockmgr(&vp->v_lock, LK_EXCLUSIVE);

/*
 * The non-blocking version also uses a slightly different mechanic.
 * This function will explicitly fail not only if it cannot acquire
 * the lock normally, but also if the caller already holds a lock.
 *
 * The adjusted mechanic is used to close a loophole where complex
 * VOP_RECLAIM code can circle around recursively and allocate the
 * same vnode it is trying to destroy from the freelist.
 *
 * Any filesystem (aka UFS) which puts LK_CANRECURSE in lk_flags can
 * cause the incorrect behavior to occur.  If not for that lockmgr()
 * would do the right thing.
 */
vx_lock_nonblock(struct vnode *vp)
	if (lockcountnb(&vp->v_lock))
		return(EBUSY);
	return(lockmgr(&vp->v_lock, LK_EXCLUSIVE | LK_NOWAIT));

vx_unlock(struct vnode *vp)
	lockmgr(&vp->v_lock, LK_RELEASE);
/****************************************************************
 *		VNODE ACQUISITION FUNCTIONS			*
 ****************************************************************
 *
 * These functions must be used when accessing a vnode that has no
 * chance of being destroyed in a SMP race.  That means the caller will
 * usually either hold an auxiliary reference (such as the namecache)
 * or hold some other lock that ensures that the vnode cannot be destroyed.
 *
 * These functions are MANDATORY for any code chain accessing a vnode
 * whose activation state is not known.
 *
 * vget() can be called with LK_NOWAIT and will return EBUSY if the
 * lock cannot be immediately acquired.
 *
 * vget()/vput() are used when reactivation is desired.
 *
 * vx_get() and vx_put() are used when reactivation is not desired.
 */
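
/*
 * Illustrative sketch (assumption, not part of the original file): the
 * vget()/vput() pairing used when a vnode's activation state is unknown,
 * e.g. for a vnode found via an auxiliary reference.  example_lookup_use()
 * is a hypothetical name.
 */
#if 0
static int
example_lookup_use(struct vnode *vp)
{
	int error;

	error = vget(vp, LK_SHARED);	/* ref + lock, reactivates if needed */
	if (error == 0) {
		/* ... issue VOPs against vp ... */
		vput(vp);		/* unlock and drop the reference */
	}
	return (error);
}
#endif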
vget(struct vnode *vp, int flags)
	/*
	 * A lock type must be passed
	 */
	if ((flags & LK_TYPE_MASK) == 0) {
		panic("vget() called with no lock specified!");

	/*
	 * Reference the structure and then acquire the lock.
	 *
	 * NOTE: The requested lock might be a shared lock and does
	 *	 not protect our access to the refcnt or other fields.
	 */
	if (atomic_fetchadd_int(&vp->v_refcnt, 1) == 0)
		atomic_add_int(&cachedvnodes, -1);

	if ((error = vn_lock(vp, flags)) != 0) {
		/*
		 * The lock failed, undo and return an error.  This will not
		 * normally trigger a termination.
		 */
	} else if (vp->v_flag & VRECLAIMED) {
		/*
		 * The node is being reclaimed and cannot be reactivated
		 * any more, undo and return ENOENT.
		 */
	} else if (vp->v_state == VS_ACTIVE) {
		/*
		 * A VS_ACTIVE vnode coupled with the fact that we have
		 * a vnode lock (even if shared) prevents v_state from
		 * changing.  Since the vnode is not in a VRECLAIMED state,
		 * we can safely clear VINACTIVE.
		 *
		 * NOTE! Multiple threads may clear VINACTIVE if this is
		 *	 a shared lock.  This race is allowed.
		 */
		_vclrflags(vp, VINACTIVE);
		/*
		 * If the vnode is not VS_ACTIVE it must be reactivated
		 * in addition to clearing VINACTIVE.  An exclusive spin_lock
		 * is needed to manipulate the vnode's list.
		 *
		 * Because the lockmgr lock might be shared, we might race
		 * another reactivation, which we handle.  In this situation,
		 * however, the refcnt prevents other v_state races.
		 *
		 * As with above, clearing VINACTIVE is allowed to race other
		 * clearings of VINACTIVE.
		 */
		_vclrflags(vp, VINACTIVE);
		spin_lock(&vp->v_spin);
		switch(vp->v_state) {
			atomic_clear_int(&vp->v_refcnt, VREF_TERMINATE);
			spin_unlock(&vp->v_spin);
			atomic_clear_int(&vp->v_refcnt, VREF_TERMINATE);
			spin_unlock(&vp->v_spin);
			spin_unlock(&vp->v_spin);
			spin_unlock(&vp->v_spin);
			panic("Impossible VS_DYING state");
debug_vput(struct vnode *vp, const char *filename, int line)
	kprintf("vput(%p) %s:%d\n", vp, filename, line);

vput(struct vnode *vp)
/*
 * Acquire the vnode lock unguarded.
 *
 * XXX The vx_*() locks should use auxrefs, not the main reference counter.
 */
vx_get(struct vnode *vp)
	if (atomic_fetchadd_int(&vp->v_refcnt, 1) == 0)
		atomic_add_int(&cachedvnodes, -1);
	lockmgr(&vp->v_lock, LK_EXCLUSIVE);

vx_get_nonblock(struct vnode *vp)
	error = lockmgr(&vp->v_lock, LK_EXCLUSIVE | LK_NOWAIT);
	if (error == 0) {
		if (atomic_fetchadd_int(&vp->v_refcnt, 1) == 0)
			atomic_add_int(&cachedvnodes, -1);

/*
 * Release a VX lock that also held a ref on the vnode.
 *
 * vx_put needs to check for VS_CACHED->VS_INACTIVE transitions to catch
 * the case where e.g. vnlru issues a vgone*(), but should otherwise
 * not mess with the v_state.
 */
vx_put(struct vnode *vp)
	if (vp->v_state == VS_CACHED && vshouldfree(vp))
	lockmgr(&vp->v_lock, LK_RELEASE);
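
/*
 * Illustrative sketch (assumption, not part of the original file):
 * vx_get()/vx_put() when reactivation is not desired, e.g. when probing or
 * tearing down a vnode.  example_vx_access() is a hypothetical name.
 */
#if 0
static void
example_vx_access(struct vnode *vp)
{
	vx_get(vp);	/* ref + exclusive VX lock, no reactivation */
	/* ... examine or tear down state ... */
	vx_put(vp);	/* may move a VS_CACHED vnode to the inactive list */
}
#endif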
/*
 * The rover looks for vnodes past the midline with no cached data and
 * moves them to before the midline.  If we do not do this the midline
 * can wind up in a degenerate state.
 */
vnode_free_rover_scan_locked(void)
	/*
	 * Get the vnode after the rover.  The rover roves between mid1 and
	 * the end so the only special vnode it can encounter is mid2.
	 */
	vp = TAILQ_NEXT(&vnode_inactive_rover, v_list);
	if (vp == &vnode_inactive_mid2) {
		vp = TAILQ_NEXT(vp, v_list);
		rover_state = ROVER_MID2;
	KKASSERT(vp != &vnode_inactive_mid1);

	/*
	 * Start over if we finished the scan.
	 */
	TAILQ_REMOVE(&vnode_inactive_list, &vnode_inactive_rover, v_list);
		TAILQ_INSERT_AFTER(&vnode_inactive_list, &vnode_inactive_mid1,
				   &vnode_inactive_rover, v_list);
		rover_state = ROVER_MID1;
		TAILQ_INSERT_AFTER(&vnode_inactive_list, vp,
				   &vnode_inactive_rover, v_list);

	/*
	 * Shift vp if appropriate.
	 */
	if (vp->v_object && vp->v_object->resident_page_count) {
		/*
		 * Promote vnode with resident pages to section 3.
		 */
		if (rover_state == ROVER_MID1) {
			TAILQ_REMOVE(&vnode_inactive_list, vp, v_list);
			TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_list);
	} else if (vp->v_object && vp->v_object->swblock_count) {
		/*
		 * Demote vnode with only swap pages to section 2
		 */
		if (rover_state == ROVER_MID2) {
			TAILQ_REMOVE(&vnode_inactive_list, vp, v_list);
			TAILQ_INSERT_BEFORE(&vnode_inactive_mid2, vp, v_list);
		/*
		 * Demote vnode with no cached data to section 1
		 */
		TAILQ_REMOVE(&vnode_inactive_list, vp, v_list);
		TAILQ_INSERT_BEFORE(&vnode_inactive_mid1, vp, v_list);
/*
 * Called from vnlru_proc()
 */
vnode_free_rover_scan(int count)
	spin_lock(&vfs_spin);
		vnode_free_rover_scan_locked();
	spin_unlock(&vfs_spin);
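
/*
 * Illustrative sketch (assumption, not part of the original file): a
 * periodic caller such as vnlru_proc() only needs to bound the work per
 * call; larger counts simply reposition more vnodes per invocation.
 * example_vnlru_tick() is a hypothetical name.
 */
#if 0
static void
example_vnlru_tick(void)
{
	vnode_free_rover_scan(batchfreevnodes);
}
#endif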
/*
 * Try to reuse a vnode from the free list.  This function is somewhat
 * advisory in that NULL can be returned as a normal case, even if free
 * vnodes are present.
 *
 * The scan is limited because it can result in excessive CPU use during
 * periods of extreme vnode use.
 *
 * NOTE: The returned vnode is not completely initialized.
 */
cleanfreevnode(int maxcount)
	/*
	 * Try to deactivate some vnodes cached on the active list.
	 */
	for (count = 0; count < maxcount; count++) {
		if (cachedvnodes - inactivevnodes < inactivevnodes)

		spin_lock(&vfs_spin);
		vp = TAILQ_NEXT(&vnode_active_rover, v_list);
		TAILQ_REMOVE(&vnode_active_list, &vnode_active_rover, v_list);
			TAILQ_INSERT_HEAD(&vnode_active_list,
					  &vnode_active_rover, v_list);
			TAILQ_INSERT_AFTER(&vnode_active_list, vp,
					   &vnode_active_rover, v_list);
		if (vp == NULL || vp->v_refcnt != 0) {
			spin_unlock(&vfs_spin);

		/*
		 * Try to deactivate the vnode.
		 */
		if (atomic_fetchadd_int(&vp->v_refcnt, 1) == 0)
			atomic_add_int(&cachedvnodes, -1);
		atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
		spin_unlock(&vfs_spin);

	/*
	 * Loop trying to lock the first vnode on the free list.
	 *
	 * We use a bad hack in vx_lock_nonblock() which avoids
	 * the lock order reversal between vfs_spin and v_spin.
	 * This is very fragile code and I don't want to use
	 */
	for (count = 0; count < maxcount; count++) {
		spin_lock(&vfs_spin);
		vnode_free_rover_scan_locked();
		vnode_free_rover_scan_locked();
		vp = TAILQ_FIRST(&vnode_inactive_list);
		while (vp == &vnode_inactive_mid1 ||
		       vp == &vnode_inactive_mid2 ||
		       vp == &vnode_inactive_rover) {
			vp = TAILQ_NEXT(vp, v_list);
		spin_unlock(&vfs_spin);

		if (vx_lock_nonblock(vp)) {
			KKASSERT(vp->v_state == VS_INACTIVE);
			TAILQ_REMOVE(&vnode_inactive_list, vp, v_list);
			TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_list);
			spin_unlock(&vfs_spin);

		/*
		 * The vnode should be inactive (VREF_TERMINATE should still
		 * be set in v_refcnt).  Since we pulled it from the inactive
		 * list it should obviously not be VS_CACHED.  Activate the
		 *
		 * Once removed from the inactive list we inherit the
		 * VREF_TERMINATE which prevents loss of control while
		 * we mess with the vnode.
		 */
		KKASSERT(vp->v_state == VS_INACTIVE);
		TAILQ_REMOVE(&vnode_inactive_list, vp, v_list);
		vp->v_state = VS_DYING;
		spin_unlock(&vfs_spin);

		if ((ulong)vp == trackvnode)
			kprintf("cleanfreevnode %p %08x\n", vp, vp->v_flag);
		/*
		 * Do not reclaim/reuse a vnode while auxiliary refs exist.
		 * This includes namecache refs due to a related ncp being
		 * locked or having children, a VM object association, or
		 *
		 * We will make this test several times as auxrefs can
		 * get incremented on us without any spinlocks being held
		 * until we have removed all namecache and inode references
		 *
		 * The inactive list association reinherits the v_refcnt.
		 */
			vp->v_state = VS_CACHED;
		KKASSERT(vp->v_flag & VINACTIVE);
		KKASSERT(vp->v_refcnt & VREF_TERMINATE);

		/*
		 * Holding the VX lock on an inactive vnode prevents it
		 * from being reactivated or reused.  New namecache
		 * associations can only be made using active vnodes.
		 *
		 * Another thread may be blocked on our vnode lock while
		 * holding a namecache lock.  We can only reuse this vnode
		 * if we can clear all namecache associations without
		 *
		 * Because VCACHED is already in the correct state (cleared)
		 * we cannot race other vdrop()s occurring at the same time
		 * and can safely place vp on the free list.
		 */
		if ((vp->v_flag & VRECLAIMED) == 0) {
			if (cache_inval_vp_nonblock(vp)) {
				vp->v_state = VS_CACHED;
			/* vnode is still VX locked */

		/*
		 * We can destroy the vnode if no primary or auxiliary
		 * references remain other than ours, else put it
		 * back on the free list and keep looking.
		 *
		 * Either the free list inherits the last reference
		 * or we fall through and sysref_activate() the last
		 *
		 * Since the vnode is in a VRECLAIMED state, no new
		 * namecache associations could have been made.
		 */
		KKASSERT(vp->v_state == VS_DYING);
		KKASSERT(TAILQ_EMPTY(&vp->v_namecache));
		    (vp->v_refcnt & ~VREF_FINALIZE) != VREF_TERMINATE) {
			vp->v_state = VS_CACHED;

		/*
		 * Nothing should have been able to access this vp.
		 */
		atomic_clear_int(&vp->v_refcnt, VREF_TERMINATE|VREF_FINALIZE);
		KASSERT(vp->v_refcnt == 0,
			("vp %p badrefs %08x", vp, vp->v_refcnt));

		/*
		 * Return a VX locked vnode suitable for reuse.  The caller
		 * inherits the sysref.
		 */
		KKASSERT(vp->v_state == VS_DYING);
/*
 * Obtain a new vnode.  The returned vnode is VX locked & vrefd.
 *
 * All new vnodes set the VAGE flags.  An open() of the vnode will
 * decrement the (2-bit) flags.  Vnodes which are opened several times
 * are thus retained in the cache over vnodes which are merely stat()d.
 *
 * We always allocate the vnode.  Attempting to recycle existing vnodes
 * here can lead to numerous deadlocks, particularly with softupdates.
 */
allocvnode(int lktimeout, int lkflags)
	/*
	 * Do not flag for recyclement unless there are enough free vnodes
	 * to recycle and the number of vnodes has exceeded our target.
	 */
	if (inactivevnodes >= wantfreevnodes && numvnodes >= desiredvnodes) {
		struct thread *td = curthread;

		atomic_set_int(&td->td_lwp->lwp_mpflags, LWP_MP_VNLRU);

	vp = kmalloc(sizeof(*vp), M_VNODE, M_ZERO | M_WAITOK);

	lwkt_token_init(&vp->v_token, "vnode");
	lockinit(&vp->v_lock, "vnode", 0, 0);
	TAILQ_INIT(&vp->v_namecache);
	RB_INIT(&vp->v_rbclean_tree);
	RB_INIT(&vp->v_rbdirty_tree);
	RB_INIT(&vp->v_rbhash_tree);
	spin_init(&vp->v_spin);

	lockmgr(&vp->v_lock, LK_EXCLUSIVE);
	atomic_add_int(&numvnodes, 1);

	vp->v_flag = VAGE0 | VAGE1;

	/*
	 * lktimeout only applies when LK_TIMELOCK is used, and only
	 * the pageout daemon uses it.  The timeout may not be zero
	 * or the pageout daemon can deadlock in low-VM situations.
	 */
	lockreinit(&vp->v_lock, "vnode", lktimeout, lkflags);
	KKASSERT(TAILQ_EMPTY(&vp->v_namecache));
	/* exclusive lock still held */

	vp->v_filesize = NOOFFSET;

	vp->v_state = VS_CACHED;
/*
 * Called after a process has allocated a vnode via allocvnode()
 * and we detected that too many vnodes were present.
 *
 * Try to reuse vnodes if we hit the max.  This situation only
 * occurs in certain large-memory (2G+) situations on 32 bit systems,
 * or if kern.maxvnodes is set to very low values.
 *
 * This function is called just prior to a return to userland if the
 * process at some point had to allocate a new vnode during the last
 * system call and the vnode count was found to be excessive.
 *
 * WARNING: Sometimes numvnodes can blow out due to children being
 *	    present under directory vnodes in the namecache.  For the
 *	    moment use an if() instead of a while() and note that if
 *	    we were to use a while() we would still have to break out
 *	    if freesomevnodes() returned 0.
 */
	if (numvnodes > desiredvnodes && cachedvnodes > wantfreevnodes)
		freesomevnodes(batchfreevnodes);

freesomevnodes(int n)
	if ((vp = cleanfreevnode(n * 2)) == NULL)
	atomic_add_int(&numvnodes, -1);