kernel - Address excessive stall in pageout during deadlock avoidance
author Matthew Dillon <dillon@apollo.backplane.com>
Thu, 1 Apr 2010 18:06:07 +0000 (11:06 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Thu, 1 Apr 2010 18:09:55 +0000 (11:09 -0700)
* The pageout daemon uses LK_TIMELOCK, which waits up to 1/10 of a
  second, to avoid deadlocking on a vnode that might be held locked
  during a pfault.

  If the vnode has a large number of pageable pages, each page may go
  through the timeout.  This can result in the pageout daemon stalling
  for an excessive amount of time.

  Reduce instances of the problem by remembering the last vnode which
  failed its timelock and using LK_NOWAIT for later pages.  A single
  vnode is remembered for now.

Reported-by: Francois Tigeot <ftigeot@wolfpond.org>
sys/vm/vm_pageout.c

index 691824b..a32ecf7 100644 (file)
@@ -696,6 +696,7 @@ vm_pageout_scan(int pass)
        struct vm_pageout_scan_info info;
        vm_page_t m, next;
        struct vm_page marker;
+       struct vnode *vpfailed;         /* warning, allowed to be stale */
        int maxscan, pcount;
        int recycle_count;
        int inactive_shortage, active_shortage;
@@ -760,6 +761,7 @@ vm_pageout_scan(int pass)
         */
        crit_enter();
 rescan0:
+       vpfailed = NULL;
        maxscan = vmstats.v_inactive_count;
        for (m = TAILQ_FIRST(&vm_page_queues[PQ_INACTIVE].pl);
             m != NULL && maxscan-- > 0 && inactive_shortage > 0;
@@ -946,12 +948,23 @@ rescan0:
                         * vm_wait while holding this vnode.  We skip the 
                         * vnode if we can't get it in a reasonable amount
                         * of time.
+                        *
+                        * vpfailed is used to (try to) avoid the case where
+                        * a large number of pages are associated with a
+                        * locked vnode, which could cause the pageout daemon
+                        * to stall for an excessive amount of time.
                         */
-
                        if (object->type == OBJT_VNODE) {
-                               vp = object->handle;
+                               int flags;
 
-                               if (vget(vp, LK_EXCLUSIVE|LK_NOOBJ|LK_TIMELOCK)) {
+                               vp = object->handle;
+                               flags = LK_EXCLUSIVE | LK_NOOBJ;
+                               if (vp == vpfailed)
+                                       flags |= LK_NOWAIT;
+                               else
+                                       flags |= LK_TIMELOCK;
+                               if (vget(vp, flags) != 0) {
+                                       vpfailed = vp;
                                        ++pageout_lock_miss;
                                        if (object->flags & OBJ_MIGHTBEDIRTY)
                                                    vnodes_skipped++;