kernel - Fix improper assertion panic in vinvalbuf()
authorMatthew Dillon <dillon@apollo.backplane.com>
Sat, 8 Dec 2012 22:22:15 +0000 (14:22 -0800)
committerMatthew Dillon <dillon@apollo.backplane.com>
Sat, 8 Dec 2012 22:22:15 +0000 (14:22 -0800)
* Related to the removal of vhold/vdrop from buffer cache buffers, the
  state of a vnode being cleaned can now contain more active buffers and
  I/O's at the time of the vinvalbuf() call.

* Remove the 'vinvalbuf: dirty bufs' assertion and panic.  It is no longer
  a correct assertion.  Note that we've also had sporatic reports of this
  panic even prior to the work so it might not have been a completely
  correct assertion before either.

* Rework the vinvalbuf buffer-flushing and I/O-waiting code a bit.  We
  have to wait for I/O at least once and it is probably a good idea to
  wait for I/O after each buffer flush pass too, to avoid live-locks.

Reported-by: vsrinivas
sys/kern/vfs_subr.c

index f8ae7c3..acbb590 100644 (file)
@@ -304,7 +304,7 @@ vinvalbuf(struct vnode *vp, int flags, int slpflag, int slptimeo)
                if (!RB_EMPTY(&vp->v_rbdirty_tree)) {
                        if ((error = VOP_FSYNC(vp, MNT_WAIT, 0)) != 0)
                                goto done;
-
+#if 0
                        /*
                         * Dirty bufs may be left or generated via races
                         * in circumstances where vinvalbuf() is called on
@@ -316,6 +316,7 @@ vinvalbuf(struct vnode *vp, int flags, int slpflag, int slptimeo)
                            !RB_EMPTY(&vp->v_rbdirty_tree))) {
                                panic("vinvalbuf: dirty bufs");
                        }
+#endif
                }
        }
        info.slptimeo = slptimeo;
@@ -326,30 +327,34 @@ vinvalbuf(struct vnode *vp, int flags, int slpflag, int slptimeo)
        info.vp = vp;
 
        /*
-        * Flush the buffer cache until nothing is left.
+        * Flush the buffer cache until nothing is left, wait for all I/O
+        * to complete.  At least one pass is required.  We might block
+        * in the pip code so we have to re-check.  Order is important.
         */
-       while (!RB_EMPTY(&vp->v_rbclean_tree) || 
-              !RB_EMPTY(&vp->v_rbdirty_tree)) {
-               info.clean = 1;
-               error = RB_SCAN(buf_rb_tree, &vp->v_rbclean_tree, NULL,
-                               vinvalbuf_bp, &info);
-               if (error == 0) {
+       do {
+               /*
+                * Flush buffer cache
+                */
+               if (!RB_EMPTY(&vp->v_rbclean_tree)) {
+                       info.clean = 1;
+                       error = RB_SCAN(buf_rb_tree, &vp->v_rbclean_tree,
+                                       NULL, vinvalbuf_bp, &info);
+               }
+               if (!RB_EMPTY(&vp->v_rbdirty_tree)) {
                        info.clean = 0;
-                       error = RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, NULL,
-                                       vinvalbuf_bp, &info);
+                       error = RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree,
+                                       NULL, vinvalbuf_bp, &info);
                }
-       }
 
-       /*
-        * Wait for I/O completion.  We may block in the pip code so we have
-        * to re-check.
-        */
-       do {
+               /*
+                * Wait for I/O completion.
+                */
                bio_track_wait(&vp->v_track_write, 0, 0);
-               if ((object = vp->v_object) != NULL) {
+               if ((object = vp->v_object) != NULL)
                        refcount_wait(&object->paging_in_progress, "vnvlbx");
-               }
-       } while (bio_track_active(&vp->v_track_write));
+       } while (bio_track_active(&vp->v_track_write) ||
+                !RB_EMPTY(&vp->v_rbclean_tree) ||
+                !RB_EMPTY(&vp->v_rbdirty_tree));
 
        /*
         * Destroy the copy in the VM cache, too.