kernel - Fix a race and enable the VM read shortcut feature by default
authorMatthew Dillon <dillon@apollo.backplane.com>
Mon, 18 Feb 2013 20:08:29 +0000 (12:08 -0800)
committerMatthew Dillon <dillon@apollo.backplane.com>
Mon, 18 Feb 2013 20:08:29 +0000 (12:08 -0800)
* Fix a lookup/access race.  No known cases hit the race but decided
  it needed to be fixed for safety.

  Instead of looking up and holding the VM page we now try to busy it,
  and only access the content if we are able to do so non-blocking.
  This costs a bit more in overhead but handles the page more properly.

  /usr/obj/usr/src
  time tar cf /dev/null .

  0.734u 5.781s 0:06.51 100.0%    24+66k 0+0io 0pf+0w (shortcut disabled)
  0.664u 2.382s 0:03.05 99.6%     24+66k 0+0io 0pf+0w (shortcut enabled)

* Default vm.read_shortcut_enable to 1.  The feature is now enabled by
  default.

* The feature has been in the tree for a while, disabled by default, and
  needs wider use, so it is being enabled by default.  The feature is only
  useful on 64-bit systems (i.e. so the DMAP can be used).  It allows the
  buffer cache and the VM page mapping code to be completely bypassed in
  situations where the file data is available in the VM page cache.

sys/kern/vfs_helper.c

index 80efceb..bf62f9d 100644 (file)
@@ -64,7 +64,7 @@
 
 #ifdef LWBUF_IS_OPTIMAL
 
 
 #ifdef LWBUF_IS_OPTIMAL
 
-static int vm_read_shortcut_enable = 0;
+static int vm_read_shortcut_enable = 1;
 static long vm_read_shortcut_count;
 static long vm_read_shortcut_failed;
 SYSCTL_INT(_vm, OID_AUTO, read_shortcut_enable, CTLFLAG_RW,
 static long vm_read_shortcut_count;
 static long vm_read_shortcut_failed;
 SYSCTL_INT(_vm, OID_AUTO, read_shortcut_enable, CTLFLAG_RW,
@@ -358,27 +358,23 @@ vop_helper_read_shortcut(struct vop_read_args *ap)
                if (n == 0)
                        break;  /* hit EOF */
 
                if (n == 0)
                        break;  /* hit EOF */
 
-               m = vm_page_lookup(obj, OFF_TO_IDX(uio->uio_offset));
-               if (m == NULL) {
+               m = vm_page_lookup_busy_try(obj, OFF_TO_IDX(uio->uio_offset),
+                                           FALSE, &error);
+               if (error || m == NULL) {
                        ++vm_read_shortcut_failed;
                        ++vm_read_shortcut_failed;
-                       break;
-               }
-               vm_page_hold(m);
-               if (m->flags & PG_BUSY) {
-                       ++vm_read_shortcut_failed;
-                       vm_page_unhold(m);
+                       error = 0;
                        break;
                }
                if ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) {
                        ++vm_read_shortcut_failed;
                        break;
                }
                if ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) {
                        ++vm_read_shortcut_failed;
-                       vm_page_unhold(m);
+                       vm_page_wakeup(m);
                        break;
                }
                lwb = lwbuf_alloc(m, &lwb_cache);
                error = uiomove((char *)lwbuf_kva(lwb) + offset, n, uio);
                vm_page_flag_set(m, PG_REFERENCED);
                lwbuf_free(lwb);
                        break;
                }
                lwb = lwbuf_alloc(m, &lwb_cache);
                error = uiomove((char *)lwbuf_kva(lwb) + offset, n, uio);
                vm_page_flag_set(m, PG_REFERENCED);
                lwbuf_free(lwb);
-               vm_page_unhold(m);
+               vm_page_wakeup(m);
        }
        vm_object_drop(obj);
 
        }
        vm_object_drop(obj);