kernel - Add debugging and attempt to fix vm_prefault issue
author Matthew Dillon <dillon@apollo.backplane.com>
Thu, 14 Jul 2011 01:21:10 +0000 (18:21 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Thu, 14 Jul 2011 01:21:10 +0000 (18:21 -0700)
* Add debugging assertions and attempt to fix a race in the vm_prefault
  code when running through backing_object chains (see the first sketch
  below).

* The fix may be incomplete; we really need a way to determine whether any
  chain element has changed state during the scan (see the second sketch
  below).  The generation count may be excessive for that purpose, as it
  also covers vm_page insertions.
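
  First sketch: the hold-and-recheck (hand-over-hand) pattern the patch
  applies when stepping along lobject->backing_object, reduced to a
  minimal userland model.  The struct and helpers below are simplified
  stand-ins, not the kernel's vm_object API, and the model elides the
  vm_object_lock_swap() call the patch uses to reorder the last two
  tokens acquired before the older hold is dropped:

    /* hold_chain.c - illustrative model only, not kernel code */
    #include <stdatomic.h>
    #include <stddef.h>

    struct obj {
            atomic_int hold;                 /* blocks destruction only */
            _Atomic(struct obj *) backing;   /* may change concurrently */
    };

    static void obj_hold(struct obj *o) { atomic_fetch_add(&o->hold, 1); }
    static void obj_drop(struct obj *o) { atomic_fetch_sub(&o->hold, 1); }

    /*
     * Step from 'cur' (already held) to its backing object.  The re-test
     * after obj_hold() catches a pointer that was changed out between
     * the load and the hold; NULL means the chain raced away underneath
     * us and the caller must abandon the walk.
     */
    static struct obj *
    chain_step(struct obj *cur)
    {
            struct obj *next;

            while ((next = atomic_load(&cur->backing)) != NULL) {
                    obj_hold(next);
                    if (next == atomic_load(&cur->backing)) {
                            obj_drop(cur);          /* hand-over-hand */
                            return (next);
                    }
                    obj_drop(next);                 /* lost the race, retry */
            }
            return (NULL);
    }

  Second sketch: what a chain-change test might look like, using a
  hypothetical per-object chain_gen counter that a real implementation
  would bump only on shadow-chain mutations.  The field and helpers are
  invented for illustration; the existing generation count is unsuitable
  precisely because vm_page insertions also bump it:

    /* chain_gen.c - hypothetical; no such field exists today */
    #define CHAIN_MAX 16

    struct vmobj {
            unsigned int chain_gen;          /* bumped on chain mutations */
            struct vmobj *backing;
    };

    /* Record the generation of every element while walking the chain. */
    static int
    chain_snapshot(const struct vmobj *o, unsigned int gens[CHAIN_MAX])
    {
            int n = 0;

            while (o != NULL && n < CHAIN_MAX) {
                    gens[n++] = o->chain_gen;
                    o = o->backing;
            }
            return (n);
    }

    /*
     * Re-walk after the scan.  A moved generation, or a chain that grew,
     * shrank, or exceeded CHAIN_MAX, conservatively invalidates the scan.
     */
    static int
    chain_still_valid(const struct vmobj *o, const unsigned int gens[], int n)
    {
            int i;

            for (i = 0; i < n && o != NULL; ++i, o = o->backing) {
                    if (o->chain_gen != gens[i])
                            return (0);
            }
            return (i == n && o == NULL);
    }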

Reported-by: Peter Avalos <peter@theshell.com>
sys/vm/vm_fault.c
sys/vm/vm_object.c
sys/vm/vm_object.h

diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index f5f5f25..4db9b4f 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -2012,7 +2012,11 @@ vm_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry, int prot)
        if (lp == NULL || (pmap != vmspace_pmap(lp->lwp_vmspace)))
                return;
 
+       lwkt_gettoken(&vm_token);
+
        object = entry->object.vm_object;
+       KKASSERT(object != NULL);
+       vm_object_hold(object);
 
        starta = addra - PFBAK * PAGE_SIZE;
        if (starta < entry->start)
@@ -2020,9 +2024,10 @@ vm_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry, int prot)
        else if (starta > addra)
                starta = 0;
 
-       lwkt_gettoken(&vm_token);
+       KKASSERT(object == entry->object.vm_object);
        for (i = 0; i < PAGEORDER_SIZE; i++) {
                vm_object_t lobject;
+               vm_object_t nobject;
                int allocated = 0;
 
                addr = addra + vm_prefault_pageorder[i];
@@ -2047,12 +2052,18 @@ vm_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry, int prot)
                 * In order to not have to check the pager via *haspage*()
                 * we stop if any non-default object is encountered.  e.g.
                 * a vnode or swap object would stop the loop.
+                *
+                * XXX It is unclear whether hold chaining is sufficient
+                *     to maintain the validity of the backing object chain.
                 */
                index = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
                lobject = object;
                pindex = index;
                pprot = prot;
 
+               KKASSERT(lobject == entry->object.vm_object);
+               vm_object_hold(lobject);
+
                while ((m = vm_page_lookup(lobject, pindex)) == NULL) {
                        if (lobject->type != OBJT_DEFAULT)
                                break;
@@ -2064,7 +2075,8 @@ vm_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry, int prot)
                                    vm_page_count_min(0)) {
                                        break;
                                }
-                               /* note: allocate from base object */
+
+                               /* NOTE: allocated from base object */
                                m = vm_page_alloc(object, index,
                                              VM_ALLOC_NORMAL | VM_ALLOC_ZERO);
 
@@ -2072,7 +2084,8 @@ vm_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry, int prot)
                                        vm_page_zero_fill(m);
                                } else {
 #ifdef PMAP_DEBUG
-                                       pmap_page_assertzero(VM_PAGE_TO_PHYS(m));
+                                       pmap_page_assertzero(
+                                                       VM_PAGE_TO_PHYS(m));
 #endif
                                        vm_page_flag_clear(m, PG_ZERO);
                                        mycpu->gd_cnt.v_ozfod++;
@@ -2086,10 +2099,30 @@ vm_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry, int prot)
                        }
                        if (lobject->backing_object_offset & PAGE_MASK)
                                break;
-                       pindex += lobject->backing_object_offset >> PAGE_SHIFT;
-                       lobject = lobject->backing_object;
+                       while ((nobject = lobject->backing_object) != NULL) {
+                               vm_object_hold(nobject);
+                               if (nobject == lobject->backing_object) {
+                                       pindex +=
+                                           lobject->backing_object_offset >>
+                                           PAGE_SHIFT;
+                                       vm_object_lock_swap();
+                                       vm_object_drop(lobject);
+                                       lobject = nobject;
+                                       break;
+                               }
+                               vm_object_drop(nobject);
+                       }
+                       if (nobject == NULL) {
+                               kprintf("vm_prefault: Warning, backing object "
+                                       "race averted lobject %p\n",
+                                       lobject);
+                               vm_object_drop(lobject);
+                               continue;
+                       }
                        pprot &= ~VM_PROT_WRITE;
                }
+               vm_object_drop(lobject);
+
                /*
                 * NOTE: lobject now invalid (if we did a zero-fill we didn't
                 *       bother assigning lobject = object).
@@ -2127,10 +2160,14 @@ vm_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry, int prot)
                        pmap_enter(pmap, addr, m, pprot, 0);
                        vm_page_deactivate(m);
                        vm_page_wakeup(m);
-               } else if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
+               } else if (
+                   ((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
                    (m->busy == 0) &&
                    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
-
+                       /*
+                        * A fully valid page not undergoing soft I/O can
+                        * be immediately entered into the pmap.
+                        */
                        vm_page_busy(m);
                        if ((m->queue - m->pc) == PQ_CACHE) {
                                vm_page_deactivate(m);
@@ -2141,5 +2178,6 @@ vm_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry, int prot)
                        vm_page_wakeup(m);
                }
        }
+       vm_object_drop(object);
        lwkt_reltoken(&vm_token);
 }
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index c197b5e..9a298bf 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -163,7 +163,7 @@ vm_object_lock_init(vm_object_t obj)
 #endif
 }
 
-static void
+void
 vm_object_lock_swap(void)
 {
        lwkt_token_swap();
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index ce4e307..9a66a83 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -283,6 +283,7 @@ void vm_object_init2 (void);
 vm_page_t vm_fault_object_page(vm_object_t, vm_ooffset_t, vm_prot_t, int, int *);
 void vm_object_dead_sleep(vm_object_t, const char *);
 void vm_object_dead_wakeup(vm_object_t);
+void vm_object_lock_swap(void);
 void vm_object_lock(vm_object_t);
 void vm_object_unlock(vm_object_t);