author      Matthew Dillon <dillon@dragonflybsd.org>
            Sat, 6 Jan 2007 22:35:47 +0000 (22:35 +0000)
committer   Matthew Dillon <dillon@dragonflybsd.org>
            Sat, 6 Jan 2007 22:35:47 +0000 (22:35 +0000)

Add a new procedure, vm_fault_page(), which does all actions related to
faulting in a VM page given a vm_map and virtual address, including any
necessary I/O, but returns the held page instead of entering it into a pmap.

Use the new function in procfs_rwmem, allowing gdb to 'see' memory that
is governed by a virtual page table.
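
For context, the caller-side pattern the new API enables looks roughly like the
sketch below, modelled on the procfs_rwmem() change in this commit.  The helper
name copy_one_page(), its argument list, and the include set are illustrative
assumptions, not part of the commit itself.

	/* Typical kernel includes assumed for these types (illustrative). */
	#include <sys/param.h>
	#include <sys/uio.h>
	#include <vm/vm.h>
	#include <vm/pmap.h>
	#include <vm/vm_map.h>
	#include <vm/vm_page.h>
	#include <vm/vm_extern.h>

	/*
	 * Illustrative sketch only: fault one page of the target map with
	 * vm_fault_page(), copy it through a temporary kernel mapping, then
	 * drop the hold that the fault routine returned the page with.
	 */
	static int
	copy_one_page(vm_map_t map, vm_offset_t pageno, vm_offset_t kva,
		      int page_offset, u_int len, struct uio *uio,
		      vm_prot_t reqprot)
	{
		vm_page_t m;
		int error;

		/* Returns a held page (not entered into any pmap), or NULL */
		m = vm_fault_page(map, pageno, reqprot, VM_FAULT_NORMAL, &error);
		if (error)
			return (EFAULT);

		/* Temporary KVA mapping just for the copy */
		pmap_kenter(kva, VM_PAGE_TO_PHYS(m));
		error = uiomove((caddr_t)(kva + page_offset), len, uio);
		pmap_kremove(kva);

		/* Release the hold obtained via vm_fault_page() */
		vm_page_unhold(m);
		return (error);
	}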

sys/vfs/procfs/procfs_mem.c
sys/vm/vm_extern.h
sys/vm/vm_fault.c

sys/vfs/procfs/procfs_mem.c
index 70819b7..67cbd09 100644
@@ -38,7 +38,7 @@
  *     @(#)procfs_mem.c        8.5 (Berkeley) 6/15/94
  *
  * $FreeBSD: src/sys/miscfs/procfs/procfs_mem.c,v 1.46.2.3 2002/01/22 17:22:59 nectar Exp $
- * $DragonFly: src/sys/vfs/procfs/procfs_mem.c,v 1.12 2006/12/28 21:24:02 dillon Exp $
+ * $DragonFly: src/sys/vfs/procfs/procfs_mem.c,v 1.13 2007/01/06 22:35:46 dillon Exp $
  */
 
 /*
@@ -103,15 +103,8 @@ procfs_rwmem(struct proc *curp, struct proc *p, struct uio *uio)
         * makes things easier.  This way is trivial - right?
         */
        do {
-               vm_map_t tmap;
                vm_offset_t uva;
                int page_offset;                /* offset into page */
-               vm_map_entry_t out_entry;
-               vm_prot_t out_prot;
-               boolean_t wired;
-               vm_pindex_t pindex;
-               vm_object_t object;
-               vm_object_t nobject;
                u_int len;
                vm_page_t m;
 
@@ -131,68 +124,10 @@ procfs_rwmem(struct proc *curp, struct proc *p, struct uio *uio)
                /*
                 * Fault the page on behalf of the process
                 */
-               error = vm_fault(map, pageno, reqprot, VM_FAULT_NORMAL);
+               m = vm_fault_page(map, pageno, reqprot,
+                                 VM_FAULT_NORMAL, &error);
                if (error) {
-                       error = EFAULT;
-                       break;
-               }
-
-               /*
-                * Now we need to get the page.  out_entry, out_prot, wired,
-                * and single_use aren't used.  One would think the vm code
-                * would be a *bit* nicer...  We use tmap because
-                * vm_map_lookup() can change the map argument.
-                */
-               tmap = map;
-               error = vm_map_lookup(&tmap, pageno, reqprot,
-                             &out_entry, &object, &pindex, &out_prot,
-                             &wired);
-
-               if (error) {
-                       error = EFAULT;
-                       break;
-               }
-
-               /*
-                * spl protection is required to avoid interrupt freeing
-                * races, reference the object to avoid it being ripped
-                * out from under us if we block.
-                */
-               crit_enter();
-               vm_object_reference(object);
-again:
-               m = vm_page_lookup(object, pindex);
-
-               /*
-                * Allow fallback to backing objects if we are reading
-                */
-               while (m == NULL && !writing && object->backing_object) {
-                       pindex += OFF_TO_IDX(object->backing_object_offset);
-                       nobject = object->backing_object;
-                       vm_object_reference(nobject);
-                       vm_object_deallocate(object);
-                       object = nobject;
-                       m = vm_page_lookup(object, pindex);
-               }
-
-               /*
-                * Wait for any I/O's to complete, then hold the page
-                * so we can release the spl.
-                */
-               if (m) {
-                       if (vm_page_sleep_busy(m, FALSE, "rwmem"))
-                               goto again;
-                       vm_page_hold(m);
-               }
-               crit_exit();
-
-               /*
-                * We no longer need the object.  If we do not have a page
-                * then cleanup.
-                */
-               vm_object_deallocate(object);
-               if (m == NULL) {
-                       vm_map_lookup_done(tmap, out_entry, 0);
+                       KKASSERT(m == NULL);
                        error = EFAULT;
                        break;
                }
@@ -201,7 +136,6 @@ again:
                 * Cleanup tmap then create a temporary KVA mapping and
                 * do the I/O.
                 */
-               vm_map_lookup_done(tmap, out_entry, 0);
                pmap_kenter(kva, VM_PAGE_TO_PHYS(m));
                error = uiomove((caddr_t)(kva + page_offset), len, uio);
                pmap_kremove(kva);
sys/vm/vm_extern.h
index 76748b8..76f5a09 100644
@@ -32,7 +32,7 @@
  *
  *     @(#)vm_extern.h 8.2 (Berkeley) 1/12/94
  * $FreeBSD: src/sys/vm/vm_extern.h,v 1.46.2.3 2003/01/13 22:51:17 dillon Exp $
- * $DragonFly: src/sys/vm/vm_extern.h,v 1.21 2006/12/28 21:24:02 dillon Exp $
+ * $DragonFly: src/sys/vm/vm_extern.h,v 1.22 2007/01/06 22:35:47 dillon Exp $
  */
 
 #ifndef _VM_VM_EXTERN_H_
@@ -89,6 +89,7 @@ int swaponvp (struct thread *, struct vnode *, u_long);
 void swapout_procs (int);
 int useracc(c_caddr_t, int, int);
 int vm_fault (vm_map_t, vm_offset_t, vm_prot_t, int);
+vm_page_t vm_fault_page (vm_map_t, vm_offset_t, vm_prot_t, int, int *);
 void vm_fault_copy_entry (vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t);
 void vm_fault_unwire (vm_map_t, vm_map_entry_t);
 int vm_fault_wire (vm_map_t, vm_map_entry_t, boolean_t);
sys/vm/vm_fault.c
index 25be785..e0a3af5 100644
@@ -67,7 +67,7 @@
  * rights to redistribute these changes.
  *
  * $FreeBSD: src/sys/vm/vm_fault.c,v 1.108.2.8 2002/02/26 05:49:27 silby Exp $
- * $DragonFly: src/sys/vm/vm_fault.c,v 1.34 2007/01/01 22:51:18 corecode Exp $
+ * $DragonFly: src/sys/vm/vm_fault.c,v 1.35 2007/01/06 22:35:47 dillon Exp $
  */
 
 /*
@@ -403,6 +403,172 @@ RetryFault:
        return (KERN_SUCCESS);
 }
 
+/*
+ * Fault-in the specified virtual address in the specified map, doing all
+ * necessary manipulation of the object store and all necessary I/O.  Return
+ * a held VM page or NULL, and set *errorp.  The related pmap is not
+ * updated.
+ *
+ * Since the pmap is not updated, this routine may not be used to wire
+ * the page.
+ */
+vm_page_t
+vm_fault_page(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+             int fault_flags, int *errorp)
+{
+       int result;
+       vm_pindex_t first_pindex;
+       struct faultstate fs;
+
+       mycpu->gd_cnt.v_vm_faults++;
+
+       fs.didlimit = 0;
+       fs.hardfault = 0;
+       fs.fault_flags = fault_flags;
+       KKASSERT((fault_flags & VM_FAULT_WIRE_MASK) == 0);
+
+RetryFault:
+       /*
+        * Find the vm_map_entry representing the backing store and resolve
+        * the top level object and page index.  This may have the side
+        * effect of executing a copy-on-write on the map entry and/or
+        * creating a shadow object, but will not COW any actual VM pages.
+        *
+        * On success fs.map is left read-locked and various other fields 
+        * are initialized but not otherwise referenced or locked.
+        *
+        * NOTE!  vm_map_lookup will upgrade the fault_type to VM_FAULT_WRITE
+        * if the map entry is a virtual page table and also writable,
+        * so we can set the 'A' (accessed) bit in the virtual page table entry.
+        */
+       fs.map = map;
+       result = vm_map_lookup(&fs.map, vaddr, fault_type,
+                              &fs.entry, &fs.first_object,
+                              &first_pindex, &fs.first_prot, &fs.wired);
+
+       if (result != KERN_SUCCESS) {
+               *errorp = result;
+               return (NULL);
+       }
+
+       /*
+        * fs.map is read-locked
+        *
+        * Misc checks.  Save the map generation number to detect races.
+        */
+       fs.map_generation = fs.map->timestamp;
+
+       if (fs.entry->eflags & MAP_ENTRY_NOFAULT) {
+               panic("vm_fault: fault on nofault entry, addr: %lx",
+                   (u_long)vaddr);
+       }
+
+       /*
+        * A system map entry may return a NULL object.  No object means
+        * no pager means an unrecoverable kernel fault.
+        */
+       if (fs.first_object == NULL) {
+               panic("vm_fault: unrecoverable fault at %p in entry %p",
+                       (void *)vaddr, fs.entry);
+       }
+
+       /*
+        * Make a reference to this object to prevent its disposal while we
+        * are messing with it.  Once we have the reference, the map is free
+        * to be diddled.  Since objects reference their shadows (and copies),
+        * they will stay around as well.
+        *
+        * Bump the paging-in-progress count to prevent size changes (e.g.
+        * truncation operations) during I/O.  This must be done after
+        * obtaining the vnode lock in order to avoid possible deadlocks.
+        */
+       vm_object_reference(fs.first_object);
+       fs.vp = vnode_pager_lock(fs.first_object);
+       vm_object_pip_add(fs.first_object, 1);
+
+       fs.lookup_still_valid = TRUE;
+       fs.first_m = NULL;
+       fs.object = fs.first_object;    /* so unlock_and_deallocate works */
+
+       /*
+        * If the entry is wired we cannot change the page protection.
+        */
+       if (fs.wired)
+               fault_type = fs.first_prot;
+
+       /*
+        * The page we want is at (first_object, first_pindex), but if the
+        * vm_map_entry is VM_MAPTYPE_VPAGETABLE we have to traverse the
+        * page table to figure out the actual pindex.
+        *
+        * NOTE!  DEVELOPMENT IN PROGRESS, THIS IS AN INITIAL IMPLEMENTATION
+        * ONLY
+        */
+       if (fs.entry->maptype == VM_MAPTYPE_VPAGETABLE) {
+               result = vm_fault_vpagetable(&fs, &first_pindex,
+                                            fs.entry->aux.master_pde);
+               if (result == KERN_TRY_AGAIN)
+                       goto RetryFault;
+               if (result != KERN_SUCCESS) {
+                       *errorp = result;
+                       return (NULL);
+               }
+       }
+
+       /*
+        * Now we have the actual (object, pindex), fault in the page.  If
+        * vm_fault_object() fails it will unlock and deallocate the FS
+        * data.   If it succeeds everything remains locked and fs->object
+        * will have an additional PIP count if it is not equal to
+        * fs->first_object
+        */
+       result = vm_fault_object(&fs, first_pindex, fault_type);
+
+       if (result == KERN_TRY_AGAIN)
+               goto RetryFault;
+       if (result != KERN_SUCCESS) {
+               *errorp = result;
+               return(NULL);
+       }
+
+       /*
+        * On success vm_fault_object() does not unlock or deallocate, and fs.m
+        * will contain a busied page.
+        */
+       unlock_things(&fs);
+
+       /*
+        * Return a held page.  We are not doing any pmap manipulation so do
+        * not set PG_MAPPED.
+        */
+       vm_page_flag_clear(fs.m, PG_ZERO);
+       vm_page_flag_set(fs.m, PG_REFERENCED);
+       vm_page_hold(fs.m);
+
+       /*
+        * Unbusy the page by activating it.  It remains held and will not
+        * be reclaimed.
+        */
+       vm_page_activate(fs.m);
+
+       if (curthread->td_lwp) {
+               if (fs.hardfault) {
+                       curthread->td_lwp->lwp_ru.ru_majflt++;
+               } else {
+                       curthread->td_lwp->lwp_ru.ru_minflt++;
+               }
+       }
+
+       /*
+        * Unlock everything, and return the held page.
+        */
+       vm_page_wakeup(fs.m);
+       vm_object_deallocate(fs.first_object);
+
+       *errorp = 0;
+       return(fs.m);
+}
+
 /*
  * Translate the virtual page number (first_pindex) that is relative
  * to the address space into a logical page number that is relative to the