From 4e158347a04873e5456761ed747ea6d1e9a7ed79 Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Sat, 6 Jan 2007 22:35:47 +0000
Subject: [PATCH] Add a new procedure, vm_fault_page(), which performs all
 actions needed to fault in a VM page for a given vm_map and virtual address,
 including any necessary I/O, but returns the held page instead of entering
 it into a pmap. Use the new function in procfs_rwmem, allowing gdb to 'see'
 memory that is governed by a virtual page table.

---
 sys/vfs/procfs/procfs_mem.c |  74 +---------------
 sys/vm/vm_extern.h          |   3 +-
 sys/vm/vm_fault.c           | 168 +++++++++++++++++++++++++++++++++++-
 3 files changed, 173 insertions(+), 72 deletions(-)

diff --git a/sys/vfs/procfs/procfs_mem.c b/sys/vfs/procfs/procfs_mem.c
index 70819b7e73..67cbd09c4a 100644
--- a/sys/vfs/procfs/procfs_mem.c
+++ b/sys/vfs/procfs/procfs_mem.c
@@ -38,7 +38,7 @@
  * @(#)procfs_mem.c 8.5 (Berkeley) 6/15/94
  *
  * $FreeBSD: src/sys/miscfs/procfs/procfs_mem.c,v 1.46.2.3 2002/01/22 17:22:59 nectar Exp $
- * $DragonFly: src/sys/vfs/procfs/procfs_mem.c,v 1.12 2006/12/28 21:24:02 dillon Exp $
+ * $DragonFly: src/sys/vfs/procfs/procfs_mem.c,v 1.13 2007/01/06 22:35:46 dillon Exp $
  */
 
 /*
@@ -103,15 +103,8 @@ procfs_rwmem(struct proc *curp, struct proc *p, struct uio *uio)
          * makes things easier. This way is trivial - right?
          */
         do {
-                vm_map_t tmap;
                 vm_offset_t uva;
                 int page_offset;                /* offset into page */
-                vm_map_entry_t out_entry;
-                vm_prot_t out_prot;
-                boolean_t wired;
-                vm_pindex_t pindex;
-                vm_object_t object;
-                vm_object_t nobject;
                 u_int len;
                 vm_page_t m;
 
@@ -131,68 +124,10 @@ procfs_rwmem(struct proc *curp, struct proc *p, struct uio *uio)
                 /*
                  * Fault the page on behalf of the process
                  */
-                error = vm_fault(map, pageno, reqprot, VM_FAULT_NORMAL);
+                m = vm_fault_page(map, pageno, reqprot,
+                                  VM_FAULT_NORMAL, &error);
                 if (error) {
-                        error = EFAULT;
-                        break;
-                }
-
-                /*
-                 * Now we need to get the page. out_entry, out_prot, wired,
-                 * and single_use aren't used. One would think the vm code
-                 * would be a *bit* nicer... We use tmap because
-                 * vm_map_lookup() can change the map argument.
-                 */
-                tmap = map;
-                error = vm_map_lookup(&tmap, pageno, reqprot,
-                              &out_entry, &object, &pindex, &out_prot,
-                              &wired);
-
-                if (error) {
-                        error = EFAULT;
-                        break;
-                }
-
-                /*
-                 * spl protection is required to avoid interrupt freeing
-                 * races, reference the object to avoid it being ripped
-                 * out from under us if we block.
-                 */
-                crit_enter();
-                vm_object_reference(object);
-again:
-                m = vm_page_lookup(object, pindex);
-
-                /*
-                 * Allow fallback to backing objects if we are reading
-                 */
-                while (m == NULL && !writing && object->backing_object) {
-                        pindex += OFF_TO_IDX(object->backing_object_offset);
-                        nobject = object->backing_object;
-                        vm_object_reference(nobject);
-                        vm_object_deallocate(object);
-                        object = nobject;
-                        m = vm_page_lookup(object, pindex);
-                }
-
-                /*
-                 * Wait for any I/O's to complete, then hold the page
-                 * so we can release the spl.
-                 */
-                if (m) {
-                        if (vm_page_sleep_busy(m, FALSE, "rwmem"))
-                                goto again;
-                        vm_page_hold(m);
-                }
-                crit_exit();
-
-                /*
-                 * We no longer need the object. If we do not have a page
-                 * then cleanup.
-                 */
-                vm_object_deallocate(object);
-                if (m == NULL) {
-                        vm_map_lookup_done(tmap, out_entry, 0);
+                        KKASSERT(m == NULL);
                         error = EFAULT;
                         break;
                 }
@@ -201,7 +136,6 @@ again:
                  * Cleanup tmap then create a temporary KVA mapping and
                  * do the I/O.
                  */
-                vm_map_lookup_done(tmap, out_entry, 0);
                 pmap_kenter(kva, VM_PAGE_TO_PHYS(m));
                 error = uiomove((caddr_t)(kva + page_offset), len, uio);
                 pmap_kremove(kva);
diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h
index 76748b8199..76f5a09af0 100644
--- a/sys/vm/vm_extern.h
+++ b/sys/vm/vm_extern.h
@@ -32,7 +32,7 @@
  *
  * @(#)vm_extern.h 8.2 (Berkeley) 1/12/94
  * $FreeBSD: src/sys/vm/vm_extern.h,v 1.46.2.3 2003/01/13 22:51:17 dillon Exp $
- * $DragonFly: src/sys/vm/vm_extern.h,v 1.21 2006/12/28 21:24:02 dillon Exp $
+ * $DragonFly: src/sys/vm/vm_extern.h,v 1.22 2007/01/06 22:35:47 dillon Exp $
  */
 
 #ifndef _VM_VM_EXTERN_H_
@@ -89,6 +89,7 @@ int swaponvp (struct thread *, struct vnode *, u_long);
 void swapout_procs (int);
 int useracc(c_caddr_t, int, int);
 int vm_fault (vm_map_t, vm_offset_t, vm_prot_t, int);
+vm_page_t vm_fault_page (vm_map_t, vm_offset_t, vm_prot_t, int, int *);
 void vm_fault_copy_entry (vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t);
 void vm_fault_unwire (vm_map_t, vm_map_entry_t);
 int vm_fault_wire (vm_map_t, vm_map_entry_t, boolean_t);
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 25be785f75..e0a3af5dd6 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -67,7 +67,7 @@
  * rights to redistribute these changes.
  *
  * $FreeBSD: src/sys/vm/vm_fault.c,v 1.108.2.8 2002/02/26 05:49:27 silby Exp $
- * $DragonFly: src/sys/vm/vm_fault.c,v 1.34 2007/01/01 22:51:18 corecode Exp $
+ * $DragonFly: src/sys/vm/vm_fault.c,v 1.35 2007/01/06 22:35:47 dillon Exp $
  */
 
 /*
@@ -403,6 +403,172 @@ RetryFault:
         return (KERN_SUCCESS);
 }
 
+/*
+ * Fault-in the specified virtual address in the specified map, doing all
+ * necessary manipulation of the object store and all necessary I/O. Return
+ * a held VM page or NULL, and set *errorp. The related pmap is not
+ * updated.
+ *
+ * Since the pmap is not updated, this routine may not be used to wire
+ * the page.
+ */
+vm_page_t
+vm_fault_page(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+              int fault_flags, int *errorp)
+{
+        int result;
+        vm_pindex_t first_pindex;
+        struct faultstate fs;
+
+        mycpu->gd_cnt.v_vm_faults++;
+
+        fs.didlimit = 0;
+        fs.hardfault = 0;
+        fs.fault_flags = fault_flags;
+        KKASSERT((fault_flags & VM_FAULT_WIRE_MASK) == 0);
+
+RetryFault:
+        /*
+         * Find the vm_map_entry representing the backing store and resolve
+         * the top level object and page index. This may have the side
+         * effect of executing a copy-on-write on the map entry and/or
+         * creating a shadow object, but will not COW any actual VM pages.
+         *
+         * On success fs.map is left read-locked and various other fields
+         * are initialized but not otherwise referenced or locked.
+         *
+         * NOTE! vm_map_lookup will upgrade the fault_type to VM_FAULT_WRITE
+         * if the map entry is a virtual page table and also writable,
+         * so we can set the 'A' (accessed) bit in the virtual page table
+         * entry.
+         */
+        fs.map = map;
+        result = vm_map_lookup(&fs.map, vaddr, fault_type,
+                               &fs.entry, &fs.first_object,
+                               &first_pindex, &fs.first_prot, &fs.wired);
+
+        if (result != KERN_SUCCESS) {
+                *errorp = result;
+                return (NULL);
+        }
+
+        /*
+         * fs.map is read-locked
+         *
+         * Misc checks. Save the map generation number to detect races.
+         */
+        fs.map_generation = fs.map->timestamp;
+
+        if (fs.entry->eflags & MAP_ENTRY_NOFAULT) {
+                panic("vm_fault: fault on nofault entry, addr: %lx",
+                      (u_long)vaddr);
+        }
+
+        /*
+         * A system map entry may return a NULL object. No object means
+         * no pager means an unrecoverable kernel fault.
+         */
+        if (fs.first_object == NULL) {
+                panic("vm_fault: unrecoverable fault at %p in entry %p",
+                      (void *)vaddr, fs.entry);
+        }
+
+        /*
+         * Make a reference to this object to prevent its disposal while we
+         * are messing with it. Once we have the reference, the map is free
+         * to be diddled. Since objects reference their shadows (and copies),
+         * they will stay around as well.
+         *
+         * Bump the paging-in-progress count to prevent size changes (e.g.
+         * truncation operations) during I/O. This must be done after
+         * obtaining the vnode lock in order to avoid possible deadlocks.
+         */
+        vm_object_reference(fs.first_object);
+        fs.vp = vnode_pager_lock(fs.first_object);
+        vm_object_pip_add(fs.first_object, 1);
+
+        fs.lookup_still_valid = TRUE;
+        fs.first_m = NULL;
+        fs.object = fs.first_object;    /* so unlock_and_deallocate works */
+
+        /*
+         * If the entry is wired we cannot change the page protection.
+         */
+        if (fs.wired)
+                fault_type = fs.first_prot;
+
+        /*
+         * The page we want is at (first_object, first_pindex), but if the
+         * vm_map_entry is VM_MAPTYPE_VPAGETABLE we have to traverse the
+         * page table to figure out the actual pindex.
+         *
+         * NOTE! DEVELOPMENT IN PROGRESS, THIS IS AN INITIAL IMPLEMENTATION
+         * ONLY
+         */
+        if (fs.entry->maptype == VM_MAPTYPE_VPAGETABLE) {
+                result = vm_fault_vpagetable(&fs, &first_pindex,
+                                             fs.entry->aux.master_pde);
+                if (result == KERN_TRY_AGAIN)
+                        goto RetryFault;
+                if (result != KERN_SUCCESS) {
+                        *errorp = result;
+                        return (NULL);
+                }
+        }
+
+        /*
+         * Now we have the actual (object, pindex), fault in the page. If
+         * vm_fault_object() fails it will unlock and deallocate the FS
+         * data. If it succeeds everything remains locked and fs->object
+         * will have an additional PIP count if it is not equal to
+         * fs->first_object.
+         */
+        result = vm_fault_object(&fs, first_pindex, fault_type);
+
+        if (result == KERN_TRY_AGAIN)
+                goto RetryFault;
+        if (result != KERN_SUCCESS) {
+                *errorp = result;
+                return(NULL);
+        }
+
+        /*
+         * On success vm_fault_object() does not unlock or deallocate, and fs.m
+         * will contain a busied page.
+         */
+        unlock_things(&fs);
+
+        /*
+         * Return a held page. We are not doing any pmap manipulation so do
+         * not set PG_MAPPED.
+         */
+        vm_page_flag_clear(fs.m, PG_ZERO);
+        vm_page_flag_set(fs.m, PG_REFERENCED);
+        vm_page_hold(fs.m);
+
+        /*
+         * Unbusy the page by activating it. It remains held and will not
+         * be reclaimed.
+         */
+        vm_page_activate(fs.m);
+
+        if (curthread->td_lwp) {
+                if (fs.hardfault) {
+                        curthread->td_lwp->lwp_ru.ru_majflt++;
+                } else {
+                        curthread->td_lwp->lwp_ru.ru_minflt++;
+                }
+        }
+
+        /*
+         * Unlock everything, and return the held page.
+         */
+        vm_page_wakeup(fs.m);
+        vm_object_deallocate(fs.first_object);
+
+        *errorp = 0;
+        return(fs.m);
+}
+
 /*
  * Translate the virtual page number (first_pindex) that is relative
  * to the address space into a logical page number that is relative to the
-- 
2.41.0
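For reference, the calling pattern this patch establishes in procfs_rwmem looks roughly like the sketch below: fault the target page with vm_fault_page(), copy through a temporary kernel mapping, then tear the mapping down. The kva scratch address and the final vm_page_unhold() call are assumptions about the surrounding, unmodified procfs_rwmem code; they are not part of the hunks above, and the other identifiers follow the procfs_mem.c hunk.

        /* Illustrative sketch only, not a literal excerpt of the patch. */
        m = vm_fault_page(map, pageno, reqprot, VM_FAULT_NORMAL, &error);
        if (error) {
                KKASSERT(m == NULL);            /* no page is returned on error */
                error = EFAULT;
        } else {
                /* kva: caller-supplied scratch kernel VA (assumed) */
                pmap_kenter(kva, VM_PAGE_TO_PHYS(m));
                error = uiomove((caddr_t)(kva + page_offset), len, uio);
                pmap_kremove(kva);
                /* assumed: release the hold taken by vm_fault_page() */
                vm_page_unhold(m);
        }

Because vm_fault_page() returns a held but unmapped page, the caller never touches the target process's pmap; this is what allows gdb to read memory governed by a virtual page table, where no pmap entry may exist for the address being inspected.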