From 1b9d3514e66df9d4bc5a25c898c7c8cbe414e3e4 Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Sun, 17 Jan 2010 17:27:50 -0800
Subject: [PATCH] kernel - simplify vm pager ops, add pre-faulting for
 zero-fill pages.

* Remove the behind and ahead arguments to struct pagerops->pgo_getpages,
  and pagerops->pgo_haspage.  Adjust pgo_getpages() to pgo_getpage(),
  change *_pager_getpages() to *_pager_getpage(), etc.  Add a sequential
  access flag to the call.

  The VM system is no longer responsible for dealing with read-ahead on
  pager ops.  The individual pagers are now responsible.  The vnode pager
  now specifies the sequential access heuristic based on the hint passed
  to it.  HAMMER uses this hint to issue readaheads via the buffer cache.

* Move, rename, and consolidate pmap_prefault().  Remove this function
  from all platform sources and place it in vm/vm_fault.c.  Add a simple
  platform-specific pmap_prefault_ok() function to test particular
  virtual addresses.

* The new prefault code is called vm_prefault().  Enhance the code to
  also prefault and make writable (when it can) zero-fill pages.

  The new zero-fill prefault feature improves buildworld times by over
  5% by greatly reducing the number of VM faults taken during normal
  program operation.  This particularly helps larger applications and
  concurrent applications in SMP systems.

  The code is conditionalized such that small applications (which do not
  benefit much from prefaulting zero-fill) still run about as fast as
  they did before.

* Fix an issue in vm_fault() where the vm_map was being unlocked before
  the prefault code was called; it really needs to be unlocked after the
  prefault code runs.
---
 sys/kern/kern_exec.c                 |   32 +--
 sys/platform/pc32/i386/pmap.c        |  115 ++-------
 sys/platform/pc32/i386/trap.c        |   17 +-
 sys/platform/pc64/x86_64/pmap.c      |  116 ++-------
 sys/platform/pc64/x86_64/trap.c      |   16 +-
 sys/platform/vkernel/i386/trap.c     |   16 +-
 sys/platform/vkernel/platform/pmap.c |  127 ++-------
 sys/vm/default_pager.c               |   12 +-
 sys/vm/device_pager.c                |   39 ++-
 sys/vm/phys_pager.c                  |   46 +---
 sys/vm/pmap.h                        |    3 +-
 sys/vm/swap_pager.c                  |   32 +--
 sys/vm/swap_pager.h                  |    2 +-
 sys/vm/vm_fault.c                    |  373 +++++++++++++++++++--------
 sys/vm/vm_map.h                      |    2 +-
 sys/vm/vm_object.c                   |   10 +-
 sys/vm/vm_pager.c                    |   14 +-
 sys/vm/vm_pager.h                    |   44 ++--
 sys/vm/vnode_pager.c                 |   55 ++--
 19 files changed, 457 insertions(+), 614 deletions(-)

diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index cfc92e31f3..b9b2e2ac60 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -578,9 +578,8 @@ int
 exec_map_page(struct image_params *imgp, vm_pindex_t pageno,
               struct sf_buf **psfb, const char **pdata)
 {
-        int rv, i;
-        int initial_pagein;
-        vm_page_t ma[VM_INITIAL_PAGEIN];
+        int rv;
+        vm_page_t ma;
         vm_page_t m;
         vm_object_t object;

@@ -598,29 +597,10 @@ exec_map_page(struct image_params *imgp, vm_pindex_t pageno,
         * need it for the lookup loop below (lookup/busy race), since
         * an interrupt can unbusy and free the page before our busy check.
*/ - crit_enter(); m = vm_page_grab(object, pageno, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); - - if ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) { - ma[0] = m; - initial_pagein = VM_INITIAL_PAGEIN; - if (initial_pagein + pageno > object->size) - initial_pagein = object->size - pageno; - for (i = 1; i < initial_pagein; i++) { - if ((m = vm_page_lookup(object, i + pageno)) != NULL) { - if ((m->flags & PG_BUSY) || m->busy) - break; - if (m->valid) - break; - vm_page_busy(m); - } else { - m = vm_page_alloc(object, i + pageno, VM_ALLOC_NORMAL); - if (m == NULL) - break; - } - ma[i] = m; - } - initial_pagein = i; + crit_enter(); + while ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) { + ma = m; /* * get_pages unbusies all the requested pages except the @@ -629,7 +609,7 @@ exec_map_page(struct image_params *imgp, vm_pindex_t pageno, * the buffer cache) so vnode_pager_freepage() must be * used to properly release it. */ - rv = vm_pager_get_pages(object, ma, initial_pagein, 0); + rv = vm_pager_get_page(object, &ma, 1); m = vm_page_lookup(object, pageno); if (rv != VM_PAGER_OK || m == NULL || m->valid == 0) { diff --git a/sys/platform/pc32/i386/pmap.c b/sys/platform/pc32/i386/pmap.c index 6b471b2b27..4721bc57eb 100644 --- a/sys/platform/pc32/i386/pmap.c +++ b/sys/platform/pc32/i386/pmap.c @@ -210,7 +210,6 @@ static void i386_protection_init (void); static __inline void pmap_clearbit (vm_page_t m, int bit); static void pmap_remove_all (vm_page_t m); -static void pmap_enter_quick (pmap_t pmap, vm_offset_t va, vm_page_t m); static int pmap_remove_pte (struct pmap *pmap, unsigned *ptq, vm_offset_t sva, pmap_inval_info_t info); static void pmap_remove_page (struct pmap *pmap, @@ -2272,7 +2271,7 @@ validate: * * This code currently may only be used on user pmaps, not kernel_pmap. */ -static void +void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m) { unsigned *pte; @@ -2486,107 +2485,23 @@ pmap_object_init_pt_callback(vm_page_t p, void *data) } /* - * pmap_prefault provides a quick way of clustering pagefaults into a - * processes address space. It is a "cousin" of pmap_object_init_pt, - * except it runs at page fault time instead of mmap time. + * Return TRUE if the pmap is in shape to trivially + * pre-fault the specified address. + * + * Returns FALSE if it would be non-trivial or if a + * pte is already loaded into the slot. */ -#define PFBAK 4 -#define PFFOR 4 -#define PAGEORDER_SIZE (PFBAK+PFFOR) - -static int pmap_prefault_pageorder[] = { - -PAGE_SIZE, PAGE_SIZE, - -2 * PAGE_SIZE, 2 * PAGE_SIZE, - -3 * PAGE_SIZE, 3 * PAGE_SIZE, - -4 * PAGE_SIZE, 4 * PAGE_SIZE -}; - -void -pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry) +int +pmap_prefault_ok(pmap_t pmap, vm_offset_t addr) { - int i; - vm_offset_t starta; - vm_offset_t addr; - vm_pindex_t pindex; - vm_page_t m; - vm_object_t object; - struct lwp *lp; - - /* - * We do not currently prefault mappings that use virtual page - * tables. We do not prefault foreign pmaps. - */ - if (entry->maptype == VM_MAPTYPE_VPAGETABLE) - return; - lp = curthread->td_lwp; - if (lp == NULL || (pmap != vmspace_pmap(lp->lwp_vmspace))) - return; - - object = entry->object.vm_object; - - starta = addra - PFBAK * PAGE_SIZE; - if (starta < entry->start) - starta = entry->start; - else if (starta > addra) - starta = 0; - - /* - * critical section protection is required to maintain the - * page/object association, interrupts can free pages and remove - * them from their objects. 
- */ - crit_enter(); - for (i = 0; i < PAGEORDER_SIZE; i++) { - vm_object_t lobject; - unsigned *pte; - - addr = addra + pmap_prefault_pageorder[i]; - if (addr > addra + (PFFOR * PAGE_SIZE)) - addr = 0; - - if (addr < starta || addr >= entry->end) - continue; - - if ((*pmap_pde(pmap, addr)) == 0) - continue; - - pte = (unsigned *) vtopte(addr); - if (*pte) - continue; - - pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; - lobject = object; - - for (m = vm_page_lookup(lobject, pindex); - (!m && (lobject->type == OBJT_DEFAULT) && - (lobject->backing_object)); - lobject = lobject->backing_object - ) { - if (lobject->backing_object_offset & PAGE_MASK) - break; - pindex += (lobject->backing_object_offset >> PAGE_SHIFT); - m = vm_page_lookup(lobject->backing_object, pindex); - } - - /* - * give-up when a page is not in memory - */ - if (m == NULL) - break; - - if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && - (m->busy == 0) && - (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { + unsigned *pte; - if ((m->queue - m->pc) == PQ_CACHE) { - vm_page_deactivate(m); - } - vm_page_busy(m); - pmap_enter_quick(pmap, addr, m); - vm_page_wakeup(m); - } - } - crit_exit(); + if ((*pmap_pde(pmap, addr)) == 0) + return(0); + pte = (unsigned *) vtopte(addr); + if (*pte) + return(0); + return(1); } /* diff --git a/sys/platform/pc32/i386/trap.c b/sys/platform/pc32/i386/trap.c index ab0f90e435..c4ba438b6e 100644 --- a/sys/platform/pc32/i386/trap.c +++ b/sys/platform/pc32/i386/trap.c @@ -936,6 +936,7 @@ trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva) struct vmspace *vm = NULL; vm_map_t map = 0; int rv = 0; + int fault_flags; vm_prot_t ftype; thread_t td = curthread; struct lwp *lp = td->td_lwp; @@ -1002,11 +1003,17 @@ trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva) goto nogo; } - /* Fault in the user page: */ - rv = vm_fault(map, va, ftype, - (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY - : VM_FAULT_NORMAL); - + /* + * Issue fault + */ + fault_flags = 0; + if (usermode) + fault_flags |= VM_FAULT_BURST; + if (ftype & VM_PROT_WRITE) + fault_flags |= VM_FAULT_DIRTY; + else + fault_flags |= VM_FAULT_NORMAL; + rv = vm_fault(map, va, ftype, fault_flags); PRELE(lp->lwp_proc); } else { /* diff --git a/sys/platform/pc64/x86_64/pmap.c b/sys/platform/pc64/x86_64/pmap.c index db0d39c5c1..d2d86c9d46 100644 --- a/sys/platform/pc64/x86_64/pmap.c +++ b/sys/platform/pc64/x86_64/pmap.c @@ -206,7 +206,6 @@ static pv_entry_t get_pv_entry (void); static void i386_protection_init (void); static void create_pagetables(vm_paddr_t *firstaddr); static void pmap_remove_all (vm_page_t m); -static void pmap_enter_quick (pmap_t pmap, vm_offset_t va, vm_page_t m); static int pmap_remove_pte (struct pmap *pmap, pt_entry_t *ptq, vm_offset_t sva, pmap_inval_info_t info); static void pmap_remove_page (struct pmap *pmap, @@ -2599,7 +2598,6 @@ validate: * * This code currently may only be used on user pmaps, not kernel_pmap. */ -static void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m) { @@ -2819,109 +2817,27 @@ pmap_object_init_pt_callback(vm_page_t p, void *data) } /* - * pmap_prefault provides a quick way of clustering pagefaults into a - * processes address space. It is a "cousin" of pmap_object_init_pt, - * except it runs at page fault time instead of mmap time. + * Return TRUE if the pmap is in shape to trivially + * pre-fault the specified address. + * + * Returns FALSE if it would be non-trivial or if a + * pte is already loaded into the slot. 
*/ -#define PFBAK 4 -#define PFFOR 4 -#define PAGEORDER_SIZE (PFBAK+PFFOR) - -static int pmap_prefault_pageorder[] = { - -PAGE_SIZE, PAGE_SIZE, - -2 * PAGE_SIZE, 2 * PAGE_SIZE, - -3 * PAGE_SIZE, 3 * PAGE_SIZE, - -4 * PAGE_SIZE, 4 * PAGE_SIZE -}; - -void -pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry) +int +pmap_prefault_ok(pmap_t pmap, vm_offset_t addr) { - int i; - vm_offset_t starta; - vm_offset_t addr; - vm_pindex_t pindex; - vm_page_t m; - vm_object_t object; - struct lwp *lp; - - /* - * We do not currently prefault mappings that use virtual page - * tables. We do not prefault foreign pmaps. - */ - if (entry->maptype == VM_MAPTYPE_VPAGETABLE) - return; - lp = curthread->td_lwp; - if (lp == NULL || (pmap != vmspace_pmap(lp->lwp_vmspace))) - return; - - object = entry->object.vm_object; - - starta = addra - PFBAK * PAGE_SIZE; - if (starta < entry->start) - starta = entry->start; - else if (starta > addra) - starta = 0; - - /* - * critical section protection is required to maintain the - * page/object association, interrupts can free pages and remove - * them from their objects. - */ - crit_enter(); - for (i = 0; i < PAGEORDER_SIZE; i++) { - vm_object_t lobject; - pt_entry_t *pte; - pd_entry_t *pde; - - addr = addra + pmap_prefault_pageorder[i]; - if (addr > addra + (PFFOR * PAGE_SIZE)) - addr = 0; - - if (addr < starta || addr >= entry->end) - continue; - - pde = pmap_pde(pmap, addr); - if (pde == NULL || *pde == 0) - continue; - - pte = vtopte(addr); - if (*pte) - continue; - - pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; - lobject = object; - - for (m = vm_page_lookup(lobject, pindex); - (!m && (lobject->type == OBJT_DEFAULT) && - (lobject->backing_object)); - lobject = lobject->backing_object - ) { - if (lobject->backing_object_offset & PAGE_MASK) - break; - pindex += (lobject->backing_object_offset >> PAGE_SHIFT); - m = vm_page_lookup(lobject->backing_object, pindex); - } + pt_entry_t *pte; + pd_entry_t *pde; - /* - * give-up when a page is not in memory - */ - if (m == NULL) - break; + pde = pmap_pde(pmap, addr); + if (pde == NULL || *pde == 0) + return(0); - if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && - (m->busy == 0) && - (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { + pte = vtopte(addr); + if (*pte) + return(0); - if ((m->queue - m->pc) == PQ_CACHE) { - vm_page_deactivate(m); - } - vm_page_busy(m); - pmap_enter_quick(pmap, addr, m); - vm_page_wakeup(m); - } - } - crit_exit(); + return(1); } /* diff --git a/sys/platform/pc64/x86_64/trap.c b/sys/platform/pc64/x86_64/trap.c index 1ad9f1da9b..3a8c1dbf98 100644 --- a/sys/platform/pc64/x86_64/trap.c +++ b/sys/platform/pc64/x86_64/trap.c @@ -817,6 +817,7 @@ trap_pfault(struct trapframe *frame, int usermode) struct vmspace *vm = NULL; vm_map_t map; int rv = 0; + int fault_flags; vm_prot_t ftype; thread_t td = curthread; struct lwp *lp = td->td_lwp; @@ -880,10 +881,17 @@ trap_pfault(struct trapframe *frame, int usermode) goto nogo; } - /* Fault in the user page: */ - rv = vm_fault(map, va, ftype, - (ftype & VM_PROT_WRITE) ? 
VM_FAULT_DIRTY - : VM_FAULT_NORMAL); + /* + * Issue fault + */ + fault_flags = 0; + if (usermode) + fault_flags |= VM_FAULT_BURST; + if (ftype & VM_PROT_WRITE) + fault_flags |= VM_FAULT_DIRTY; + else + fault_flags |= VM_FAULT_NORMAL; + rv = vm_fault(map, va, ftype, fault_flags); PRELE(lp->lwp_proc); } else { diff --git a/sys/platform/vkernel/i386/trap.c b/sys/platform/vkernel/i386/trap.c index 664adc38c3..cb548217c6 100644 --- a/sys/platform/vkernel/i386/trap.c +++ b/sys/platform/vkernel/i386/trap.c @@ -886,6 +886,7 @@ trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva) struct vmspace *vm = NULL; vm_map_t map = 0; int rv = 0; + int fault_flags; vm_prot_t ftype; thread_t td = curthread; struct lwp *lp = td->td_lwp; @@ -938,10 +939,17 @@ trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva) goto nogo; } - /* Fault in the user page: */ - rv = vm_fault(map, va, ftype, - (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY - : VM_FAULT_NORMAL); + /* + * Issue fault + */ + fault_flags = 0; + if (usermode) + fault_flags |= VM_FAULT_BURST; + if (ftype & VM_PROT_WRITE) + fault_flags |= VM_FAULT_DIRTY; + else + fault_flags |= VM_FAULT_NORMAL; + rv = vm_fault(map, va, ftype, fault_flags); PRELE(lp->lwp_proc); } else { diff --git a/sys/platform/vkernel/platform/pmap.c b/sys/platform/vkernel/platform/pmap.c index e3231125fc..b742846963 100644 --- a/sys/platform/vkernel/platform/pmap.c +++ b/sys/platform/vkernel/platform/pmap.c @@ -1894,7 +1894,7 @@ validate: * * Currently this routine may only be used on user pmaps, not kernel_pmap. */ -static void +void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m) { vpte_t *pte; @@ -2101,122 +2101,25 @@ pmap_object_init_pt_callback(vm_page_t p, void *data) } /* - * pmap_prefault provides a quick way of clustering pagefaults into a - * processes address space. It is a "cousin" of pmap_object_init_pt, - * except it runs at page fault time instead of mmap time. + * Return TRUE if the pmap is in shape to trivially + * pre-fault the specified address. + * + * Returns FALSE if it would be non-trivial or if a + * pte is already loaded into the slot. */ -#define PFBAK 4 -#define PFFOR 4 -#define PAGEORDER_SIZE (PFBAK+PFFOR) - -static int pmap_prefault_pageorder[] = { - -PAGE_SIZE, PAGE_SIZE, - -2 * PAGE_SIZE, 2 * PAGE_SIZE, - -3 * PAGE_SIZE, 3 * PAGE_SIZE, - -4 * PAGE_SIZE, 4 * PAGE_SIZE -}; - -void -pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry) +int +pmap_prefault_ok(pmap_t pmap, vm_offset_t addr) { - vm_offset_t starta; - vm_offset_t addr; - vm_pindex_t pindex; - vm_page_t m; - vm_object_t object; - struct lwp *lp; - int i; - - /* - * We do not currently prefault mappings that use virtual page - * tables. We do not prefault foreign pmaps. - */ - if (entry->maptype == VM_MAPTYPE_VPAGETABLE) - return; - lp = curthread->td_lwp; - if (lp == NULL || pmap != vmspace_pmap(lp->lwp_vmspace)) - return; - - object = entry->object.vm_object; - - starta = addra - PFBAK * PAGE_SIZE; - if (starta < entry->start) - starta = entry->start; - else if (starta > addra) - starta = 0; - - /* - * critical section protection is required to maintain the - * page/object association, interrupts can free pages and remove - * them from their objects. 
- */ - crit_enter(); - for (i = 0; i < PAGEORDER_SIZE; i++) { - vm_object_t lobject; - vpte_t *pte; - - addr = addra + pmap_prefault_pageorder[i]; - if (addr > addra + (PFFOR * PAGE_SIZE)) - addr = 0; - - if (addr < starta || addr >= entry->end) - continue; - - /* - * Make sure the page table page already exists - */ - if ((*pmap_pde(pmap, addr)) == 0) - continue; - - /* - * Get a pointer to the pte and make sure that no valid page - * has been mapped. - */ - pte = get_ptbase(pmap, addr); - if (*pte) - continue; - - /* - * Get the page to be mapped - */ - pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; - lobject = object; - - for (m = vm_page_lookup(lobject, pindex); - (!m && (lobject->type == OBJT_DEFAULT) && - (lobject->backing_object)); - lobject = lobject->backing_object - ) { - if (lobject->backing_object_offset & PAGE_MASK) - break; - pindex += (lobject->backing_object_offset >> PAGE_SHIFT); - m = vm_page_lookup(lobject->backing_object, pindex); - } - - /* - * give-up when a page is not in memory - */ - if (m == NULL) - break; + vpte_t *pte; - /* - * If everything meets the requirements for pmap_enter_quick(), - * then enter the page. - */ + if ((*pmap_pde(pmap, addr)) == 0) + return(0); - if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && - (m->busy == 0) && - (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { + pte = get_ptbase(pmap, addr); + if (*pte) + return(0); - if ((m->queue - m->pc) == PQ_CACHE) { - vm_page_deactivate(m); - } - vm_page_busy(m); - pmap_enter_quick(pmap, addr, m); - vm_page_wakeup(m); - } - } - crit_exit(); + return(1); } /* diff --git a/sys/vm/default_pager.c b/sys/vm/default_pager.c index 3b42272900..94c79ffb03 100644 --- a/sys/vm/default_pager.c +++ b/sys/vm/default_pager.c @@ -51,11 +51,10 @@ static vm_object_t default_pager_alloc (void *, off_t, vm_prot_t, off_t); static void default_pager_dealloc (vm_object_t); -static int default_pager_getpages (vm_object_t, vm_page_t *, int, int); +static int default_pager_getpage (vm_object_t, vm_page_t *, int); static void default_pager_putpages (vm_object_t, vm_page_t *, int, boolean_t, int *); -static boolean_t default_pager_haspage (vm_object_t, vm_pindex_t, int *, - int *); +static boolean_t default_pager_haspage (vm_object_t, vm_pindex_t); /* * pagerops for OBJT_DEFAULT - "default pager". 
*/ @@ -63,7 +62,7 @@ struct pagerops defaultpagerops = { NULL, default_pager_alloc, default_pager_dealloc, - default_pager_getpages, + default_pager_getpage, default_pager_putpages, default_pager_haspage, NULL @@ -104,7 +103,7 @@ default_pager_dealloc(vm_object_t object) */ static int -default_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) +default_pager_getpage(vm_object_t object, vm_page_t *mpp, int seqaccess) { return VM_PAGER_FAIL; } @@ -137,8 +136,7 @@ default_pager_putpages(vm_object_t object, vm_page_t *m, int c, boolean_t sync, */ static boolean_t -default_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, - int *after) +default_pager_haspage(vm_object_t object, vm_pindex_t pindex) { return FALSE; } diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c index e8a436b9e7..d0ad2678f9 100644 --- a/sys/vm/device_pager.c +++ b/sys/vm/device_pager.c @@ -59,11 +59,10 @@ static void dev_pager_init (void); static vm_object_t dev_pager_alloc (void *, off_t, vm_prot_t, off_t); static void dev_pager_dealloc (vm_object_t); -static int dev_pager_getpages (vm_object_t, vm_page_t *, int, int); +static int dev_pager_getpage (vm_object_t, vm_page_t *, int); static void dev_pager_putpages (vm_object_t, vm_page_t *, int, boolean_t, int *); -static boolean_t dev_pager_haspage (vm_object_t, vm_pindex_t, int *, - int *); +static boolean_t dev_pager_haspage (vm_object_t, vm_pindex_t); /* list of device pager objects */ static struct pagerlst dev_pager_object_list; @@ -79,7 +78,7 @@ struct pagerops devicepagerops = { dev_pager_init, dev_pager_alloc, dev_pager_dealloc, - dev_pager_getpages, + dev_pager_getpage, dev_pager_putpages, dev_pager_haspage, NULL @@ -180,29 +179,30 @@ dev_pager_dealloc(vm_object_t object) } static int -dev_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) +dev_pager_getpage(vm_object_t object, vm_page_t *mpp, int seqaccess) { vm_offset_t offset; vm_paddr_t paddr; vm_page_t page; cdev_t dev; int prot; - int i; + page = *mpp; dev = object->handle; - offset = m[reqpage]->pindex; + offset = page->pindex; prot = PROT_READ; /* XXX should pass in? */ - paddr = pmap_phys_address(dev_dmmap(dev, (vm_offset_t) offset << PAGE_SHIFT, prot)); + paddr = pmap_phys_address( + dev_dmmap(dev, (vm_offset_t)offset << PAGE_SHIFT, prot)); KASSERT(paddr != -1,("dev_pager_getpage: map function returns error")); - if (m[reqpage]->flags & PG_FICTITIOUS) { + if (page->flags & PG_FICTITIOUS) { /* * If the passed in reqpage page is a fake page, update it * with the new physical address. 
*/ - m[reqpage]->phys_addr = paddr; - m[reqpage]->valid = VM_PAGE_BITS_ALL; + page->phys_addr = paddr; + page->valid = VM_PAGE_BITS_ALL; } else { /* * Replace the passed in reqpage page with our own fake page @@ -211,32 +211,23 @@ dev_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) page = dev_pager_getfake(paddr); TAILQ_INSERT_TAIL(&object->un_pager.devp.devp_pglist, page, pageq); crit_enter(); - vm_page_free(m[reqpage]); + vm_page_free(*mpp); vm_page_insert(page, object, offset); crit_exit(); } - for (i = 0; i < count; i++) { - if (i != reqpage) - vm_page_free(m[i]); - } return (VM_PAGER_OK); } static void -dev_pager_putpages(vm_object_t object, vm_page_t *m, int count, boolean_t sync, - int *rtvals) +dev_pager_putpages(vm_object_t object, vm_page_t *m, + int count, boolean_t sync, int *rtvals) { panic("dev_pager_putpage called"); } static boolean_t -dev_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, - int *after) +dev_pager_haspage(vm_object_t object, vm_pindex_t pindex) { - if (before != NULL) - *before = 0; - if (after != NULL) - *after = 0; return (TRUE); } diff --git a/sys/vm/phys_pager.c b/sys/vm/phys_pager.c index f878ff4571..a146cb10cf 100644 --- a/sys/vm/phys_pager.c +++ b/sys/vm/phys_pager.c @@ -118,36 +118,26 @@ phys_pager_dealloc(vm_object_t object) } static int -phys_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) +phys_pager_getpage(vm_object_t object, vm_page_t *mpp, int seqaccess) { - int i; + vm_page_t m = *mpp; crit_enter(); - /* - * Fill as many pages as vm_fault has allocated for us. - */ - for (i = 0; i < count; i++) { - if ((m[i]->flags & PG_ZERO) == 0) - vm_page_zero_fill(m[i]); - vm_page_flag_set(m[i], PG_ZERO); - /* Switch off pv_entries */ - vm_page_unmanage(m[i]); - m[i]->valid = VM_PAGE_BITS_ALL; - m[i]->dirty = 0; - /* The requested page must remain busy, the others not. 
*/ - if (reqpage != i) { - vm_page_flag_clear(m[i], PG_BUSY); - m[i]->busy = 0; - } - } + if ((m->flags & PG_ZERO) == 0) + vm_page_zero_fill(m); + vm_page_flag_set(m, PG_ZERO); + /* Switch off pv_entries */ + vm_page_unmanage(m); + m->valid = VM_PAGE_BITS_ALL; + m->dirty = 0; crit_exit(); return (VM_PAGER_OK); } static void -phys_pager_putpages(vm_object_t object, vm_page_t *m, int count, boolean_t sync, - int *rtvals) +phys_pager_putpages(vm_object_t object, vm_page_t *m, int count, + boolean_t sync, int *rtvals) { panic("phys_pager_putpage called"); @@ -163,18 +153,10 @@ phys_pager_putpages(vm_object_t object, vm_page_t *m, int count, boolean_t sync, #ifndef PHYSCLUSTER #define PHYSCLUSTER 1024 #endif + static boolean_t -phys_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, - int *after) +phys_pager_haspage(vm_object_t object, vm_pindex_t pindex) { - vm_pindex_t base, end; - - base = pindex & (~(PHYSCLUSTER - 1)); - end = base + (PHYSCLUSTER - 1); - if (before != NULL) - *before = pindex - base; - if (after != NULL) - *after = end - pindex; return (TRUE); } @@ -182,7 +164,7 @@ struct pagerops physpagerops = { phys_pager_init, phys_pager_alloc, phys_pager_dealloc, - phys_pager_getpages, + phys_pager_getpage, phys_pager_putpages, phys_pager_haspage, NULL diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h index 4f29034951..02242f8c51 100644 --- a/sys/vm/pmap.h +++ b/sys/vm/pmap.h @@ -148,6 +148,7 @@ void pmap_copy_page_frag (vm_paddr_t, vm_paddr_t, size_t bytes); void pmap_destroy (pmap_t); void pmap_enter (pmap_t, vm_offset_t, struct vm_page *, vm_prot_t, boolean_t); +void pmap_enter_quick (pmap_t, vm_offset_t, struct vm_page *); vm_paddr_t pmap_extract (pmap_t pmap, vm_offset_t va); void pmap_growkernel (vm_offset_t); void pmap_init (void); @@ -184,7 +185,7 @@ void pmap_remove_pages (pmap_t, vm_offset_t, vm_offset_t); void pmap_zero_page (vm_paddr_t); void pmap_page_assertzero (vm_paddr_t); void pmap_zero_page_area (vm_paddr_t, int off, int size); -void pmap_prefault (pmap_t, vm_offset_t, vm_map_entry_t); +int pmap_prefault_ok (pmap_t, vm_offset_t); int pmap_mincore (pmap_t pmap, vm_offset_t addr); void pmap_init_proc (struct proc *); void pmap_init_thread (struct thread *td); diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index c05455deb5..3fef04b786 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -189,7 +189,7 @@ static vm_object_t swap_pager_alloc (void *handle, off_t size, vm_prot_t prot, off_t offset); static void swap_pager_dealloc (vm_object_t object); -static int swap_pager_getpages (vm_object_t, vm_page_t *, int, int); +static int swap_pager_getpage (vm_object_t, vm_page_t *, int); static void swap_pager_init (void); static void swap_pager_unswapped (vm_page_t); static void swap_pager_strategy (vm_object_t, struct bio *); @@ -199,7 +199,7 @@ struct pagerops swappagerops = { swap_pager_init, /* early system initialization of pager */ swap_pager_alloc, /* allocate an OBJT_SWAP object */ swap_pager_dealloc, /* deallocate an OBJT_SWAP object */ - swap_pager_getpages, /* pagein */ + swap_pager_getpage, /* pagein */ swap_pager_putpages, /* pageout */ swap_pager_haspage, /* get backing store status for page */ swap_pager_unswapped, /* remove swap related to page */ @@ -749,8 +749,7 @@ swap_pager_copy(vm_object_t srcobject, vm_object_t dstobject, */ boolean_t -swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, - int *after) +swap_pager_haspage(vm_object_t object, vm_pindex_t pindex) { daddr_t blk0; @@ -763,17 +762,13 @@ 
swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, if (blk0 == SWAPBLK_NONE) { crit_exit(); - if (before) - *before = 0; - if (after) - *after = 0; return (FALSE); } +#if 0 /* * find backwards-looking contiguous good backing store */ - if (before != NULL) { int i; @@ -805,6 +800,7 @@ swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, } *after = (i - 1); } +#endif crit_exit(); return (TRUE); } @@ -1126,18 +1122,19 @@ swap_chain_iodone(struct bio *biox) */ static int -swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) +swap_pager_getpage(vm_object_t object, vm_page_t *mpp, int seqaccess) { struct buf *bp; struct bio *bio; vm_page_t mreq; int i; int j; + int reqpage; daddr_t blk; vm_offset_t kva; vm_pindex_t lastpindex; - mreq = m[reqpage]; + mreq = *mpp; if (mreq->object != object) { panic("swap_pager_getpages: object mismatch %p/%p", @@ -1159,6 +1156,7 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) crit_enter(); blk = swp_pager_meta_ctl(mreq->object, mreq->pindex, 0); +#if 0 for (i = reqpage - 1; i >= 0; --i) { daddr_t iblk; @@ -1193,6 +1191,7 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) for (k = j; k < count; ++k) vm_page_free(m[k]); } +#endif crit_exit(); @@ -1215,8 +1214,11 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) /* * map our page(s) into kva for input */ + i = 0; + j = 1; + reqpage = 0; - pmap_qenter(kva, m + i, j - i); + pmap_qenter(kva, mpp + i, j - i); bp->b_data = (caddr_t) kva; bp->b_bcount = PAGE_SIZE * (j - i); @@ -1229,8 +1231,8 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) int k; for (k = i; k < j; ++k) { - bp->b_xio.xio_pages[k - i] = m[k]; - vm_page_flag_set(m[k], PG_SWAPINPROG); + bp->b_xio.xio_pages[k - i] = mpp[k]; + vm_page_flag_set(mpp[k], PG_SWAPINPROG); } } bp->b_xio.xio_npages = j - i; @@ -1244,7 +1246,7 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) */ vm_object_pip_add(mreq->object, bp->b_xio.xio_npages); - lastpindex = m[j-1]->pindex; + lastpindex = mpp[j-1]->pindex; /* * perform the I/O. NOTE!!! 
bp cannot be considered valid after diff --git a/sys/vm/swap_pager.h b/sys/vm/swap_pager.h index 9d24f3473d..9df6fd1e33 100644 --- a/sys/vm/swap_pager.h +++ b/sys/vm/swap_pager.h @@ -92,7 +92,7 @@ extern int swap_pager_full; extern struct blist *swapblist; void swap_pager_putpages (vm_object_t, struct vm_page **, int, boolean_t, int *); -boolean_t swap_pager_haspage (vm_object_t object, vm_pindex_t pindex, int *before, int *after); +boolean_t swap_pager_haspage (vm_object_t object, vm_pindex_t pindex); int swap_pager_swp_alloc (vm_object_t, int); void swap_pager_copy (vm_object_t, vm_object_t, vm_pindex_t, int); diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 2c9fac87be..f1c66b8b2e 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -101,10 +101,6 @@ #include #include -#define VM_FAULT_READ_AHEAD 8 -#define VM_FAULT_READ_BEHIND 7 -#define VM_FAULT_READ (VM_FAULT_READ_AHEAD+VM_FAULT_READ_BEHIND+1) - struct faultstate { vm_page_t m; vm_object_t object; @@ -124,15 +120,19 @@ struct faultstate { struct vnode *vp; }; -static int burst_fault = 1; -SYSCTL_INT(_vm, OID_AUTO, burst_fault, CTLFLAG_RW, &burst_fault, 0, ""); +static int vm_fast_fault = 1; +SYSCTL_INT(_vm, OID_AUTO, fast_fault, CTLFLAG_RW, &vm_fast_fault, 0, ""); static int debug_cluster = 0; SYSCTL_INT(_vm, OID_AUTO, debug_cluster, CTLFLAG_RW, &debug_cluster, 0, ""); static int vm_fault_object(struct faultstate *, vm_pindex_t, vm_prot_t); static int vm_fault_vpagetable(struct faultstate *, vm_pindex_t *, vpte_t, int); +#if 0 static int vm_fault_additional_pages (vm_page_t, int, int, vm_page_t *, int *); +#endif static int vm_fault_ratelimit(struct vmspace *); +static void vm_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry, + int prot); static __inline void release_page(struct faultstate *fs) @@ -377,12 +377,19 @@ RetryFault: * * Enter the page into the pmap and do pmap-related adjustments. */ - unlock_things(&fs); pmap_enter(fs.map->pmap, vaddr, fs.m, fs.prot, fs.wired); - if (((fs.fault_flags & VM_FAULT_WIRE_MASK) == 0) && (fs.wired == 0)) { - pmap_prefault(fs.map->pmap, vaddr, fs.entry); + /* + * Burst in a few more pages if possible. The fs.map should still + * be locked. + */ + if (fault_flags & VM_FAULT_BURST) { + if ((fs.fault_flags & VM_FAULT_WIRE_MASK) == 0 && + fs.wired == 0) { + vm_prefault(fs.map->pmap, vaddr, fs.entry, fs.prot); + } } + unlock_things(&fs); vm_page_flag_clear(fs.m, PG_ZERO); vm_page_flag_set(fs.m, PG_REFERENCED); @@ -877,9 +884,7 @@ vm_fault_object(struct faultstate *fs, vm_pindex_t first_pindex, vm_prot_t fault_type) { vm_object_t next_object; - vm_page_t marray[VM_FAULT_READ]; vm_pindex_t pindex; - int faultcount; fs->prot = fs->first_prot; fs->object = fs->first_object; @@ -1049,10 +1054,9 @@ vm_fault_object(struct faultstate *fs, readrest: /* - * We have found a valid page or we have allocated a new page. - * The page thus may not be valid or may not be entirely - * valid. Even if entirely valid we may have hit a read-ahead - * mark and desire to keep the pipeline going. + * We have found an invalid or partially valid page, a + * potentially fully valid page with a read-ahead mark, + * or we have allocated a new page. 
* * Attempt to fault-in the page if there is a chance that the * pager has it, and potentially fault in additional pages @@ -1064,124 +1068,106 @@ readrest: if (TRYPAGER(fs)) { int rv; - int reqpage; - int ahead, behind; + int seqaccess; u_char behavior = vm_map_entry_behavior(fs->entry); - if (behavior == MAP_ENTRY_BEHAV_RANDOM) { - ahead = 0; - behind = 0; - } else { - behind = pindex; - KKASSERT(behind >= 0); - if (behind > VM_FAULT_READ_BEHIND) - behind = VM_FAULT_READ_BEHIND; - - ahead = fs->object->size - pindex; - if (ahead < 1) - ahead = 1; - if (ahead > VM_FAULT_READ_AHEAD) - ahead = VM_FAULT_READ_AHEAD; - } + if (behavior == MAP_ENTRY_BEHAV_RANDOM) + seqaccess = 0; + else + seqaccess = -1; + /* + * If sequential access is detected then attempt + * to deactivate/cache pages behind the scan to + * prevent resource hogging. + * + * Use of PG_RAM to detect sequential access + * also simulates multi-zone sequential access + * detection for free. + * + * NOTE: Partially valid dirty pages cannot be + * deactivated without causing NFS picemeal + * writes to barf. + */ if ((fs->first_object->type != OBJT_DEVICE) && (behavior == MAP_ENTRY_BEHAV_SEQUENTIAL || (behavior != MAP_ENTRY_BEHAV_RANDOM && - pindex >= fs->entry->lastr && - pindex < fs->entry->lastr + VM_FAULT_READ)) + (fs->m->flags & PG_RAM))) ) { - vm_pindex_t firstpindex, tmppindex; - - if (first_pindex < 2 * VM_FAULT_READ) - firstpindex = 0; - else - firstpindex = first_pindex - 2 * VM_FAULT_READ; + vm_pindex_t scan_pindex; + int scan_count = 16; + + if (first_pindex < 16) { + scan_pindex = 0; + scan_count = 0; + } else { + scan_pindex = first_pindex - 16; + if (scan_pindex < 16) + scan_count = scan_pindex; + else + scan_count = 16; + } - /* - * note: partially valid pages cannot be - * included in the lookahead - NFS piecemeal - * writes will barf on it badly. - * - * spl protection is required to avoid races - * between the lookup and an interrupt - * unbusy/free sequence occuring prior to - * our busy check. - */ crit_enter(); - for (tmppindex = first_pindex - 1; - tmppindex >= firstpindex; - --tmppindex - ) { + while (scan_count) { vm_page_t mt; - mt = vm_page_lookup(fs->first_object, tmppindex); - if (mt == NULL || (mt->valid != VM_PAGE_BITS_ALL)) + mt = vm_page_lookup(fs->first_object, + scan_pindex); + if (mt == NULL || + (mt->valid != VM_PAGE_BITS_ALL)) { break; + } if (mt->busy || - (mt->flags & (PG_BUSY | PG_FICTITIOUS | PG_UNMANAGED)) || - mt->hold_count || - mt->wire_count) - continue; + (mt->flags & (PG_BUSY | PG_FICTITIOUS | PG_UNMANAGED)) || + mt->hold_count || + mt->wire_count) { + goto skip; + } if (mt->dirty == 0) vm_page_test_dirty(mt); if (mt->dirty) { vm_page_busy(mt); - vm_page_protect(mt, VM_PROT_NONE); + vm_page_protect(mt, + VM_PROT_NONE); vm_page_deactivate(mt); vm_page_wakeup(mt); } else { vm_page_cache(mt); } +skip: + --scan_count; + --scan_pindex; } crit_exit(); - ahead += behind; - behind = 0; + seqaccess = 1; } /* - * now we find out if any other pages should be paged - * in at this time this routine checks to see if the - * pages surrounding this fault reside in the same - * object as the page for this fault. If they do, - * then they are faulted in also into the object. The - * array "marray" returned contains an array of - * vm_page_t structs where one of them is the - * vm_page_t passed to the routine. The reqpage - * return value is the index into the marray for the - * vm_page_t passed to the routine. - * - * fs.m plus the additional pages are PG_BUSY'd. 
- */ - faultcount = vm_fault_additional_pages( - fs->m, behind, ahead, marray, &reqpage); - - /* - * update lastr imperfectly (we do not know how much - * getpages will actually read), but good enough. + * Avoid deadlocking against the map when doing I/O. + * fs.object and the page is PG_BUSY'd. */ - fs->entry->lastr = pindex + faultcount - behind; + unlock_map(fs); /* - * Call the pager to retrieve the data, if any, after - * releasing the lock on the map. We hold a ref on - * fs.object and the pages are PG_BUSY'd. + * Acquire the page data. We still hold a ref on + * fs.object and the page has been PG_BUSY's. + * + * The pager may replace the page (for example, in + * order to enter a fictitious page into the + * object). If it does so it is responsible for + * cleaning up the passed page and properly setting + * the new page PG_BUSY. */ - unlock_map(fs); - - if (faultcount) { - rv = vm_pager_get_pages(fs->object, marray, - faultcount, reqpage); + if (vm_pager_has_page(fs->object, pindex)) { + rv = vm_pager_get_page(fs->object, &fs->m, + seqaccess); } else { rv = VM_PAGER_FAIL; } if (rv == VM_PAGER_OK) { - /* - * Found the page. Leave it busy while we play - * with it. - */ - /* * Relookup in case pager changed page. Pager * is responsible for disposition of old page @@ -1220,20 +1206,21 @@ readrest: else kprintf("vm_fault: pager read error, thread %p (%s)\n", curthread, curproc->p_comm); } + /* * Data outside the range of the pager or an I/O error * * The page may have been wired during the pagein, * e.g. by the buffer cache, and cannot simply be - * freed. Call vnode_pager_freepag() to deal with it. + * freed. Call vnode_pager_freepage() to deal with it. */ /* * XXX - the check for kernel_map is a kludge to work * around having the machine panic on a kernel space * fault w/ I/O error. */ - if (((fs->map != &kernel_map) && (rv == VM_PAGER_ERROR)) || - (rv == VM_PAGER_BAD)) { + if (((fs->map != &kernel_map) && + (rv == VM_PAGER_ERROR)) || (rv == VM_PAGER_BAD)) { vnode_pager_freepage(fs->m); fs->m = NULL; unlock_and_deallocate(fs); @@ -1301,19 +1288,17 @@ readrest: } } - KASSERT((fs->m->flags & PG_BUSY) != 0, - ("vm_fault: not busy after main loop")); - /* * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock * is held.] - */ - - /* + * * If the page is being written, but isn't already owned by the * top-level object, we have to copy it into a new page owned by the * top-level object. */ + KASSERT((fs->m->flags & PG_BUSY) != 0, + ("vm_fault: not busy after main loop")); + if (fs->object != fs->first_object) { /* * We only really need to copy if we want to write it. @@ -1700,6 +1685,7 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map, } } +#if 0 /* * This routine checks around the requested page for other pages that @@ -1843,3 +1829,180 @@ vm_fault_additional_pages(vm_page_t m, int rbehind, int rahead, return (i); } + +#endif + +/* + * vm_prefault() provides a quick way of clustering pagefaults into a + * processes address space. It is a "cousin" of pmap_object_init_pt, + * except it runs at page fault time instead of mmap time. + * + * This code used to be per-platform pmap_prefault(). It is now + * machine-independent and enhanced to also pre-fault zero-fill pages + * (see vm.fast_fault) as well as make them writable, which greatly + * reduces the number of page faults programs incur. + * + * Application performance when pre-faulting zero-fill pages is heavily + * dependent on the application. 
Very tiny applications like /bin/echo + * lose a little performance while applications of any appreciable size + * gain performance. Prefaulting multiple pages also reduces SMP + * congestion and can improve SMP performance significantly. + * + * NOTE! prot may allow writing but this only applies to the top level + * object. If we wind up mapping a page extracted from a backing + * object we have to make sure it is read-only. + * + * NOTE! The caller has already handled any COW operations on the + * vm_map_entry via the normal fault code. Do NOT call this + * shortcut unless the normal fault code has run on this entry. + */ +#define PFBAK 4 +#define PFFOR 4 +#define PAGEORDER_SIZE (PFBAK+PFFOR) + +static int vm_prefault_pageorder[] = { + -PAGE_SIZE, PAGE_SIZE, + -2 * PAGE_SIZE, 2 * PAGE_SIZE, + -3 * PAGE_SIZE, 3 * PAGE_SIZE, + -4 * PAGE_SIZE, 4 * PAGE_SIZE +}; + +static void +vm_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry, int prot) +{ + struct lwp *lp; + vm_page_t m; + vm_offset_t starta; + vm_offset_t addr; + vm_pindex_t index; + vm_pindex_t pindex; + vm_object_t object; + int pprot; + int i; + + /* + * We do not currently prefault mappings that use virtual page + * tables. We do not prefault foreign pmaps. + */ + if (entry->maptype == VM_MAPTYPE_VPAGETABLE) + return; + lp = curthread->td_lwp; + if (lp == NULL || (pmap != vmspace_pmap(lp->lwp_vmspace))) + return; + + object = entry->object.vm_object; + + starta = addra - PFBAK * PAGE_SIZE; + if (starta < entry->start) + starta = entry->start; + else if (starta > addra) + starta = 0; + + /* + * critical section protection is required to maintain the + * page/object association, interrupts can free pages and remove + * them from their objects. + */ + crit_enter(); + for (i = 0; i < PAGEORDER_SIZE; i++) { + vm_object_t lobject; + + addr = addra + vm_prefault_pageorder[i]; + if (addr > addra + (PFFOR * PAGE_SIZE)) + addr = 0; + + if (addr < starta || addr >= entry->end) + continue; + + if (pmap_prefault_ok(pmap, addr) == 0) + continue; + + /* + * Follow the VM object chain to obtain the page to be mapped + * into the pmap. + * + * If we reach the terminal object without finding a page + * and we determine it would be advantageous, then allocate + * a zero-fill page for the base object. The base object + * is guaranteed to be OBJT_DEFAULT for this case. + */ + index = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; + lobject = object; + pindex = index; + pprot = prot; + + while ((m = vm_page_lookup(lobject, pindex)) == NULL) { + if (lobject->type != OBJT_DEFAULT) + break; + if (lobject->backing_object == NULL) { + if (vm_fast_fault == 0) + break; + if (vm_prefault_pageorder[i] < 0 || + (prot & VM_PROT_WRITE) == 0 || + vm_page_count_min(0)) { + break; + } + m = vm_page_alloc(object, index, + VM_ALLOC_NORMAL | VM_ALLOC_ZERO); + + if ((m->flags & PG_ZERO) == 0) { + vm_page_zero_fill(m); + } else { + vm_page_flag_clear(m, PG_ZERO); + mycpu->gd_cnt.v_ozfod++; + } + mycpu->gd_cnt.v_zfod++; + m->valid = VM_PAGE_BITS_ALL; + vm_page_wakeup(m); + pprot = prot; + /* lobject = object .. not needed */ + break; + } + if (lobject->backing_object_offset & PAGE_MASK) + break; + pindex += lobject->backing_object_offset >> PAGE_SHIFT; + lobject = lobject->backing_object; + pprot &= ~VM_PROT_WRITE; + } + /* + * NOTE: lobject now invalid (if we did a zero-fill we didn't + * bother assigning lobject = object). + * + * Give-up if the page is not available. + */ + if (m == NULL) + break; + + /* + * Do not conditionalize on PG_RAM. 
If pages are present in + * the VM system we assume optimal caching. If caching is + * not optimal the I/O gravy train will be restarted when we + * hit an unavailable page. We do not want to try to restart + * the gravy train now because we really don't know how much + * of the object has been cached. The cost for restarting + * the gravy train should be low (since accesses will likely + * be I/O bound anyway). + * + * The object must be marked dirty if we are mapping a + * writable page. + */ + if (pprot & VM_PROT_WRITE) + vm_object_set_writeable_dirty(m->object); + + /* + * Enter the page into the pmap if appropriate. + */ + if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && + (m->busy == 0) && + (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { + + if ((m->queue - m->pc) == PQ_CACHE) { + vm_page_deactivate(m); + } + vm_page_busy(m); + pmap_enter(pmap, addr, m, pprot, 0); + vm_page_wakeup(m); + } + } + crit_exit(); +} diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index dc9b8e96f1..e33e98c6c2 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -148,7 +148,6 @@ struct vm_map_entry { vm_prot_t max_protection; /* maximum protection */ vm_inherit_t inheritance; /* inheritance */ int wired_count; /* can be paged if = 0 */ - vm_pindex_t lastr; /* last read */ }; #define MAP_ENTRY_NOSYNC 0x0001 @@ -421,6 +420,7 @@ vmspace_resident_count(struct vmspace *vmspace) #define VM_FAULT_NORMAL 0x00 /* Nothing special */ #define VM_FAULT_CHANGE_WIRING 0x01 /* Change the wiring as appropriate */ #define VM_FAULT_USER_WIRE 0x02 /* Likewise, but for user purposes */ +#define VM_FAULT_BURST 0x04 /* Burst fault can be done */ #define VM_FAULT_DIRTY 0x08 /* Dirty the page */ #define VM_FAULT_WIRE_MASK (VM_FAULT_CHANGE_WIRING|VM_FAULT_USER_WIRE) diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index c4401e5afe..e8515d5b1e 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -1198,9 +1198,8 @@ vm_object_backing_scan_callback(vm_page_t p, void *data) */ pp = vm_page_lookup(object, new_pindex); - if ( - (pp == NULL || pp->valid == 0) && - !vm_pager_has_page(object, new_pindex, NULL, NULL) + if ((pp == NULL || pp->valid == 0) && + !vm_pager_has_page(object, new_pindex) ) { info->error = 0; /* problemo */ return(-1); /* stop the scan */ @@ -1273,10 +1272,7 @@ vm_object_backing_scan_callback(vm_page_t p, void *data) } pp = vm_page_lookup(object, new_pindex); - if ( - pp != NULL || - vm_pager_has_page(object, new_pindex, NULL, NULL) - ) { + if (pp != NULL || vm_pager_has_page(object, new_pindex)) { /* * page already exists in parent OR swap exists * for this location in the parent. 
Destroy diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c index 388a3c4e03..65bc20ec77 100644 --- a/sys/vm/vm_pager.c +++ b/sys/vm/vm_pager.c @@ -99,14 +99,14 @@ extern struct pagerops physpagerops; int cluster_pbuf_freecnt = -1; /* unlimited to begin with */ -static int dead_pager_getpages (vm_object_t, vm_page_t *, int, int); +static int dead_pager_getpage (vm_object_t, vm_page_t *, int); static vm_object_t dead_pager_alloc (void *, off_t, vm_prot_t, off_t); static void dead_pager_putpages (vm_object_t, vm_page_t *, int, int, int *); -static boolean_t dead_pager_haspage (vm_object_t, vm_pindex_t, int *, int *); +static boolean_t dead_pager_haspage (vm_object_t, vm_pindex_t); static void dead_pager_dealloc (vm_object_t); static int -dead_pager_getpages(vm_object_t obj, vm_page_t *ma, int count, int req) +dead_pager_getpage(vm_object_t obj, vm_page_t *mpp, int seqaccess) { return VM_PAGER_FAIL; } @@ -129,12 +129,8 @@ dead_pager_putpages(vm_object_t object, vm_page_t *m, int count, int flags, } static int -dead_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *prev, int *next) +dead_pager_haspage(vm_object_t object, vm_pindex_t pindex) { - if (prev) - *prev = 0; - if (next) - *next = 0; return FALSE; } @@ -148,7 +144,7 @@ static struct pagerops deadpagerops = { NULL, dead_pager_alloc, dead_pager_dealloc, - dead_pager_getpages, + dead_pager_getpage, dead_pager_putpages, dead_pager_haspage, NULL diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h index e216876ad6..9ba68a9515 100644 --- a/sys/vm/vm_pager.h +++ b/sys/vm/vm_pager.h @@ -66,9 +66,9 @@ struct pagerops { void (*pgo_init) (void); /* Initialize pager. */ vm_object_t (*pgo_alloc) (void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t); /* Allocate pager. */ void (*pgo_dealloc) (vm_object_t); /* Disassociate. */ - int (*pgo_getpages) (vm_object_t, vm_page_t *, int, int); /* Get (read) page. */ - void (*pgo_putpages) (vm_object_t, vm_page_t *, int, int, int *); /* Put (write) page. */ - boolean_t (*pgo_haspage) (vm_object_t, vm_pindex_t, int *, int *); /* Does pager have page? */ + int (*pgo_getpage) (vm_object_t, vm_page_t *, int); + void (*pgo_putpages) (vm_object_t, vm_page_t *, int, int, int *); + boolean_t (*pgo_haspage) (vm_object_t, vm_pindex_t); void (*pgo_pageunswapped) (vm_page_t); void (*pgo_strategy) (vm_object_t, struct bio *); }; @@ -109,8 +109,8 @@ extern struct pagerops *pagertab[]; vm_object_t vm_pager_allocate (objtype_t, void *, off_t, vm_prot_t, off_t); void vm_pager_bufferinit (void); void vm_pager_deallocate (vm_object_t); -static __inline int vm_pager_get_pages (vm_object_t, vm_page_t *, int, int); -static __inline boolean_t vm_pager_has_page (vm_object_t, vm_pindex_t, int *, int *); +static __inline int vm_pager_get_page (vm_object_t, vm_page_t *, int); +static __inline boolean_t vm_pager_has_page (vm_object_t, vm_pindex_t); void vm_pager_init (void); vm_object_t vm_pager_object_lookup (struct pagerlst *, void *); void vm_pager_sync (void); @@ -121,25 +121,23 @@ void waitchainbuf(struct buf *bp, int count, int done); void autochaindone(struct buf *bp); /* - * vm_page_get_pages: + * vm_page_get_pages: * - * Retrieve pages from the VM system in order to map them into an object - * ( or into VM space somewhere ). If the pagein was successful, we - * must fully validate it. + * Retrieve the contents of the page from the object pager. Note that the + * object pager might replace the page. + * + * If the pagein was successful, we must fully validate it so it can be + * memory mapped. 
*/ static __inline int -vm_pager_get_pages( - vm_object_t object, - vm_page_t *m, - int count, - int reqpage -) { +vm_pager_get_page(vm_object_t object, vm_page_t *m, int seqaccess) +{ int r; - r = (*pagertab[object->type]->pgo_getpages)(object, m, count, reqpage); - if (r == VM_PAGER_OK && m[reqpage]->valid != VM_PAGE_BITS_ALL) { - vm_page_zero_invalid(m[reqpage], TRUE); + r = (*pagertab[object->type]->pgo_getpage)(object, m, seqaccess); + if (r == VM_PAGER_OK && (*m)->valid != VM_PAGE_BITS_ALL) { + vm_page_zero_invalid(*m, TRUE); } return(r); } @@ -168,13 +166,9 @@ vm_pager_put_pages( */ static __inline boolean_t -vm_pager_has_page( - vm_object_t object, - vm_pindex_t offset, - int *before, - int *after -) { - return ((*pagertab[object->type]->pgo_haspage) (object, offset, before, after)); +vm_pager_has_page(vm_object_t object, vm_pindex_t offset) +{ + return ((*pagertab[object->type]->pgo_haspage)(object, offset)); } /* diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index fdf9bf01b1..62b91cb046 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -75,15 +75,15 @@ #include static void vnode_pager_dealloc (vm_object_t); -static int vnode_pager_getpages (vm_object_t, vm_page_t *, int, int); +static int vnode_pager_getpage (vm_object_t, vm_page_t *, int); static void vnode_pager_putpages (vm_object_t, vm_page_t *, int, boolean_t, int *); -static boolean_t vnode_pager_haspage (vm_object_t, vm_pindex_t, int *, int *); +static boolean_t vnode_pager_haspage (vm_object_t, vm_pindex_t); struct pagerops vnodepagerops = { NULL, vnode_pager_alloc, vnode_pager_dealloc, - vnode_pager_getpages, + vnode_pager_getpage, vnode_pager_putpages, vnode_pager_haspage, NULL @@ -192,8 +192,7 @@ vnode_pager_dealloc(vm_object_t object) * not including the requested page. */ static boolean_t -vnode_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, - int *after) +vnode_pager_haspage(vm_object_t object, vm_pindex_t pindex) { struct vnode *vp = object->handle; off_t loffset; @@ -222,6 +221,8 @@ vnode_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, voff = loffset % bsize; /* + * XXX + * * BMAP returns byte counts before and after, where after * is inclusive of the base page. haspage must return page * counts before and after where after does not include the @@ -231,28 +232,11 @@ vnode_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, * compatibility. The base page is still considered valid if * no error is returned. */ - error = VOP_BMAP(vp, loffset - voff, &doffset, after, before, 0); - if (error) { - if (before) - *before = 0; - if (after) - *after = 0; + error = VOP_BMAP(vp, loffset - voff, &doffset, NULL, NULL, 0); + if (error) return TRUE; - } if (doffset == NOOFFSET) return FALSE; - - if (before) { - *before = (*before + voff) >> PAGE_SHIFT; - } - if (after) { - *after -= voff; - if (loffset + *after > vp->v_filesize) - *after = vp->v_filesize - loffset; - *after >>= PAGE_SHIFT; - if (*after < 0) - *after = 0; - } return TRUE; } @@ -402,14 +386,13 @@ vnode_pager_freepage(vm_page_t m) * backing vp's VOP_GETPAGES. 
*/ static int -vnode_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) +vnode_pager_getpage(vm_object_t object, vm_page_t *mpp, int seqaccess) { int rtval; struct vnode *vp; - int bytes = count * PAGE_SIZE; vp = object->handle; - rtval = VOP_GETPAGES(vp, m, bytes, reqpage, 0); + rtval = VOP_GETPAGES(vp, mpp, PAGE_SIZE, 0, 0); if (rtval == EOPNOTSUPP) panic("vnode_pager: vfs's must implement vop_getpages\n"); return rtval; @@ -425,7 +408,7 @@ vnode_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) * pages. Just construct and issue a READ. */ int -vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount, +vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *mpp, int bytecount, int reqpage) { struct iovec aiov; @@ -468,11 +451,11 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount, * entire range is past file EOF discard everything and generate * a pagein error. */ - foff = IDX_TO_OFF(m[0]->pindex); + foff = IDX_TO_OFF(mpp[0]->pindex); if (foff >= vp->v_filesize) { for (i = 0; i < count; i++) { if (i != reqpage) - vnode_pager_freepage(m[i]); + vnode_pager_freepage(mpp[i]); } return VM_PAGER_ERROR; } @@ -483,7 +466,7 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount, while (count > i) { --count; if (count != reqpage) - vnode_pager_freepage(m[count]); + vnode_pager_freepage(mpp[count]); } } @@ -505,7 +488,7 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount, * Severe hack to avoid deadlocks with the buffer cache */ for (i = 0; i < count; ++i) { - vm_page_t mt = m[i]; + vm_page_t mt = mpp[i]; vm_page_io_start(mt); vm_page_wakeup(mt); @@ -518,7 +501,7 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount, /* if (bytecount > PAGE_SIZE)*/ ioflags |= IO_SEQMAX << IO_SEQSHIFT; - aiov.iov_base = (caddr_t) 0; + aiov.iov_base = NULL; aiov.iov_len = bytecount; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; @@ -536,7 +519,7 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount, * Severe hack to avoid deadlocks with the buffer cache */ for (i = 0; i < count; ++i) { - vm_page_t mt = m[i]; + vm_page_t mt = mpp[i]; while (vm_page_sleep_busy(mt, FALSE, "getpgs")) ; @@ -551,7 +534,7 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount, bytecount -= auio.uio_resid; for (i = 0; i < count; ++i) { - vm_page_t mt = m[i]; + vm_page_t mt = mpp[i]; if (i != reqpage) { if (error == 0 && mt->valid) { @@ -578,7 +561,7 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount, } } if (error) { - kprintf("vnode_pager_getpages: I/O read error\n"); + kprintf("vnode_pager_getpage: I/O read error\n"); } return (error ? VM_PAGER_ERROR : VM_PAGER_OK); } -- 2.41.0
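
For out-of-tree pager authors, here is a minimal sketch of what the
simplified pager interface looks like after this change.  The
example_pager_* functions and the example_read_backing_store() /
example_backing_store_exists() helpers are hypothetical stand-ins, not
part of the patch; the real conversions are in the diff above
(default_pager.c, device_pager.c, phys_pager.c, swap_pager.c,
vnode_pager.c).

static int
example_pager_getpage(vm_object_t object, vm_page_t *mpp, int seqaccess)
{
        vm_page_t m = *mpp;

        /*
         * The VM system now passes exactly one busied page plus a
         * sequential-access hint.  Any read-ahead or clustering is the
         * pager's own responsibility (e.g. via the buffer cache).
         *
         * example_read_backing_store() is a hypothetical helper standing
         * in for the pager's real I/O path.
         */
        if (example_read_backing_store(object, m, seqaccess) != 0)
                return (VM_PAGER_ERROR);
        m->valid = VM_PAGE_BITS_ALL;
        return (VM_PAGER_OK);
}

static boolean_t
example_pager_haspage(vm_object_t object, vm_pindex_t pindex)
{
        /*
         * The before/after cluster counts are gone; just report whether
         * backing store exists for this page index.
         * example_backing_store_exists() is likewise hypothetical.
         */
        return (example_backing_store_exists(object, pindex));
}

The signatures capture the intent of the change: vm_fault() hands the
pager a single page and a hint, and anything smarter than that now
lives behind the pgo_getpage entry point rather than in the generic
fault code.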