author    Matthew Dillon <dillon@dragonflybsd.org>
          Thu, 13 May 2004 17:40:19 +0000 (17:40 +0000)
committer Matthew Dillon <dillon@dragonflybsd.org>
          Thu, 13 May 2004 17:40:19 +0000 (17:40 +0000)

Close an interrupt race between vm_page_lookup() and (typically) a
vm_page_sleep_busy() check by using the correct spl protection.
An interrupt can occur in between the two operations and unbusy/free
the page in question, causing the busy check to fail and the code to
fall through and operate on a page that may have been freed and
possibly even reused.  Note that vm_page_grab() had the same issue
between its lookup, busy check, and vm_page_busy() call.
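
As a rough illustration only (not part of this commit), the pattern
being enforced looks something like the sketch below, using the
splvm()/vm_page interfaces of this era; the helper name and wmesg are
made up, and the usual kernel headers (<vm/vm_object.h>, <vm/vm_page.h>,
spl) are assumed:

    /*
     * Look up a page and busy it without letting an interrupt unbusy/free
     * it between the lookup, the busy check, and vm_page_busy().  The
     * splvm() section closes that window; vm_page_sleep_busy() returns
     * non-zero if it had to sleep, in which case the page may have changed
     * and the lookup must be redone.
     */
    static vm_page_t
    example_lookup_busy(vm_object_t object, vm_pindex_t pindex)
    {
            vm_page_t m;
            int s;

            s = splvm();            /* block interrupt-driven page frees */
            for (;;) {
                    m = vm_page_lookup(object, pindex);
                    if (m == NULL)
                            break;
                    if (vm_page_sleep_busy(m, TRUE, "exlkbp"))
                            continue;       /* slept; state may have changed */
                    vm_page_busy(m);        /* safe: still at splvm() */
                    break;
            }
            splx(s);
            return (m);
    }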

Close an interrupt race when scanning a VM object's memq.  Interrupts
can free pages, removing them from the memq, which interferes with memq
scans and can cause a page that is no longer associated with the object
to be processed as if it still were.
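
Purely as a sketch (the function name is hypothetical), a memq scan
that survives interrupt-time frees caches the next pointer and holds
splvm() across the walk:

    /*
     * splvm()-protected memq walk.  Without the spl section an interrupt
     * could free 'p' (or 'n') and unlink it from object->memq, leaving the
     * scan following freed memory or pages that now belong elsewhere.
     */
    static void
    example_scan_memq(vm_object_t object)
    {
            vm_page_t p, n;
            int s;

            s = splvm();
            for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) {
                    n = TAILQ_NEXT(p, listq);  /* fetch next while protected */
                    /* ... operate on p: check p->pindex, busy it, etc ... */
            }
            splx(s);
    }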

Calls to vm_page_hold() and vm_page_unhold() require spl protection.

Rename the passed socket descriptor argument in sendfile() to make the
code more readable.

Fix several serious bugs in procfs_rwmem().  In particular, force it to
block if a page is busy and then retry.

Get rid of vm_pager_map_page() and vm_pager_unmap_page(); make the
functions that used to use these routines use SFBUFs instead.
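
Roughly, and assuming the sf_buf interface visible in the
exec_map_first_page() hunk below (sf_buf_alloc()/sf_buf_kva()/
sf_buf_free(); the helper here is hypothetical), the replacement
pattern is:

    /*
     * Map a single vm_page_t into KVA via an sf_buf, use the mapping, then
     * release it -- instead of vm_pager_map_page()/vm_pager_unmap_page().
     */
    static void
    example_touch_page(vm_page_t m)
    {
            struct sf_buf *sf;
            char *kva;

            sf = sf_buf_alloc(m, SFBA_QUICK);     /* map the page */
            kva = (char *)sf_buf_kva(sf);         /* kernel virtual address */
            /* ... access the page contents through kva ... */
            sf_buf_free(sf);                      /* release the mapping */
    }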

Get rid of the (userland?) 4MB page mapping feature in pmap_object_init_pt()
for now.  The code appears to not track the page directory properly and
could result in a non-zero page being freed as PG_ZERO.

This commit also includes updated code comments and some additional
non-operational code cleanups.

19 files changed:
sys/i386/i386/pmap.c
sys/kern/kern_exec.c
sys/kern/kern_exit.c
sys/kern/kern_xio.c
sys/kern/uipc_syscalls.c
sys/kern/vfs_bio.c
sys/platform/pc32/i386/pmap.c
sys/sys/xio.h
sys/vfs/procfs/procfs_mem.c
sys/vm/vm_fault.c
sys/vm/vm_map.c
sys/vm/vm_mmap.c
sys/vm/vm_object.c
sys/vm/vm_page.c
sys/vm/vm_page.h
sys/vm/vm_pageout.c
sys/vm/vm_pager.c
sys/vm/vm_pager.h
sys/vm/vnode_pager.c

index b891a45..8db1a05 100644 (file)
@@ -40,7 +40,7 @@
  *
  *     from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
  * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $
- * $DragonFly: src/sys/i386/i386/Attic/pmap.c,v 1.38 2004/05/05 22:09:16 dillon Exp $
+ * $DragonFly: src/sys/i386/i386/Attic/pmap.c,v 1.39 2004/05/13 17:40:14 dillon Exp $
  */
 
 /*
@@ -863,15 +863,24 @@ pmap_qremove(vm_offset_t va, int count)
 #endif
 }
 
+/*
+ * This routine works like vm_page_lookup() but also blocks as long as the
+ * page is busy.  This routine does not busy the page it returns.
+ *
+ * Unless the caller is managing objects whos pages are in a known state,
+ * the call should be made at splvm() so the page's object association
+ * remains valid on return.
+ */
 static vm_page_t
 pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
 {
        vm_page_t m;
+
 retry:
        m = vm_page_lookup(object, pindex);
        if (m && vm_page_sleep_busy(m, FALSE, "pplookp"))
                goto retry;
-       return m;
+       return(m);
 }
 
 /*
@@ -930,14 +939,20 @@ pmap_swapout_proc(struct proc *p)
 {
 #if 0
        int i;
+       int s;
        vm_object_t upobj;
        vm_page_t m;
 
        upobj = p->p_upages_obj;
+
        /*
-        * let the upages be paged
+        * Unwiring the pages allow them to be paged to their backing store
+        * (swap).
+        *
+        * splvm() protection not required since nobody will be messing with
+        * the pages but us.
         */
-       for(i=0;i<UPAGES;i++) {
+       for (i = 0; i < UPAGES; i++) {
                if ((m = vm_page_lookup(upobj, i)) == NULL)
                        panic("pmap_swapout_proc: upage already missing???");
                vm_page_dirty(m);
@@ -958,9 +973,12 @@ pmap_swapin_proc(struct proc *p)
        vm_object_t upobj;
        vm_page_t m;
 
+       /*
+        * splvm() protection not required since nobody will be messing with
+        * the pages but us.
+        */
        upobj = p->p_upages_obj;
-       for(i=0;i<UPAGES;i++) {
-
+       for (i = 0; i < UPAGES; i++) {
                m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 
                pmap_kenter((vm_offset_t)p->p_addr + (i * PAGE_SIZE),
@@ -973,7 +991,6 @@ pmap_swapin_proc(struct proc *p)
                        m = vm_page_lookup(upobj, i);
                        m->valid = VM_PAGE_BITS_ALL;
                }
-
                vm_page_wire(m);
                vm_page_wakeup(m);
                vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
@@ -1327,6 +1344,7 @@ pmap_release(struct pmap *pmap)
        vm_page_t p,n,ptdpg;
        vm_object_t object = pmap->pm_pteobj;
        int curgeneration;
+       int s;
 
 #if defined(DIAGNOSTIC)
        if (object->ref_count != 1)
@@ -1335,6 +1353,7 @@ pmap_release(struct pmap *pmap)
        
        ptdpg = NULL;
 retry:
+       s = splvm();
        curgeneration = object->generation;
        for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) {
                n = TAILQ_NEXT(p, listq);
@@ -1344,10 +1363,13 @@ retry:
                }
                while (1) {
                        if (!pmap_release_free_page(pmap, p) &&
-                               (object->generation != curgeneration))
+                           (object->generation != curgeneration)) {
+                               splx(s);
                                goto retry;
+                       }
                }
        }
+       splx(s);
 
        if (ptdpg && !pmap_release_free_page(pmap, ptdpg))
                goto retry;
@@ -2197,10 +2219,11 @@ pmap_kenter_temporary(vm_paddr_t pa, int i)
 }
 
 #define MAX_INIT_PT (96)
+
 /*
- * pmap_object_init_pt preloads the ptes for a given object
- * into the specified pmap.  This eliminates the blast of soft
- * faults on process startup and immediately after an mmap.
+ * This routine preloads the ptes for a given object into the specified pmap.
+ * This eliminates the blast of soft faults on process startup and
+ * immediately after an mmap.
  */
 void
 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot,
@@ -2211,19 +2234,27 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot,
        int psize;
        vm_page_t p, mpte;
        int objpgs;
+       int s;
 
        if ((prot & VM_PROT_READ) == 0 || pmap == NULL || object == NULL)
                return;
 
+#if 0
+       /* 
+        * XXX you must be joking, entering PTE's into a user page table
+        * without any accounting?  This could result in the page table
+        * being freed while it still contains mappings (free with PG_ZERO
+        * assumption leading to a non-zero page being marked PG_ZERO).
+        */
        /*
         * This code maps large physical mmap regions into the
         * processor address space.  Note that some shortcuts
         * are taken, but the code works.
         */
        if (pseflag &&
-               (object->type == OBJT_DEVICE) &&
-               ((addr & (NBPDR - 1)) == 0) &&
-               ((size & (NBPDR - 1)) == 0) ) {
+           (object->type == OBJT_DEVICE) &&
+           ((addr & (NBPDR - 1)) == 0) &&
+           ((size & (NBPDR - 1)) == 0) ) {
                int i;
                vm_page_t m[1];
                unsigned int ptepindex;
@@ -2262,9 +2293,9 @@ retry:
 
                pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
                npdes = size >> PDRSHIFT;
-               for(i=0;i<npdes;i++) {
+               for (i = 0; i < npdes; i++) {
                        pmap->pm_pdir[ptepindex] =
-                               (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_PS);
+                           (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_PS);
                        ptepa += NBPDR;
                        ptepindex += 1;
                }
@@ -2273,6 +2304,7 @@ retry:
                smp_invltlb();
                return;
        }
+#endif
 
        psize = i386_btop(size);
 
@@ -2288,26 +2320,30 @@ retry:
                psize = object->size - pindex;
        }
 
-       mpte = NULL;
+
        /*
-        * if we are processing a major portion of the object, then scan the
+        * If we are processing a major portion of the object, then scan the
         * entire thing.
+        *
+        * We cannot safely scan the object's memq unless we are at splvm(),
+        * since interrupts can remove pages from objects.
         */
+       s = splvm();
+       mpte = NULL;
        if (psize > (object->resident_page_count >> 2)) {
                objpgs = psize;
 
                for (p = TAILQ_FIRST(&object->memq);
-                   ((objpgs > 0) && (p != NULL));
-                   p = TAILQ_NEXT(p, listq)) {
-
+                   objpgs > 0 && p != NULL;
+                   p = TAILQ_NEXT(p, listq)
+               ) {
                        tmpidx = p->pindex;
-                       if (tmpidx < pindex) {
+                       if (tmpidx < pindex)
                                continue;
-                       }
                        tmpidx -= pindex;
-                       if (tmpidx >= psize) {
+                       if (tmpidx >= psize)
                                continue;
-                       }
+
                        /*
                         * don't allow an madvise to blow away our really
                         * free pages allocating pv entries.
@@ -2357,13 +2393,13 @@ retry:
                        }
                }
        }
+       splx(s);
 }
 
 /*
- * pmap_prefault provides a quick way of clustering
- * pagefaults into a processes address space.  It is a "cousin"
- * of pmap_object_init_pt, except it runs at page fault time instead
- * of mmap time.
+ * pmap_prefault provides a quick way of clustering pagefaults into a
+ * processes address space.  It is a "cousin" of pmap_object_init_pt, 
+ * except it runs at page fault time instead of mmap time.
  */
 #define PFBAK 4
 #define PFFOR 4
@@ -2380,6 +2416,7 @@ void
 pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
 {
        int i;
+       int s;
        vm_offset_t starta;
        vm_offset_t addr;
        vm_pindex_t pindex;
@@ -2392,13 +2429,18 @@ pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
        object = entry->object.vm_object;
 
        starta = addra - PFBAK * PAGE_SIZE;
-       if (starta < entry->start) {
+       if (starta < entry->start)
                starta = entry->start;
-       } else if (starta > addra) {
+       else if (starta > addra)
                starta = 0;
-       }
 
+       /*
+        * splvm() protection is required to maintain the page/object 
+        * association, interrupts can free pages and remove them from
+        * their objects.
+        */
        mpte = NULL;
+       s = splvm();
        for (i = 0; i < PAGEORDER_SIZE; i++) {
                vm_object_t lobject;
                unsigned *pte;
@@ -2419,9 +2461,12 @@ pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
 
                pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
                lobject = object;
+
                for (m = vm_page_lookup(lobject, pindex);
-                   (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
-                   lobject = lobject->backing_object) {
+                   (!m && (lobject->type == OBJT_DEFAULT) &&
+                    (lobject->backing_object));
+                   lobject = lobject->backing_object
+               ) {
                        if (lobject->backing_object_offset & PAGE_MASK)
                                break;
                        pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
@@ -2447,6 +2492,7 @@ pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
                        vm_page_wakeup(m);
                }
        }
+       splx(s);
 }
 
 /*
@@ -2510,6 +2556,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
        vm_offset_t pdnxt;
        unsigned src_frame, dst_frame;
        vm_page_t m;
+       int s;
 
        if (dst_addr != src_addr)
                return;
@@ -2529,7 +2576,13 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
        pmap_inval_add(&info, dst_pmap, -1);
        pmap_inval_add(&info, src_pmap, -1);
 
-       for(addr = src_addr; addr < end_addr; addr = pdnxt) {
+       /*
+        * splvm() protection is required to maintain the page/object
+        * association, interrupts can free pages and remove them from 
+        * their objects.
+        */
+       s = splvm();
+       for (addr = src_addr; addr < end_addr; addr = pdnxt) {
                unsigned *src_pte, *dst_pte;
                vm_page_t dstmpte, srcmpte;
                vm_offset_t srcptepaddr;
@@ -2607,6 +2660,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
                        dst_pte++;
                }
        }
+       splx(s);
        pmap_inval_flush(&info);
 }      
 
index 2cf23c8..d4bd73c 100644 (file)
@@ -24,7 +24,7 @@
  * SUCH DAMAGE.
  *
  * $FreeBSD: src/sys/kern/kern_exec.c,v 1.107.2.15 2002/07/30 15:40:46 nectar Exp $
- * $DragonFly: src/sys/kern/kern_exec.c,v 1.26 2004/05/10 10:37:46 hmp Exp $
+ * $DragonFly: src/sys/kern/kern_exec.c,v 1.27 2004/05/13 17:40:15 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -493,53 +493,64 @@ exec_map_first_page(struct image_params *imgp)
        int s, rv, i;
        int initial_pagein;
        vm_page_t ma[VM_INITIAL_PAGEIN];
+       vm_page_t m;
        vm_object_t object;
 
        if (imgp->firstpage)
                exec_unmap_first_page(imgp);
 
        VOP_GETVOBJECT(imgp->vp, &object);
-       s = splvm();
 
-       ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
+       /*
+        * We shouldn't need protection for vm_page_grab() but we certainly
+        * need it for the lookup loop below (lookup/busy race), since
+        * an interrupt can unbusy and free the page before our busy check.
+        */
+       s = splvm();
+       m = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 
-       if ((ma[0]->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) {
+       if ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) {
+               ma[0] = m;
                initial_pagein = VM_INITIAL_PAGEIN;
                if (initial_pagein > object->size)
                        initial_pagein = object->size;
                for (i = 1; i < initial_pagein; i++) {
-                       if ((ma[i] = vm_page_lookup(object, i)) != NULL) {
-                               if ((ma[i]->flags & PG_BUSY) || ma[i]->busy)
+                       if ((m = vm_page_lookup(object, i)) != NULL) {
+                               if ((m->flags & PG_BUSY) || m->busy)
                                        break;
-                               if (ma[i]->valid)
+                               if (m->valid)
                                        break;
-                               vm_page_busy(ma[i]);
+                               vm_page_busy(m);
                        } else {
-                               ma[i] = vm_page_alloc(object, i, VM_ALLOC_NORMAL);
-                               if (ma[i] == NULL)
+                               m = vm_page_alloc(object, i, VM_ALLOC_NORMAL);
+                               if (m == NULL)
                                        break;
                        }
+                       ma[i] = m;
                }
                initial_pagein = i;
 
+               /*
+                * get_pages unbusies all the requested pages except the
+                * primary page (at index 0 in this case).
+                */
                rv = vm_pager_get_pages(object, ma, initial_pagein, 0);
-               ma[0] = vm_page_lookup(object, 0);
+               m = vm_page_lookup(object, 0);
 
-               if ((rv != VM_PAGER_OK) || (ma[0] == NULL) || (ma[0]->valid == 0)) {
-                       if (ma[0]) {
-                               vm_page_protect(ma[0], VM_PROT_NONE);
-                               vm_page_free(ma[0]);
+               if (rv != VM_PAGER_OK || m == NULL || m->valid == 0) {
+                       if (m) {
+                               vm_page_protect(m, VM_PROT_NONE);
+                               vm_page_free(m);
                        }
                        splx(s);
                        return EIO;
                }
        }
-
-       vm_page_hold(ma[0]);
-       vm_page_wakeup(ma[0]);
+       vm_page_hold(m);
+       vm_page_wakeup(m);      /* unbusy the page */
        splx(s);
 
-       imgp->firstpage = sf_buf_alloc(ma[0], SFBA_QUICK);
+       imgp->firstpage = sf_buf_alloc(m, SFBA_QUICK);
        imgp->image_header = (void *)sf_buf_kva(imgp->firstpage);
 
        return 0;
@@ -550,7 +561,9 @@ exec_unmap_first_page(imgp)
        struct image_params *imgp;
 {
        vm_page_t m;
+       int s;
 
+       s = splvm();
        if (imgp->firstpage != NULL) {
                m = sf_buf_page(imgp->firstpage);
                sf_buf_free(imgp->firstpage);
@@ -558,6 +571,7 @@ exec_unmap_first_page(imgp)
                imgp->image_header = NULL;
                vm_page_unhold(m);
        }
+       splx(s);
 }
 
 /*
index cd08d69..319a309 100644 (file)
@@ -37,7 +37,7 @@
  *
  *     @(#)kern_exit.c 8.7 (Berkeley) 2/12/94
  * $FreeBSD: src/sys/kern/kern_exit.c,v 1.92.2.11 2003/01/13 22:51:16 dillon Exp $
- * $DragonFly: src/sys/kern/kern_exit.c,v 1.33 2004/03/30 19:14:11 dillon Exp $
+ * $DragonFly: src/sys/kern/kern_exit.c,v 1.34 2004/05/13 17:40:15 dillon Exp $
  */
 
 #include "opt_compat.h"
@@ -200,6 +200,8 @@ exit1(int rv)
         */
        semexit(p);
 
+       KKASSERT(p->p_numposixlocks == 0);
+
        /* The next two chunks should probably be moved to vmspace_exit. */
        vm = p->p_vmspace;
 
index 14f2a25..c52ddea 100644 (file)
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $DragonFly: src/sys/kern/kern_xio.c,v 1.4 2004/04/03 08:20:10 dillon Exp $
+ * $DragonFly: src/sys/kern/kern_xio.c,v 1.5 2004/05/13 17:40:15 dillon Exp $
  */
 /*
  * Kernel XIO interface.  An initialized XIO is basically a collection of
@@ -65,6 +65,9 @@
  * Initialize an XIO given a userspace buffer.  0 is returned on success,
  * an error code on failure.  The actual number of bytes that could be
  * accomodated in the XIO will be stored in xio_bytes.
+ *
+ * Note that you cannot legally accessed a previously cached linmap with 
+ * a newly initialized xio until after calling xio_linmap().
  */
 int
 xio_init_ubuf(xio_t xio, void *ubase, size_t ubytes, int flags)
@@ -74,6 +77,7 @@ xio_init_ubuf(xio_t xio, void *ubase, size_t ubytes, int flags)
     vm_page_t m;
     int i;
     int n;
+    int s;
     int vmprot;
 
     addr = trunc_page((vm_offset_t)ubase);
@@ -94,8 +98,10 @@ xio_init_ubuf(xio_t xio, void *ubase, size_t ubytes, int flags)
                break;
            if ((paddr = pmap_kextract(addr)) == 0)
                break;
+           s = splvm();
            m = PHYS_TO_VM_PAGE(paddr);
            vm_page_hold(m);
+           splx(s);
            xio->xio_pages[i] = m;
            ubytes -= n;
            xio->xio_bytes += n;
@@ -123,6 +129,9 @@ xio_init_ubuf(xio_t xio, void *ubase, size_t ubytes, int flags)
  * accomodated in the XIO will be stored in xio_bytes.
  *
  * vmprot is usually either VM_PROT_READ or VM_PROT_WRITE.
+ *
+ * Note that you cannot legally accessed a previously cached linmap with 
+ * a newly initialized xio until after calling xio_linmap().
  */
 int
 xio_init_kbuf(xio_t xio, void *kbase, size_t kbytes)
@@ -132,6 +141,7 @@ xio_init_kbuf(xio_t xio, void *kbase, size_t kbytes)
     vm_page_t m;
     int i;
     int n;
+    int s;
 
     addr = trunc_page((vm_offset_t)kbase);
     xio->xio_flags = 0;
@@ -144,8 +154,10 @@ xio_init_kbuf(xio_t xio, void *kbase, size_t kbytes)
     for (i = 0; n && i < XIO_INTERNAL_PAGES; ++i) {
        if ((paddr = pmap_kextract(addr)) == 0)
            break;
+       s = splvm();
        m = PHYS_TO_VM_PAGE(paddr);
        vm_page_hold(m);
+       splx(s);
        xio->xio_pages[i] = m;
        kbytes -= n;
        xio->xio_bytes += n;
@@ -166,16 +178,31 @@ xio_init_kbuf(xio_t xio, void *kbase, size_t kbytes)
     return(xio->xio_error);
 }
 
+/*
+ * Cleanup an XIO so it can be destroyed.  The pages associated with the
+ * XIO are released.  If a linear mapping buffer is active, it will be
+ * unlocked but the mappings will be left intact for optimal reconstitution
+ * in a later xio_linmap() call.
+ *
+ * Note that you cannot legally accessed the linmap on a released XIO.
+ */
 void
 xio_release(xio_t xio)
 {
     int i;
+    int s;
     vm_page_t m;
 
+    s = splvm();
     for (i = 0; i < xio->xio_npages; ++i) {
        m = xio->xio_pages[i];
        vm_page_unhold(m);
     }
+    splx(s);
+    if (xio->xio_flags & XIOF_LINMAP) {
+       xio->xio_flags &= ~XIOF_LINMAP;
+       /* XXX */
+    }
     xio->xio_offset = 0;
     xio->xio_npages = 0;
     xio->xio_bytes = 0;
index ade3b90..b10df9a 100644 (file)
@@ -35,7 +35,7 @@
  *
  *     @(#)uipc_syscalls.c     8.4 (Berkeley) 2/21/94
  * $FreeBSD: src/sys/kern/uipc_syscalls.c,v 1.65.2.17 2003/04/04 17:11:16 tegge Exp $
- * $DragonFly: src/sys/kern/uipc_syscalls.c,v 1.34 2004/05/09 00:32:41 hsu Exp $
+ * $DragonFly: src/sys/kern/uipc_syscalls.c,v 1.35 2004/05/13 17:40:15 dillon Exp $
  */
 
 #include "opt_ktrace.h"
@@ -1422,7 +1422,7 @@ done:
 }
 
 int
-kern_sendfile(struct vnode *vp, int s, off_t offset, size_t nbytes,
+kern_sendfile(struct vnode *vp, int sfd, off_t offset, size_t nbytes,
     struct mbuf *mheader, off_t *sbytes, int flags)
 {
        struct thread *td = curthread;
@@ -1436,12 +1436,13 @@ kern_sendfile(struct vnode *vp, int s, off_t offset, size_t nbytes,
        off_t off, xfsize;
        off_t hbytes = 0;
        int error = 0;
+       int s;
 
        if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
                error = EINVAL;
                goto done;
        }
-       error = holdsock(p->p_fd, s, &fp);
+       error = holdsock(p->p_fd, sfd, &fp);
        if (error)
                goto done;
        so = (struct socket *)fp->f_data;
@@ -1505,29 +1506,27 @@ retry_lookup:
                /*
                 * Attempt to look up the page.  
                 *
-                *      Allocate if not found
-                *
-                *      Wait and loop if busy.
+                *      Allocate if not found, wait and loop if busy, then
+                *      wire the page.  splvm() protection is required to
+                *      maintain the object association (an interrupt can
+                *      free the page) through to the vm_page_wire() call.
                 */
+               s = splvm();
                pg = vm_page_lookup(obj, pindex);
-
                if (pg == NULL) {
                        pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
                        if (pg == NULL) {
                                VM_WAIT;
+                               splx(s);
                                goto retry_lookup;
                        }
                        vm_page_wakeup(pg);
                } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) {
+                       splx(s);
                        goto retry_lookup;
                }
-
-               /*
-                * Wire the page so it does not get ripped out from under
-                * us. 
-                */
-
                vm_page_wire(pg);
+               splx(s);
 
                /*
                 * If page is not valid for what we need, initiate I/O
index 27e3931..b8dae35 100644 (file)
@@ -12,7 +12,7 @@
  *             John S. Dyson.
  *
  * $FreeBSD: src/sys/kern/vfs_bio.c,v 1.242.2.20 2003/05/28 18:38:10 alc Exp $
- * $DragonFly: src/sys/kern/vfs_bio.c,v 1.24 2004/05/10 10:51:31 hmp Exp $
+ * $DragonFly: src/sys/kern/vfs_bio.c,v 1.25 2004/05/13 17:40:15 dillon Exp $
  */
 
 /*
@@ -1125,7 +1125,10 @@ brelse(struct buf * bp)
                        vm_page_flag_clear(m, PG_ZERO);
                        /*
                         * If we hit a bogus page, fixup *all* of them
-                        * now.
+                        * now.  Note that we left these pages wired
+                        * when we removed them so they had better exist,
+                        * and they cannot be ripped out from under us so
+                        * no splvm() protection is necessary.
                         */
                        if (m == bogus_page) {
                                VOP_GETVOBJECT(vp, &obj);
@@ -1704,6 +1707,10 @@ restart:
                LIST_REMOVE(bp, b_hash);
                LIST_INSERT_HEAD(&invalhash, bp, b_hash);
 
+               /*
+                * spl protection not required when scrapping a buffer's
+                * contents because it is already wired.
+                */
                if (bp->b_bufsize)
                        allocbuf(bp, 0);
 
@@ -1974,11 +1981,14 @@ incore(struct vnode * vp, daddr_t blkno)
 }
 
 /*
- * Returns true if no I/O is needed to access the
- * associated VM object.  This is like incore except
- * it also hunts around in the VM system for the data.
+ * Returns true if no I/O is needed to access the associated VM object.
+ * This is like incore except it also hunts around in the VM system for
+ * the data.
+ *
+ * Note that we ignore vm_page_free() races from interrupts against our
+ * lookup, since if the caller is not protected our return value will not
+ * be any more valid then otherwise once we splx().
  */
-
 int
 inmem(struct vnode * vp, daddr_t blkno)
 {
@@ -2357,6 +2367,9 @@ loop:
 /*
  * Get an empty, disassociated buffer of given size.  The buffer is initially
  * set to B_INVAL.
+ *
+ * spl protection is not required for the allocbuf() call because races are
+ * impossible here.
  */
 struct buf *
 geteblk(int size)
@@ -2389,8 +2402,10 @@ geteblk(int size)
  *
  * allocbuf() only adjusts B_CACHE for VMIO buffers.  getblk() deals with
  * B_CACHE for the non-VMIO case.
+ *
+ * This routine does not need to be called at splbio() but you must own the
+ * buffer.
  */
-
 int
 allocbuf(struct buf *bp, int size)
 {
@@ -2548,17 +2563,23 @@ allocbuf(struct buf *bp, int size)
                        vm_object_t obj;
                        vm_offset_t toff;
                        vm_offset_t tinc;
+                       int s;
 
                        /*
                         * Step 1, bring in the VM pages from the object, 
                         * allocating them if necessary.  We must clear
                         * B_CACHE if these pages are not valid for the 
                         * range covered by the buffer.
+                        *
+                        * spl protection is required to protect against
+                        * interrupts unbusying and freeing pages between
+                        * our vm_page_lookup() and our busycheck/wiring
+                        * call.
                         */
-
                        vp = bp->b_vp;
                        VOP_GETVOBJECT(vp, &obj);
 
+                       s = splbio();
                        while (bp->b_npages < desiredpages) {
                                vm_page_t m;
                                vm_pindex_t pi;
@@ -2615,6 +2636,7 @@ allocbuf(struct buf *bp, int size)
                                bp->b_pages[bp->b_npages] = m;
                                ++bp->b_npages;
                        }
+                       splx(s);
 
                        /*
                         * Step 2.  We've loaded the pages into the buffer,
@@ -2822,7 +2844,10 @@ biodone(struct buf * bp)
                                resid = iosize;
 
                        /*
-                        * cleanup bogus pages, restoring the originals
+                        * cleanup bogus pages, restoring the originals.  Since
+                        * the originals should still be wired, we don't have
+                        * to worry about interrupt/freeing races destroying
+                        * the VM object association.
                         */
                        m = bp->b_pages[i];
                        if (m == bogus_page) {
@@ -2921,6 +2946,12 @@ vfs_unbusy_pages(struct buf * bp)
                for (i = 0; i < bp->b_npages; i++) {
                        vm_page_t m = bp->b_pages[i];
 
+                       /*
+                        * When restoring bogus changes the original pages
+                        * should still be wired, so we are in no danger of
+                        * losing the object association and do not need
+                        * spl protection particularly.
+                        */
                        if (m == bogus_page) {
                                m = vm_page_lookup(obj, OFF_TO_IDX(bp->b_offset) + i);
                                if (!m) {
index a76a10d..cc53a0e 100644 (file)
@@ -40,7 +40,7 @@
  *
  *     from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
  * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $
- * $DragonFly: src/sys/platform/pc32/i386/pmap.c,v 1.38 2004/05/05 22:09:16 dillon Exp $
+ * $DragonFly: src/sys/platform/pc32/i386/pmap.c,v 1.39 2004/05/13 17:40:14 dillon Exp $
  */
 
 /*
@@ -863,15 +863,24 @@ pmap_qremove(vm_offset_t va, int count)
 #endif
 }
 
+/*
+ * This routine works like vm_page_lookup() but also blocks as long as the
+ * page is busy.  This routine does not busy the page it returns.
+ *
+ * Unless the caller is managing objects whos pages are in a known state,
+ * the call should be made at splvm() so the page's object association
+ * remains valid on return.
+ */
 static vm_page_t
 pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
 {
        vm_page_t m;
+
 retry:
        m = vm_page_lookup(object, pindex);
        if (m && vm_page_sleep_busy(m, FALSE, "pplookp"))
                goto retry;
-       return m;
+       return(m);
 }
 
 /*
@@ -930,14 +939,20 @@ pmap_swapout_proc(struct proc *p)
 {
 #if 0
        int i;
+       int s;
        vm_object_t upobj;
        vm_page_t m;
 
        upobj = p->p_upages_obj;
+
        /*
-        * let the upages be paged
+        * Unwiring the pages allow them to be paged to their backing store
+        * (swap).
+        *
+        * splvm() protection not required since nobody will be messing with
+        * the pages but us.
         */
-       for(i=0;i<UPAGES;i++) {
+       for (i = 0; i < UPAGES; i++) {
                if ((m = vm_page_lookup(upobj, i)) == NULL)
                        panic("pmap_swapout_proc: upage already missing???");
                vm_page_dirty(m);
@@ -958,9 +973,12 @@ pmap_swapin_proc(struct proc *p)
        vm_object_t upobj;
        vm_page_t m;
 
+       /*
+        * splvm() protection not required since nobody will be messing with
+        * the pages but us.
+        */
        upobj = p->p_upages_obj;
-       for(i=0;i<UPAGES;i++) {
-
+       for (i = 0; i < UPAGES; i++) {
                m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 
                pmap_kenter((vm_offset_t)p->p_addr + (i * PAGE_SIZE),
@@ -973,7 +991,6 @@ pmap_swapin_proc(struct proc *p)
                        m = vm_page_lookup(upobj, i);
                        m->valid = VM_PAGE_BITS_ALL;
                }
-
                vm_page_wire(m);
                vm_page_wakeup(m);
                vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
@@ -1327,6 +1344,7 @@ pmap_release(struct pmap *pmap)
        vm_page_t p,n,ptdpg;
        vm_object_t object = pmap->pm_pteobj;
        int curgeneration;
+       int s;
 
 #if defined(DIAGNOSTIC)
        if (object->ref_count != 1)
@@ -1335,6 +1353,7 @@ pmap_release(struct pmap *pmap)
        
        ptdpg = NULL;
 retry:
+       s = splvm();
        curgeneration = object->generation;
        for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) {
                n = TAILQ_NEXT(p, listq);
@@ -1344,10 +1363,13 @@ retry:
                }
                while (1) {
                        if (!pmap_release_free_page(pmap, p) &&
-                               (object->generation != curgeneration))
+                           (object->generation != curgeneration)) {
+                               splx(s);
                                goto retry;
+                       }
                }
        }
+       splx(s);
 
        if (ptdpg && !pmap_release_free_page(pmap, ptdpg))
                goto retry;
@@ -2197,10 +2219,11 @@ pmap_kenter_temporary(vm_paddr_t pa, int i)
 }
 
 #define MAX_INIT_PT (96)
+
 /*
- * pmap_object_init_pt preloads the ptes for a given object
- * into the specified pmap.  This eliminates the blast of soft
- * faults on process startup and immediately after an mmap.
+ * This routine preloads the ptes for a given object into the specified pmap.
+ * This eliminates the blast of soft faults on process startup and
+ * immediately after an mmap.
  */
 void
 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot,
@@ -2211,19 +2234,27 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot,
        int psize;
        vm_page_t p, mpte;
        int objpgs;
+       int s;
 
        if ((prot & VM_PROT_READ) == 0 || pmap == NULL || object == NULL)
                return;
 
+#if 0
+       /* 
+        * XXX you must be joking, entering PTE's into a user page table
+        * without any accounting?  This could result in the page table
+        * being freed while it still contains mappings (free with PG_ZERO
+        * assumption leading to a non-zero page being marked PG_ZERO).
+        */
        /*
         * This code maps large physical mmap regions into the
         * processor address space.  Note that some shortcuts
         * are taken, but the code works.
         */
        if (pseflag &&
-               (object->type == OBJT_DEVICE) &&
-               ((addr & (NBPDR - 1)) == 0) &&
-               ((size & (NBPDR - 1)) == 0) ) {
+           (object->type == OBJT_DEVICE) &&
+           ((addr & (NBPDR - 1)) == 0) &&
+           ((size & (NBPDR - 1)) == 0) ) {
                int i;
                vm_page_t m[1];
                unsigned int ptepindex;
@@ -2262,9 +2293,9 @@ retry:
 
                pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
                npdes = size >> PDRSHIFT;
-               for(i=0;i<npdes;i++) {
+               for (i = 0; i < npdes; i++) {
                        pmap->pm_pdir[ptepindex] =
-                               (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_PS);
+                           (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_PS);
                        ptepa += NBPDR;
                        ptepindex += 1;
                }
@@ -2273,6 +2304,7 @@ retry:
                smp_invltlb();
                return;
        }
+#endif
 
        psize = i386_btop(size);
 
@@ -2288,26 +2320,30 @@ retry:
                psize = object->size - pindex;
        }
 
-       mpte = NULL;
+
        /*
-        * if we are processing a major portion of the object, then scan the
+        * If we are processing a major portion of the object, then scan the
         * entire thing.
+        *
+        * We cannot safely scan the object's memq unless we are at splvm(),
+        * since interrupts can remove pages from objects.
         */
+       s = splvm();
+       mpte = NULL;
        if (psize > (object->resident_page_count >> 2)) {
                objpgs = psize;
 
                for (p = TAILQ_FIRST(&object->memq);
-                   ((objpgs > 0) && (p != NULL));
-                   p = TAILQ_NEXT(p, listq)) {
-
+                   objpgs > 0 && p != NULL;
+                   p = TAILQ_NEXT(p, listq)
+               ) {
                        tmpidx = p->pindex;
-                       if (tmpidx < pindex) {
+                       if (tmpidx < pindex)
                                continue;
-                       }
                        tmpidx -= pindex;
-                       if (tmpidx >= psize) {
+                       if (tmpidx >= psize)
                                continue;
-                       }
+
                        /*
                         * don't allow an madvise to blow away our really
                         * free pages allocating pv entries.
@@ -2357,13 +2393,13 @@ retry:
                        }
                }
        }
+       splx(s);
 }
 
 /*
- * pmap_prefault provides a quick way of clustering
- * pagefaults into a processes address space.  It is a "cousin"
- * of pmap_object_init_pt, except it runs at page fault time instead
- * of mmap time.
+ * pmap_prefault provides a quick way of clustering pagefaults into a
+ * processes address space.  It is a "cousin" of pmap_object_init_pt, 
+ * except it runs at page fault time instead of mmap time.
  */
 #define PFBAK 4
 #define PFFOR 4
@@ -2380,6 +2416,7 @@ void
 pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
 {
        int i;
+       int s;
        vm_offset_t starta;
        vm_offset_t addr;
        vm_pindex_t pindex;
@@ -2392,13 +2429,18 @@ pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
        object = entry->object.vm_object;
 
        starta = addra - PFBAK * PAGE_SIZE;
-       if (starta < entry->start) {
+       if (starta < entry->start)
                starta = entry->start;
-       } else if (starta > addra) {
+       else if (starta > addra)
                starta = 0;
-       }
 
+       /*
+        * splvm() protection is required to maintain the page/object 
+        * association, interrupts can free pages and remove them from
+        * their objects.
+        */
        mpte = NULL;
+       s = splvm();
        for (i = 0; i < PAGEORDER_SIZE; i++) {
                vm_object_t lobject;
                unsigned *pte;
@@ -2419,9 +2461,12 @@ pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
 
                pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
                lobject = object;
+
                for (m = vm_page_lookup(lobject, pindex);
-                   (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
-                   lobject = lobject->backing_object) {
+                   (!m && (lobject->type == OBJT_DEFAULT) &&
+                    (lobject->backing_object));
+                   lobject = lobject->backing_object
+               ) {
                        if (lobject->backing_object_offset & PAGE_MASK)
                                break;
                        pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
@@ -2447,6 +2492,7 @@ pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
                        vm_page_wakeup(m);
                }
        }
+       splx(s);
 }
 
 /*
@@ -2510,6 +2556,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
        vm_offset_t pdnxt;
        unsigned src_frame, dst_frame;
        vm_page_t m;
+       int s;
 
        if (dst_addr != src_addr)
                return;
@@ -2529,7 +2576,13 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
        pmap_inval_add(&info, dst_pmap, -1);
        pmap_inval_add(&info, src_pmap, -1);
 
-       for(addr = src_addr; addr < end_addr; addr = pdnxt) {
+       /*
+        * splvm() protection is required to maintain the page/object
+        * association, interrupts can free pages and remove them from 
+        * their objects.
+        */
+       s = splvm();
+       for (addr = src_addr; addr < end_addr; addr = pdnxt) {
                unsigned *src_pte, *dst_pte;
                vm_page_t dstmpte, srcmpte;
                vm_offset_t srcptepaddr;
@@ -2607,6 +2660,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
                        dst_pte++;
                }
        }
+       splx(s);
        pmap_inval_flush(&info);
 }      
 
index 8adb236..23929eb 100644 (file)
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $DragonFly: src/sys/sys/xio.h,v 1.2 2004/04/09 12:51:20 joerg Exp $
+ * $DragonFly: src/sys/sys/xio.h,v 1.3 2004/05/13 17:40:17 dillon Exp $
  */
 
 /*
@@ -62,6 +62,7 @@ typedef struct xio *xio_t;
 
 #define XIOF_READ      0x0001
 #define XIOF_WRITE     0x0002
+#define XIOF_LINMAP    0x0004
 
 #endif
 
index 77fe926..400f009 100644 (file)
@@ -38,7 +38,7 @@
  *     @(#)procfs_mem.c        8.5 (Berkeley) 6/15/94
  *
  * $FreeBSD: src/sys/miscfs/procfs/procfs_mem.c,v 1.46.2.3 2002/01/22 17:22:59 nectar Exp $
- * $DragonFly: src/sys/vfs/procfs/procfs_mem.c,v 1.8 2004/05/02 03:05:11 cpressey Exp $
+ * $DragonFly: src/sys/vfs/procfs/procfs_mem.c,v 1.9 2004/05/13 17:40:19 dillon Exp $
  */
 
 /*
@@ -73,7 +73,6 @@ procfs_rwmem(struct proc *curp, struct proc *p, struct uio *uio)
        int writing;
        struct vmspace *vm;
        vm_map_t map;
-       vm_object_t object = NULL;
        vm_offset_t pageno = 0;         /* page number */
        vm_prot_t reqprot;
        vm_offset_t kva;
@@ -109,10 +108,10 @@ procfs_rwmem(struct proc *curp, struct proc *p, struct uio *uio)
                vm_prot_t out_prot;
                boolean_t wired;
                vm_pindex_t pindex;
+               vm_object_t object;
                u_int len;
                vm_page_t m;
-
-               object = NULL;
+               int s;
 
                uva = (vm_offset_t) uio->uio_offset;
 
@@ -149,77 +148,67 @@ procfs_rwmem(struct proc *curp, struct proc *p, struct uio *uio)
 
                if (error) {
                        error = EFAULT;
-
-                       /*
-                        * Make sure that there is no residue in 'object' from
-                        * an error return on vm_map_lookup.
-                        */
-                       object = NULL;
-
                        break;
                }
 
+               /*
+                * spl protection is required to avoid interrupt freeing
+                * races, reference the object to avoid it being ripped
+                * out from under us if we block.
+                */
+               s = splvm();
+               vm_object_reference(object);
+again:
                m = vm_page_lookup(object, pindex);
 
-               /* Allow fallback to backing objects if we are reading */
-
+               /*
+                * Allow fallback to backing objects if we are reading
+                */
                while (m == NULL && !writing && object->backing_object) {
+                       pindex += OFF_TO_IDX(object->backing_object_offset);
+                       object = object->backing_object;
+                       m = vm_page_lookup(object, pindex);
+               }
 
-                 pindex += OFF_TO_IDX(object->backing_object_offset);
-                 object = object->backing_object;
-
-                 m = vm_page_lookup(object, pindex);
+               /*
+                * Wait for any I/O's to complete, then hold the page
+                * so we can release the spl.
+                */
+               if (m) {
+                       if (vm_page_sleep_busy(m, FALSE, "rwmem"))
+                               goto again;
+                       vm_page_hold(m);
                }
+               splx(s);
 
+               /*
+                * We no longer need the object.  If we do not have a page
+                * then cleanup.
+                */
+               vm_object_deallocate(object);
                if (m == NULL) {
-                       error = EFAULT;
-
-                       /*
-                        * Make sure that there is no residue in 'object' from
-                        * an error return on vm_map_lookup.
-                        */
-                       object = NULL;
-
                        vm_map_lookup_done(tmap, out_entry, 0);
-
+                       error = EFAULT;
                        break;
                }
 
                /*
-                * Wire the page into memory
+                * Cleanup tmap then create a temporary KVA mapping and
+                * do the I/O.
                 */
-               vm_page_hold(m);
-
-               /*
-                * We're done with tmap now.
-                * But reference the object first, so that we won't loose
-                * it.
-                */
-               vm_object_reference(object);
                vm_map_lookup_done(tmap, out_entry, 0);
-
                pmap_kenter(kva, VM_PAGE_TO_PHYS(m));
-
-               /*
-                * Now do the i/o move.
-                */
                error = uiomove((caddr_t)(kva + page_offset), len, uio);
-
                pmap_kremove(kva);
 
                /*
-                * release the page and the object
+                * release the page and we are done
                 */
+               s = splbio();
                vm_page_unhold(m);
-               vm_object_deallocate(object);
-
-               object = NULL;
-
+               splx(s);
        } while (error == 0 && uio->uio_resid > 0);
 
-       if (object)
-               vm_object_deallocate(object);
-
        kmem_free(kernel_map, kva, PAGE_SIZE);
        vmspace_free(vm);
        return (error);
index e8986a8..20c0385 100644 (file)
@@ -67,7 +67,7 @@
  * rights to redistribute these changes.
  *
  * $FreeBSD: src/sys/vm/vm_fault.c,v 1.108.2.8 2002/02/26 05:49:27 silby Exp $
- * $DragonFly: src/sys/vm/vm_fault.c,v 1.13 2004/03/29 17:30:23 drhodus Exp $
+ * $DragonFly: src/sys/vm/vm_fault.c,v 1.14 2004/05/13 17:40:19 dillon Exp $
  */
 
 /*
@@ -193,13 +193,13 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags)
        vm_page_t marray[VM_FAULT_READ];
        int hardfault;
        int faultcount;
+       int s;
        struct faultstate fs;
 
        mycpu->gd_cnt.v_vm_faults++;
        hardfault = 0;
 
-RetryFault:;
-
+RetryFault:
        /*
         * Find the backing store object and offset into it to begin the
         * search.
@@ -288,12 +288,15 @@ RetryFault:;
                }
 
                /*
-                * See if page is resident
+                * See if page is resident.  spl protection is required
+                * to avoid an interrupt unbusy/free race against our
+                * lookup.  We must hold the protection through a page
+                * allocation or busy.
                 */
-                       
+               s = splvm();
                fs.m = vm_page_lookup(fs.object, fs.pindex);
                if (fs.m != NULL) {
-                       int queue, s;
+                       int queue;
                        /*
                         * Wait/Retry if the page is busy.  We have to do this
                         * if the page is busy via either PG_BUSY or 
@@ -312,21 +315,21 @@ RetryFault:;
                         */
                        if ((fs.m->flags & PG_BUSY) || fs.m->busy) {
                                unlock_things(&fs);
-                               (void)vm_page_sleep_busy(fs.m, TRUE, "vmpfw");
+                               vm_page_sleep_busy(fs.m, TRUE, "vmpfw");
                                mycpu->gd_cnt.v_intrans++;
                                vm_object_deallocate(fs.first_object);
+                               splx(s);
                                goto RetryFault;
                        }
 
                        queue = fs.m->queue;
-                       s = splvm();
                        vm_page_unqueue_nowakeup(fs.m);
-                       splx(s);
 
                        if ((queue - fs.m->pc) == PQ_CACHE && vm_page_count_severe()) {
                                vm_page_activate(fs.m);
                                unlock_and_deallocate(&fs);
                                VM_WAITPFAULT;
+                               splx(s);
                                goto RetryFault;
                        }
 
@@ -335,9 +338,14 @@ RetryFault:;
                         * pagedaemon.  If it still isn't completely valid
                         * (readable), jump to readrest, else break-out ( we
                         * found the page ).
+                        *
+                        * We can release the spl once we have marked the
+                        * page busy.
                         */
 
                        vm_page_busy(fs.m);
+                       splx(s);
+
                        if (((fs.m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) &&
                                fs.m->object != kernel_object && fs.m->object != kmem_object) {
                                goto readrest;
@@ -349,10 +357,13 @@ RetryFault:;
                /*
                 * Page is not resident, If this is the search termination
                 * or the pager might contain the page, allocate a new page.
+                *
+                * note: we are still in splvm().
                 */
 
                if (TRYPAGER || fs.object == fs.first_object) {
                        if (fs.pindex >= fs.object->size) {
+                               splx(s);
                                unlock_and_deallocate(&fs);
                                return (KERN_PROTECTION_FAILURE);
                        }
@@ -366,11 +377,13 @@ RetryFault:;
                                    (fs.vp || fs.object->backing_object)? VM_ALLOC_NORMAL: VM_ALLOC_NORMAL | VM_ALLOC_ZERO);
                        }
                        if (fs.m == NULL) {
+                               splx(s);
                                unlock_and_deallocate(&fs);
                                VM_WAITPFAULT;
                                goto RetryFault;
                        }
                }
+               splx(s);
 
 readrest:
                /*
@@ -381,6 +394,9 @@ readrest:
                 * Attempt to fault-in the page if there is a chance that the
                 * pager has it, and potentially fault in additional pages
                 * at the same time.
+                *
+                * We are NOT in splvm here and if TRYPAGER is true then
+                * fs.m will be non-NULL and will be PG_BUSY for us.
                 */
 
                if (TRYPAGER) {
@@ -419,11 +435,17 @@ readrest:
                                 * note: partially valid pages cannot be 
                                 * included in the lookahead - NFS piecemeal
                                 * writes will barf on it badly.
+                                *
+                                * spl protection is required to avoid races
+                                * between the lookup and an interrupt
+                                * unbusy/free sequence occuring prior to
+                                * our busy check.
                                 */
-
-                               for(tmppindex = fs.first_pindex - 1;
-                                       tmppindex >= firstpindex;
-                                       --tmppindex) {
+                               s = splvm();
+                               for (tmppindex = fs.first_pindex - 1;
+                                   tmppindex >= firstpindex;
+                                   --tmppindex
+                               ) {
                                        vm_page_t mt;
                                        mt = vm_page_lookup( fs.first_object, tmppindex);
                                        if (mt == NULL || (mt->valid != VM_PAGE_BITS_ALL))
@@ -442,6 +464,7 @@ readrest:
                                                vm_page_cache(mt);
                                        }
                                }
+                               splx(s);
 
                                ahead += behind;
                                behind = 0;
@@ -491,9 +514,13 @@ readrest:
                                 * Relookup in case pager changed page. Pager
                                 * is responsible for disposition of old page
                                 * if moved.
+                                *
+                                * XXX other code segments do relookups too.
+                                * It's a bad abstraction that needs to be
+                                * fixed/removed.
                                 */
                                fs.m = vm_page_lookup(fs.object, fs.pindex);
-                               if(!fs.m) {
+                               if (fs.m == NULL) {
                                        unlock_and_deallocate(&fs);
                                        goto RetryFault;
                                }
@@ -1179,6 +1206,10 @@ vm_fault_additional_pages(vm_page_t m, int rbehind, int rahead,
 
        /*
         * scan backward for the read behind pages -- in memory 
+        *
+        * Assume that if the page is not found an interrupt will not
+        * create it.  Theoretically interrupts can only remove (busy)
+        * pages, not create new associations.
         */
        if (pindex > 0) {
                if (rbehind > pindex) {
index 5442e8d..6628b8b 100644 (file)
@@ -62,7 +62,7 @@
  * rights to redistribute these changes.
  *
  * $FreeBSD: src/sys/vm/vm_map.c,v 1.187.2.19 2003/05/27 00:47:02 alc Exp $
- * $DragonFly: src/sys/vm/vm_map.c,v 1.26 2004/04/26 20:26:59 dillon Exp $
+ * $DragonFly: src/sys/vm/vm_map.c,v 1.27 2004/05/13 17:40:19 dillon Exp $
  */
 
 /*
@@ -2489,11 +2489,20 @@ vm_map_split(vm_map_entry_t entry)
 
        for (idx = 0; idx < size; idx++) {
                vm_page_t m;
+               int ss;         /* s used */
 
+               /*
+                * splvm protection is required to avoid a race between
+                * the lookup and an interrupt/unbusy/free and our busy
+                * check.
+                */
+               ss = splvm();
        retry:
                m = vm_page_lookup(orig_object, offidxstart + idx);
-               if (m == NULL)
+               if (m == NULL) {
+                       splx(ss);
                        continue;
+               }
 
                /*
                 * We must wait for pending I/O to complete before we can
@@ -2504,11 +2513,11 @@ vm_map_split(vm_map_entry_t entry)
                 */
                if (vm_page_sleep_busy(m, TRUE, "spltwt"))
                        goto retry;
-                       
                vm_page_busy(m);
                vm_page_rename(m, new_object, idx);
                /* page automatically made dirty by rename and cache handled */
                vm_page_busy(m);
+               splx(ss);
        }
 
        if (orig_object->type == OBJT_SWAP) {
@@ -2522,11 +2531,14 @@ vm_map_split(vm_map_entry_t entry)
                vm_object_pip_wakeup(orig_object);
        }
 
+       /*
+        * Wakeup the pages we played with.  No spl protection is needed
+        * for a simple wakeup.
+        */
        for (idx = 0; idx < size; idx++) {
                m = vm_page_lookup(new_object, idx);
-               if (m) {
+               if (m)
                        vm_page_wakeup(m);
-               }
        }
 
        entry->object.vm_object = new_object;
@@ -3262,6 +3274,8 @@ vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry, int count)
  * Implement uiomove with VM operations.  This handles (and collateral changes)
  * support every combination of source object modification, and COW type
  * operations.
+ *
+ * XXX this is extremely dangerous, enabling this option is NOT recommended.
  */
 int
 vm_uiomove(vm_map_t mapa, vm_object_t srcobject, off_t cp, int cnta,
@@ -3278,6 +3292,7 @@ vm_uiomove(vm_map_t mapa, vm_object_t srcobject, off_t cp, int cnta,
        off_t ooffset;
        int cnt;
        int count;
+       int s;
 
        if (npages)
                *npages = 0;
@@ -3315,9 +3330,17 @@ vm_uiomove(vm_map_t mapa, vm_object_t srcobject, off_t cp, int cnta,
                oindex = OFF_TO_IDX(cp);
                if (npages) {
                        vm_pindex_t idx;
+
+                       /*
+                        * spl protection is needed to avoid a race between
+                        * the lookup and an interrupt/unbusy/free occuring
+                        * prior to our busy check.
+                        */
+                       s = splvm();
                        for (idx = 0; idx < osize; idx++) {
                                vm_page_t m;
                                if ((m = vm_page_lookup(srcobject, oindex + idx)) == NULL) {
+                                       splx(s);
                                        vm_map_lookup_done(map, entry, count);
                                        return 0;
                                }
@@ -3327,10 +3350,12 @@ vm_uiomove(vm_map_t mapa, vm_object_t srcobject, off_t cp, int cnta,
                                 */
                                if ((m->flags & PG_BUSY) ||
                                        ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) {
+                                       splx(s);
                                        vm_map_lookup_done(map, entry, count);
                                        return 0;
                                }
                        }
+                       splx(s);
                }
 
 /*
@@ -3511,7 +3536,7 @@ vm_freeze_copyopts(vm_object_t object, vm_pindex_t froma, vm_pindex_t toa)
        if (object->shadow_count > object->ref_count)
                panic("vm_freeze_copyopts: sc > rc");
 
-       while((robject = LIST_FIRST(&object->shadow_head)) != NULL) {
+       while ((robject = LIST_FIRST(&object->shadow_head)) != NULL) {
                vm_pindex_t bo_pindex;
                vm_page_t m_in, m_out;
 
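A note on the idiom: every vm_map.c hunk above follows the same shape, entering splvm() before vm_page_lookup() so that an interrupt cannot unbusy/free the page between the lookup and the busy check.  A minimal sketch of that shape, not part of the patch (the object, pindex, and label names are illustrative):

        int s;
        vm_page_t m;

        s = splvm();                    /* close the interrupt window */
retry:
        m = vm_page_lookup(object, pindex);
        if (m == NULL) {
                splx(s);
                return;
        }
        if (vm_page_sleep_busy(m, TRUE, "lkwait"))
                goto retry;             /* we slept; the page may have changed */
        vm_page_busy(m);                /* lookup and busy check were atomic vs interrupts */
        splx(s);
        /* ... operate on the busied page, then vm_page_wakeup(m) ... */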
index b6bfc42..4d19496 100644 (file)
@@ -39,7 +39,7 @@
  *
  *     @(#)vm_mmap.c   8.4 (Berkeley) 1/12/94
  * $FreeBSD: src/sys/vm/vm_mmap.c,v 1.108.2.6 2002/07/02 20:06:19 dillon Exp $
- * $DragonFly: src/sys/vm/vm_mmap.c,v 1.19 2004/03/23 22:54:32 dillon Exp $
+ * $DragonFly: src/sys/vm/vm_mmap.c,v 1.20 2004/05/13 17:40:19 dillon Exp $
  */
 
 /*
@@ -698,7 +698,7 @@ RestartScan:
                /*
                 * scan this entry one page at a time
                 */
-               while(addr < cend) {
+               while (addr < cend) {
                        /*
                         * Check pmap first, it is likely faster, also
                         * it can provide info as to whether we are the
@@ -709,17 +709,24 @@ RestartScan:
                                vm_pindex_t pindex;
                                vm_ooffset_t offset;
                                vm_page_t m;
+                               int s;
+
                                /*
                                 * calculate the page index into the object
                                 */
                                offset = current->offset + (addr - current->start);
                                pindex = OFF_TO_IDX(offset);
-                               m = vm_page_lookup(current->object.vm_object,
-                                       pindex);
+
                                /*
-                                * if the page is resident, then gather information about
-                                * it.
+                                * if the page is resident, then gather 
+                                * information about it.  spl protection is
+                                * required to maintain the object 
+                                * association.  And XXX what if the page is
+                                * busy?  What's the deal with that?
                                 */
+                               s = splvm();
+                               m = vm_page_lookup(current->object.vm_object,
+                                                   pindex);
                                if (m && m->valid) {
                                        mincoreinfo = MINCORE_INCORE;
                                        if (m->dirty ||
@@ -731,6 +738,7 @@ RestartScan:
                                                mincoreinfo |= MINCORE_REFERENCED_OTHER;
                                        }
                                }
+                               splx(s);
                        }
 
                        /*
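The mincore() path above examines the page but never busies it, so the splvm() section has to cover the lookup and all of the field tests as one unit; dropping the spl in between would reopen the window in which an interrupt can free the page.  A condensed sketch, assuming object and pindex are already computed (names are illustrative, not part of the patch):

        int s;
        int mincoreinfo = 0;
        vm_page_t m;

        s = splvm();
        m = vm_page_lookup(object, pindex);
        if (m && m->valid) {
                mincoreinfo = MINCORE_INCORE;
                if (m->dirty)
                        mincoreinfo |= MINCORE_MODIFIED_OTHER;
                if (m->flags & PG_REFERENCED)
                        mincoreinfo |= MINCORE_REFERENCED_OTHER;
        }
        splx(s);                        /* only now is it safe to drop the spl */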
index 630d9e2..ab6e932 100644 (file)
@@ -62,7 +62,7 @@
  * rights to redistribute these changes.
  *
  * $FreeBSD: src/sys/vm/vm_object.c,v 1.171.2.8 2003/05/26 19:17:56 alc Exp $
- * $DragonFly: src/sys/vm/vm_object.c,v 1.15 2004/05/10 11:05:13 hmp Exp $
+ * $DragonFly: src/sys/vm/vm_object.c,v 1.16 2004/05/13 17:40:19 dillon Exp $
  */
 
 /*
@@ -503,8 +503,6 @@ vm_object_terminate(vm_object_t object)
  *     synchronous clustering mode implementation.
  *
  *     Odd semantics: if start == end, we clean everything.
- *
- *     The object must be locked.
  */
 
 void
@@ -519,6 +517,7 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
        int pagerflags;
        int curgeneration;
        lwkt_tokref vlock;
+       int s;
 
        if (object->type != OBJT_VNODE ||
                (object->flags & OBJ_MIGHTBEDIRTY) == 0)
@@ -560,6 +559,15 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
 
                scanlimit = scanreset;
                tscan = tstart;
+
+               /*
+                * spl protection is required despite the obj generation
+                * tracking because we cannot safely call vm_page_test_dirty(),
+                * or test other page fields, while exposed to an interrupt
+                * unbusy/free race that might occur prior to the busy check
+                * in vm_object_page_collect_flush().
+                */
+               s = splvm();
                while (tscan < tend) {
                        curgeneration = object->generation;
                        p = vm_page_lookup(object, tscan);
@@ -593,8 +601,10 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
                         * This returns 0 if it was unable to busy the first
                         * page (i.e. had to sleep).
                         */
-                       tscan += vm_object_page_collect_flush(object, p, curgeneration, pagerflags);
+                       tscan += vm_object_page_collect_flush(object, p, 
+                                               curgeneration, pagerflags);
                }
+               splx(s);
 
                /*
                 * If everything was dirty and we flushed it successfully,
@@ -616,17 +626,21 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
         * However, if this is a nosync mmap then the object is likely to 
         * stay dirty so do not mess with the page and do not clear the
         * object flags.
+        *
+        * spl protection is required because an interrupt can remove a page
+        * from the object.
         */
-
        clearobjflags = 1;
 
-       for(p = TAILQ_FIRST(&object->memq); p; p = TAILQ_NEXT(p, listq)) {
+       s = splvm();
+       for (p = TAILQ_FIRST(&object->memq); p; p = TAILQ_NEXT(p, listq)) {
                vm_page_flag_set(p, PG_CLEANCHK);
                if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC))
                        clearobjflags = 0;
                else
                        vm_page_protect(p, VM_PROT_READ);
        }
+       splx(s);
 
        if (clearobjflags && (tstart == 0) && (tend == object->size)) {
                struct vnode *vp;
@@ -642,10 +656,19 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
                 }
        }
 
+       /*
+        * spl protection is required both to avoid an interrupt unbusy/free
+        * race against a vm_page_lookup(), and also to ensure that the
+        * memq is consistent.  We do not want a busy page to be ripped out
+        * from under us.
+        */
+       s = splvm();
 rescan:
+       splx(s);        /* give interrupts a chance */
+       s = splvm();
        curgeneration = object->generation;
 
-       for(p = TAILQ_FIRST(&object->memq); p; p = np) {
+       for (p = TAILQ_FIRST(&object->memq); p; p = np) {
                int n;
 
                np = TAILQ_NEXT(p, listq);
@@ -692,6 +715,7 @@ again:
                                goto again;
                }
        }
+       splx(s);
 
 #if 0
        VOP_FSYNC(vp, NULL, (pagerflags & VM_PAGER_PUT_SYNC)?MNT_WAIT:0, curproc);
@@ -701,11 +725,21 @@ again:
        return;
 }
 
+/*
+ * This routine must be called at splvm() to properly avoid an interrupt
+ * unbusy/free race that can occur prior to the busy check.
+ *
+ * Using the object generation number here to detect page ripout is not
+ * the best idea in the world. XXX
+ *
+ * NOTE: we operate under the assumption that a page found to not be busy
+ * will not be ripped out from under us by an interrupt.  XXX we should
+ * recode this to explicitly busy the pages.
+ */
 static int
 vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags)
 {
        int runlen;
-       int s;
        int maxf;
        int chkb;
        int maxb;
@@ -715,11 +749,9 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration,
        vm_page_t mab[vm_pageout_page_count];
        vm_page_t ma[vm_pageout_page_count];
 
-       s = splvm();
        pi = p->pindex;
        while (vm_page_sleep_busy(p, TRUE, "vpcwai")) {
                if (object->generation != curgeneration) {
-                       splx(s);
                        return(0);
                }
        }
@@ -793,7 +825,6 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration,
        }
        runlen = maxb + maxf + 1;
 
-       splx(s);
        vm_pageout_flush(ma, runlen, pagerflags);
        for (i = 0; i < runlen; i++) {
                if (ma[i]->valid & ma[i]->dirty) {
@@ -826,11 +857,14 @@ static void
 vm_object_deactivate_pages(vm_object_t object)
 {
        vm_page_t p, next;
+       int s;
 
+       s = splvm();
        for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) {
                next = TAILQ_NEXT(p, listq);
                vm_page_deactivate(p);
        }
+       splx(s);
 }
 #endif
 
@@ -845,22 +879,28 @@ vm_object_deactivate_pages(vm_object_t object)
  * NOTE: If the page is already at VM_PROT_NONE, calling
  * vm_page_protect will have no effect.
  */
-
 void
 vm_object_pmap_copy_1(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
 {
        vm_pindex_t idx;
        vm_page_t p;
+       int s;
 
        if (object == NULL || (object->flags & OBJ_WRITEABLE) == 0)
                return;
 
+       /*
+        * spl protection needed to prevent races between the lookup,
+        * an interrupt unbusy/free, and our protect call.
+        */
+       s = splvm();
        for (idx = start; idx < end; idx++) {
                p = vm_page_lookup(object, idx);
                if (p == NULL)
                        continue;
                vm_page_protect(p, VM_PROT_READ);
        }
+       splx(s);
 }
 
 /*
@@ -875,15 +915,24 @@ void
 vm_object_pmap_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
 {
        vm_page_t p;
+       int s;
 
        if (object == NULL)
                return;
+
+       /*
+        * spl protection is required because an interrupt can unbusy/free
+        * a page.
+        */
+       s = splvm();
        for (p = TAILQ_FIRST(&object->memq);
-               p != NULL;
-               p = TAILQ_NEXT(p, listq)) {
+           p != NULL;
+           p = TAILQ_NEXT(p, listq)
+       ) {
                if (p->pindex >= start && p->pindex < end)
                        vm_page_protect(p, VM_PROT_NONE);
        }
+       splx(s);
        if ((start == 0) && (object->size == end))
                vm_object_clear_flag(object, OBJ_WRITEABLE);
 }
@@ -915,6 +964,7 @@ vm_object_madvise(vm_object_t object, vm_pindex_t pindex, int count, int advise)
        vm_pindex_t end, tpindex;
        vm_object_t tobject;
        vm_page_t m;
+       int s;
 
        if (object == NULL)
                return;
@@ -942,6 +992,12 @@ shadowlookup:
                        }
                }
 
+               /*
+                * spl protection is required to avoid a race between the
+                * lookup, an interrupt unbusy/free, and our busy check.
+                */
+
+               s = splvm();
                m = vm_page_lookup(tobject, tpindex);
 
                if (m == NULL) {
@@ -954,6 +1010,7 @@ shadowlookup:
                        /*
                         * next object
                         */
+                       splx(s);
                        tobject = tobject->backing_object;
                        if (tobject == NULL)
                                continue;
@@ -973,11 +1030,20 @@ shadowlookup:
                    (m->flags & PG_UNMANAGED) ||
                    m->valid != VM_PAGE_BITS_ALL
                ) {
+                       splx(s);
                        continue;
                }
 
-               if (vm_page_sleep_busy(m, TRUE, "madvpo"))
+               if (vm_page_sleep_busy(m, TRUE, "madvpo")) {
+                       splx(s);
                        goto relookup;
+               }
+               splx(s);
+
+               /*
+                * Theoretically once a page is known not to be busy, an
+                * interrupt cannot come along and rip it out from under us.
+                */
 
                if (advise == MADV_WILLNEED) {
                        vm_page_activate(m);
@@ -1094,6 +1160,11 @@ vm_object_backing_scan(vm_object_t object, int op)
        vm_object_t backing_object;
        vm_pindex_t backing_offset_index;
 
+       /*
+        * spl protection is required to avoid races between the memq/lookup,
+        * an interrupt doing an unbusy/free, and our busy check.  Among
+        * other things.
+        */
        s = splvm();
 
        backing_object = object->backing_object;
@@ -1493,8 +1564,6 @@ vm_object_collapse(vm_object_t object)
  *
  *     Removes all physical pages in the specified
  *     object range from the object's list of pages.
- *
- *     The object must be locked.
  */
 void
 vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
@@ -1503,9 +1572,9 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
        vm_page_t p, next;
        unsigned int size;
        int all;
+       int s;
 
-       if (object == NULL ||
-           object->resident_page_count == 0)
+       if (object == NULL || object->resident_page_count == 0)
                return;
 
        all = ((end == 0) && (start == 0));
@@ -1515,9 +1584,17 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
         * remove pages from the object (we must instead remove the page
         * references, and then destroy the object).
         */
-       KASSERT(object->type != OBJT_PHYS, ("attempt to remove pages from a physical object"));
+       KASSERT(object->type != OBJT_PHYS, 
+               ("attempt to remove pages from a physical object"));
 
+       /*
+        * Indicate that the object is undergoing paging.
+        *
+        * spl protection is required to avoid a race between the memq scan,
+        * an interrupt unbusy/free, and the busy check.
+        */
        vm_object_pip_add(object, 1);
+       s = splvm();
 again:
        size = end - start;
        if (all || size > object->resident_page_count / 4) {
@@ -1553,7 +1630,6 @@ again:
        } else {
                while (size > 0) {
                        if ((p = vm_page_lookup(object, start)) != 0) {
-
                                if (p->wire_count != 0) {
                                        vm_page_protect(p, VM_PROT_NONE);
                                        if (!clean_only)
@@ -1587,6 +1663,7 @@ again:
                        size -= 1;
                }
        }
+       splx(s);
        vm_object_pip_wakeup(object);
 }
 
@@ -1888,7 +1965,7 @@ DB_SHOW_COMMAND(vmopag, vm_object_print_pages)
                osize = object->size;
                if (osize > 128)
                        osize = 128;
-               for(idx=0;idx<osize;idx++) {
+               for (idx = 0; idx < osize; idx++) {
                        m = vm_page_lookup(object, idx);
                        if (m == NULL) {
                                if (rcount) {
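The long memq scans in the vm_object.c hunks above hold splvm() for the duration of a pass, but drop and immediately re-acquire it when they restart so that pending interrupts are not starved, and they use object->generation to notice that the list changed while they blocked.  A sketch of that shape with the per-page work elided (illustrative only, not part of the patch):

        int s;
        int curgeneration;
        vm_page_t p;

        s = splvm();
rescan:
        splx(s);                        /* give pending interrupts a chance */
        s = splvm();
        curgeneration = object->generation;
        TAILQ_FOREACH(p, &object->memq, listq) {
                /*
                 * ... examine or flush the page; anything that blocks may
                 * change the memq and bump object->generation ...
                 */
                if (object->generation != curgeneration)
                        goto rescan;
        }
        splx(s);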
index a4505fa..27aaaae 100644 (file)
@@ -35,7 +35,7 @@
  *
  *     from: @(#)vm_page.c     7.4 (Berkeley) 5/7/91
  * $FreeBSD: src/sys/vm/vm_page.c,v 1.147.2.18 2002/03/10 05:03:19 alc Exp $
- * $DragonFly: src/sys/vm/vm_page.c,v 1.20 2004/05/10 11:05:13 hmp Exp $
+ * $DragonFly: src/sys/vm/vm_page.c,v 1.21 2004/05/13 17:40:19 dillon Exp $
  */
 
 /*
@@ -374,10 +374,9 @@ vm_page_unhold(vm_page_t mem)
  *     enter the page into the kernel's pmap.  We are not allowed to block
  *     here so we *can't* do this anyway.
  *
- *     The object and page must be locked, and must be splhigh.
  *     This routine may not block.
+ *     This routine must be called at splvm().
  */
-
 void
 vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
 {
@@ -389,14 +388,12 @@ vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
        /*
         * Record the object/offset pair in this page
         */
-
        m->object = object;
        m->pindex = pindex;
 
        /*
         * Insert it into the object_object/offset hash table
         */
-
        bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
        m->hnext = *bucket;
        *bucket = m;
@@ -431,11 +428,10 @@ vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
  *     table and the object page list, but do not invalidate/terminate
  *     the backing store.
  *
- *     The object and page must be locked, and at splhigh.
+ *     This routine must be called at splvm().
  *     The underlying pmap entry (if any) is NOT removed here.
  *     This routine may not block.
  */
-
 void
 vm_page_remove(vm_page_t m)
 {
@@ -463,7 +459,6 @@ vm_page_remove(vm_page_t m)
         * Note: we must NULL-out m->hnext to prevent loops in detached
         * buffers with vm_page_lookup().
         */
-
        {
                struct vm_page **bucket;
 
@@ -481,13 +476,11 @@ vm_page_remove(vm_page_t m)
        /*
         * Now remove from the object's list of backed pages.
         */
-
        TAILQ_REMOVE(&object->memq, m, listq);
 
        /*
         * And show that the object has one fewer resident page.
         */
-
        object->resident_page_count--;
        object->generation++;
 
@@ -497,17 +490,18 @@ vm_page_remove(vm_page_t m)
 /*
  *     vm_page_lookup:
  *
- *     Returns the page associated with the object/offset
- *     pair specified; if none is found, NULL is returned.
+ *     Locate and return the page at (object, pindex), or NULL if the
+ *     page could not be found.
  *
- *     NOTE: the code below does not lock.  It will operate properly if
- *     an interrupt makes a change, but the generation algorithm will not 
- *     operate properly in an SMP environment where both cpu's are able to run
- *     kernel code simultaneously.
+ *     This routine will operate properly without spl protection, but
+ *     the returned page could be in flux if it is busy.  Because an
+ *     interrupt can race a caller's busy check (unbusying and freeing the
+ *     page we return before the caller is able to check the busy bit),
+ *     the caller should generally call this routine at splvm().
  *
- *     The object must be locked.  No side effects.
- *     This routine may not block.
- *     This is a critical path routine
+ *     Callers may call this routine without spl protection if they know
+ *     'for sure' that the page will not be ripped out from under them
+ *     by an interrupt.
  */
 
 vm_page_t
@@ -1433,13 +1427,21 @@ vm_page_dontneed(vm_page_t m)
 }
 
 /*
- * Grab a page, waiting until we are waken up due to the page
- * changing state.  We keep on waiting, if the page continues
- * to be in the object.  If the page doesn't exist, allocate it.
+ * Grab a page, blocking if it is busy and allocating a page if necessary.
+ * A busy page is returned or NULL.
  *
  * If VM_ALLOC_RETRY is specified VM_ALLOC_NORMAL must also be specified.
+ * If VM_ALLOC_RETRY is not specified the routine may return NULL.
  *
- * This routine may block.
+ * This routine may block, but if VM_ALLOC_RETRY is not set then NULL is
+ * always returned if we blocked.
+ * This routine will never return NULL if VM_ALLOC_RETRY is set.
+ * This routine may not be called from an interrupt.
+ * The returned page may not be entirely valid.
+ *
+ * This routine may be called from mainline code without spl protection and
+ * be guaranteed a busied page associated with the object at the specified
+ * index.
  */
 vm_page_t
 vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
@@ -1449,38 +1451,37 @@ vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
 
        KKASSERT(allocflags &
                (VM_ALLOC_NORMAL|VM_ALLOC_INTERRUPT|VM_ALLOC_SYSTEM));
+       s = splvm();
 retrylookup:
        if ((m = vm_page_lookup(object, pindex)) != NULL) {
                if (m->busy || (m->flags & PG_BUSY)) {
                        generation = object->generation;
 
-                       s = splvm();
                        while ((object->generation == generation) &&
                                        (m->busy || (m->flags & PG_BUSY))) {
                                vm_page_flag_set(m, PG_WANTED | PG_REFERENCED);
                                tsleep(m, 0, "pgrbwt", 0);
                                if ((allocflags & VM_ALLOC_RETRY) == 0) {
-                                       splx(s);
-                                       return NULL;
+                                       m = NULL;
+                                       goto done;
                                }
                        }
-                       splx(s);
                        goto retrylookup;
                } else {
                        vm_page_busy(m);
-                       return m;
+                       goto done;
                }
        }
-
        m = vm_page_alloc(object, pindex, allocflags & ~VM_ALLOC_RETRY);
        if (m == NULL) {
                VM_WAIT;
                if ((allocflags & VM_ALLOC_RETRY) == 0)
-                       return NULL;
+                       goto done;
                goto retrylookup;
        }
-
-       return m;
+done:
+       splx(s);
+       return(m);
 }
 
 /*
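With the spl handling folded into vm_page_grab() above, a mainline caller that needs a busied page no longer has to run its own splvm()/retry loop.  A hedged usage sketch; the zero-fill step is illustrative and not taken from this patch:

        vm_page_t m;

        /*
         * VM_ALLOC_RETRY guarantees a non-NULL, busied page and must be
         * paired with VM_ALLOC_NORMAL.  The page may not be fully valid.
         */
        m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
        if ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) {
                vm_page_zero_fill(m);
                m->valid = VM_PAGE_BITS_ALL;
        }
        /* ... use the page ... */
        vm_page_wakeup(m);              /* clear PG_BUSY when done */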
index f83a3f7..8bdd21e 100644 (file)
@@ -62,7 +62,7 @@
  * rights to redistribute these changes.
  *
  * $FreeBSD: src/sys/vm/vm_page.h,v 1.75.2.8 2002/03/06 01:07:09 dillon Exp $
- * $DragonFly: src/sys/vm/vm_page.h,v 1.10 2004/02/16 19:35:53 joerg Exp $
+ * $DragonFly: src/sys/vm/vm_page.h,v 1.11 2004/05/13 17:40:19 dillon Exp $
  */
 
 /*
@@ -444,10 +444,19 @@ vm_offset_t vm_contig_pg_kmap(int, u_long, vm_map_t, int);
 void vm_contig_pg_free(int, u_long);
 
 /*
- * Keep page from being freed by the page daemon
- * much of the same effect as wiring, except much lower
- * overhead and should be used only for *very* temporary
- * holding ("wiring").
+ * Holding a page keeps it from being reused.  Other parts of the system
+ * can still disassociate the page from its current object and free it, or
+ * perform read or write I/O on it and/or otherwise manipulate the page,
+ * but if the page is held the VM system will leave the page and its data
+ * intact and not reuse the page for other purposes until the last hold
+ * reference is released.  (see vm_page_wire() if you want to prevent the
+ * page from being disassociated from its object too).
+ *
+ * This routine must be called while at splvm() or better.
+ *
+ * The caller must still validate the contents of the page and, if necessary,
+ * wait for any pending I/O (e.g. vm_page_sleep_busy() loop) to complete
+ * before manipulating the page.
  */
 static __inline void
 vm_page_hold(vm_page_t mem)
@@ -456,13 +465,19 @@ vm_page_hold(vm_page_t mem)
 }
 
 /*
- *     vm_page_protect:
- *
- *     Reduce the protection of a page.  This routine never raises the 
- *     protection and therefore can be safely called if the page is already
- *     at VM_PROT_NONE (it will be a NOP effectively ).
+ * Reduce the protection of a page.  This routine never raises the 
+ * protection and therefore can be safely called if the page is already
+ * at VM_PROT_NONE (it will effectively be a NOP).
+ *
+ * VM_PROT_NONE will remove all user mappings of a page.  This is often
+ * necessary when a page changes state (for example, turns into a copy-on-write
+ * page or needs to be frozen for write I/O) in order to force a fault, or
+ * to force a page's dirty bits to be synchronized and avoid hardware
+ * (modified/accessed) bit update races with pmap changes.
+ *
+ * Since 'prot' is usually a constant, this inline usually winds up optimizing
+ * out the primary conditional.
  */
-
 static __inline void
 vm_page_protect(vm_page_t mem, int prot)
 {
@@ -478,29 +493,22 @@ vm_page_protect(vm_page_t mem, int prot)
 }
 
 /*
- *     vm_page_zero_fill:
- *
- *     Zero-fill the specified page.
- *     Written as a standard pagein routine, to
- *     be used by the zero-fill object.
+ * Zero-fill the specified page.  The entire contents of the page will be
+ * zeroed out.
  */
 static __inline boolean_t
-vm_page_zero_fill(m)
-       vm_page_t m;
+vm_page_zero_fill(vm_page_t m)
 {
        pmap_zero_page(VM_PAGE_TO_PHYS(m));
        return (TRUE);
 }
 
 /*
- *     vm_page_copy:
- *
- *     Copy one page to another
+ * Copy the contents of src_m to dest_m.  The pages must be stable but spl
+ * and other protections depend on context.
  */
 static __inline void
-vm_page_copy(src_m, dest_m)
-       vm_page_t src_m;
-       vm_page_t dest_m;
+vm_page_copy(vm_page_t src_m, vm_page_t dest_m)
 {
        pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dest_m));
        dest_m->valid = VM_PAGE_BITS_ALL;
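Per the rewritten vm_page_hold() comment above, hold/unhold themselves must now be issued under spl protection even though the hold itself survives blocking operations.  A minimal sketch, assuming the caller re-validates the page contents afterwards (illustrative only, not part of the patch):

        int s;
        vm_page_t m;

        s = splvm();
        m = vm_page_lookup(object, pindex);
        if (m)
                vm_page_hold(m);        /* page cannot be reused while held */
        splx(s);

        if (m) {
                /* ... possibly-blocking access to the page's contents ... */
                s = splvm();
                vm_page_unhold(m);
                splx(s);
        }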
index 6591f1c..6d4d431 100644 (file)
@@ -66,7 +66,7 @@
  * rights to redistribute these changes.
  *
  * $FreeBSD: src/sys/vm/vm_pageout.c,v 1.151.2.15 2002/12/29 18:21:04 dillon Exp $
- * $DragonFly: src/sys/vm/vm_pageout.c,v 1.10 2004/03/23 22:54:32 dillon Exp $
+ * $DragonFly: src/sys/vm/vm_pageout.c,v 1.11 2004/05/13 17:40:19 dillon Exp $
  */
 
 /*
@@ -209,7 +209,8 @@ static void vm_pageout_page_stats(void);
 /*
  * vm_pageout_clean:
  *
- * Clean the page and remove it from the laundry.
+ * Clean the page and remove it from the laundry.  The page must not be
+ * busy when this routine is called.
  * 
  * We set the busy bit to cause potential page faults on this page to
  * block.  Note the careful timing, however, the busy bit isn't set till
@@ -443,7 +444,7 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags)
  */
 static void
 vm_pageout_object_deactivate_pages(vm_map_t map, vm_object_t object,
-    vm_pindex_t desired, int map_remove_only)
+       vm_pindex_t desired, int map_remove_only)
 {
        vm_page_t p, next;
        int rcount;
@@ -462,15 +463,22 @@ vm_pageout_object_deactivate_pages(vm_map_t map, vm_object_t object,
                remove_mode = map_remove_only;
                if (object->shadow_count > 1)
                        remove_mode = 1;
-       /*
-        * scan the objects entire memory queue
-        */
+
+               /*
+                * scan the object's entire memory queue.  spl protection is
+                * required to avoid an interrupt unbusy/free race against
+                * our busy check.
+                */
+               s = splvm();
                rcount = object->resident_page_count;
                p = TAILQ_FIRST(&object->memq);
+
                while (p && (rcount-- > 0)) {
                        int actcount;
-                       if (pmap_resident_count(vm_map_pmap(map)) <= desired)
+                       if (pmap_resident_count(vm_map_pmap(map)) <= desired) {
+                               splx(s);
                                return;
+                       }
                        next = TAILQ_NEXT(p, listq);
                        mycpu->gd_cnt.v_pdpages++;
                        if (p->wire_count != 0 ||
@@ -501,29 +509,25 @@ vm_pageout_object_deactivate_pages(vm_map_t map, vm_object_t object,
                                                vm_page_protect(p, VM_PROT_NONE);
                                                vm_page_deactivate(p);
                                        } else {
-                                               s = splvm();
                                                TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
                                                TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
-                                               splx(s);
                                        }
                                } else {
                                        vm_page_activate(p);
                                        vm_page_flag_clear(p, PG_REFERENCED);
                                        if (p->act_count < (ACT_MAX - ACT_ADVANCE))
                                                p->act_count += ACT_ADVANCE;
-                                       s = splvm();
                                        TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
                                        TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
-                                       splx(s);
                                }
                        } else if (p->queue == PQ_INACTIVE) {
                                vm_page_protect(p, VM_PROT_NONE);
                        }
                        p = next;
                }
+               splx(s);
                object = object->backing_object;
        }
-       return;
 }
 
 /*
@@ -675,19 +679,37 @@ vm_pageout_scan(int pass)
        if (pass)
                maxlaunder = 10000;
 
+       /*
+        * We will generally be at splvm() throughout the scan, but we
+        * can release it temporarily when we are sitting on a non-busy
+        * page without fear.  The spl protection is required because an
+        * interrupt can come along and unbusy/free a busy page prior
+        * to our busy check, leaving us on the wrong queue or checking
+        * the wrong page.
+        */
+       s = splvm();
 rescan0:
        addl_page_shortage = addl_page_shortage_init;
        maxscan = vmstats.v_inactive_count;
        for (m = TAILQ_FIRST(&vm_page_queues[PQ_INACTIVE].pl);
             m != NULL && maxscan-- > 0 && page_shortage > 0;
-            m = next) {
-
+            m = next
+        ) {
                mycpu->gd_cnt.v_pdpages++;
 
-               if (m->queue != PQ_INACTIVE) {
-                       goto rescan0;
-               }
+               /*
+                * Give interrupts a chance
+                */
+               splx(s);
+               s = splvm();
 
+               /*
+                * It's easier for some of the conditions below to just loop
+                * and catch queue changes here rather than check everywhere
+                * else.
+                */
+               if (m->queue != PQ_INACTIVE)
+                       goto rescan0;
                next = TAILQ_NEXT(m, pageq);
 
                /*
@@ -700,13 +722,12 @@ rescan0:
                 * A held page may be undergoing I/O, so skip it.
                 */
                if (m->hold_count) {
-                       s = splvm();
                        TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
                        TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
-                       splx(s);
                        addl_page_shortage++;
                        continue;
                }
+
                /*
                 * Dont mess with busy pages, keep in the front of the
                 * queue, most likely are being paged out.
@@ -716,25 +737,26 @@ rescan0:
                        continue;
                }
 
-               /*
-                * If the object is not being used, we ignore previous 
-                * references.
-                */
                if (m->object->ref_count == 0) {
+                       /*
+                        * If the object is not being used, we ignore previous 
+                        * references.
+                        */
                        vm_page_flag_clear(m, PG_REFERENCED);
                        pmap_clear_reference(m);
 
-               /*
-                * Otherwise, if the page has been referenced while in the 
-                * inactive queue, we bump the "activation count" upwards, 
-                * making it less likely that the page will be added back to 
-                * the inactive queue prematurely again.  Here we check the 
-                * page tables (or emulated bits, if any), given the upper 
-                * level VM system not knowing anything about existing 
-                * references.
-                */
                } else if (((m->flags & PG_REFERENCED) == 0) &&
-                       (actcount = pmap_ts_referenced(m))) {
+                           (actcount = pmap_ts_referenced(m))) {
+                       /*
+                        * Otherwise, if the page has been referenced while 
+                        * in the inactive queue, we bump the "activation
+                        * count" upwards, making it less likely that the
+                        * page will be added back to the inactive queue
+                        * prematurely again.  Here we check the page tables
+                        * (or emulated bits, if any), given the upper level
+                        * VM system not knowing anything about existing 
+                        * references.
+                        */
                        vm_page_activate(m);
                        m->act_count += (actcount + ACT_ADVANCE);
                        continue;
@@ -806,11 +828,9 @@ rescan0:
                         * before being freed.  This significantly extends
                         * the thrash point for a heavily loaded machine.
                         */
-                       s = splvm();
                        vm_page_flag_set(m, PG_WINATCFLS);
                        TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
                        TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
-                       splx(s);
                } else if (maxlaunder > 0) {
                        /*
                         * We always want to try to flush some dirty pages if
@@ -838,10 +858,8 @@ rescan0:
                         * Those objects are in a "rundown" state.
                         */
                        if (!swap_pageouts_ok || (object->flags & OBJ_DEAD)) {
-                               s = splvm();
                                TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
                                TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
-                               splx(s);
                                continue;
                        }
 
@@ -907,10 +925,8 @@ rescan0:
                                 * be undergoing I/O, so skip it
                                 */
                                if (m->hold_count) {
-                                       s = splvm();
                                        TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
                                        TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
-                                       splx(s);
                                        if (object->flags & OBJ_MIGHTBEDIRTY)
                                                vnodes_skipped++;
                                        vput(vp);
@@ -932,17 +948,13 @@ rescan0:
                         * the (future) cleaned page.  Otherwise we could wind
                         * up laundering or cleaning too many pages.
                         */
-                       s = splvm();
                        TAILQ_INSERT_AFTER(&vm_page_queues[PQ_INACTIVE].pl, m, &marker, pageq);
-                       splx(s);
                        if (vm_pageout_clean(m) != 0) {
                                --page_shortage;
                                --maxlaunder;
                        } 
-                       s = splvm();
                        next = TAILQ_NEXT(&marker, pageq);
                        TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, &marker, pageq);
-                       splx(s);
                        if (vp != NULL)
                                vput(vp);
                }
@@ -960,32 +972,34 @@ rescan0:
         * Scan the active queue for things we can deactivate. We nominally
         * track the per-page activity counter and use it to locate 
         * deactivation candidates.
+        *
+        * NOTE: we are still at splvm().
         */
-
        pcount = vmstats.v_active_count;
        m = TAILQ_FIRST(&vm_page_queues[PQ_ACTIVE].pl);
 
        while ((m != NULL) && (pcount-- > 0) && (page_shortage > 0)) {
+               /*
+                * Give interrupts a chance.
+                */
+               splx(s);
+               s = splvm();
 
                /*
-                * This is a consistency check, and should likely be a panic
-                * or warning.
+                * If the page was ripped out from under us, just stop.
                 */
-               if (m->queue != PQ_ACTIVE) {
+               if (m->queue != PQ_ACTIVE)
                        break;
-               }
-
                next = TAILQ_NEXT(m, pageq);
+
                /*
                 * Don't deactivate pages that are busy.
                 */
                if ((m->busy != 0) ||
                    (m->flags & PG_BUSY) ||
                    (m->hold_count != 0)) {
-                       s = splvm();
                        TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
                        TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
-                       splx(s);
                        m = next;
                        continue;
                }
@@ -1022,10 +1036,8 @@ rescan0:
                 * page activation count stats.
                 */
                if (actcount && (m->object->ref_count != 0)) {
-                       s = splvm();
                        TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
                        TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
-                       splx(s);
                } else {
                        m->act_count -= min(m->act_count, ACT_DECLINE);
                        if (vm_pageout_algorithm ||
@@ -1042,22 +1054,20 @@ rescan0:
                                        vm_page_deactivate(m);
                                }
                        } else {
-                               s = splvm();
                                TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
                                TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
-                               splx(s);
                        }
                }
                m = next;
        }
 
-       s = splvm();
-
        /*
         * We try to maintain some *really* free pages, this allows interrupt
         * code to be guaranteed space.  Since both cache and free queues 
         * are considered basically 'free', moving pages from cache to free
         * does not effect other calculations.
+        *
+        * NOTE: we are still at splvm().
         */
 
        while (vmstats.v_free_count < vmstats.v_free_reserved) {
@@ -1079,6 +1089,7 @@ rescan0:
                vm_pageout_page_free(m);
                mycpu->gd_cnt.v_dfree++;
        }
+
        splx(s);
 
 #if !defined(NO_SWAPPING)
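The inactive-queue scan above keeps its place across the blocking vm_pageout_clean() call by parking a dummy marker page in the queue: real pages can be freed or requeued while we block, but nothing else touches the marker, so resuming from it is always safe.  The relevant shape, condensed from the hunks above (marker initialization elided):

        /* still at splvm(); 'marker' is a dummy struct vm_page */
        TAILQ_INSERT_AFTER(&vm_page_queues[PQ_INACTIVE].pl, m, &marker, pageq);
        if (vm_pageout_clean(m) != 0) {         /* may block and reshuffle the queue */
                --page_shortage;
                --maxlaunder;
        }
        next = TAILQ_NEXT(&marker, pageq);      /* resume from the marker, not 'm' */
        TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, &marker, pageq);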
index a730467..deccc9e 100644 (file)
@@ -62,7 +62,7 @@
  * rights to redistribute these changes.
  *
  * $FreeBSD: src/sys/vm/vm_pager.c,v 1.54.2.2 2001/11/18 07:11:00 dillon Exp $
- * $DragonFly: src/sys/vm/vm_pager.c,v 1.9 2004/03/23 22:54:32 dillon Exp $
+ * $DragonFly: src/sys/vm/vm_pager.c,v 1.10 2004/05/13 17:40:19 dillon Exp $
  */
 
 /*
@@ -288,23 +288,6 @@ vm_pager_sync(void)
 
 #endif
 
-vm_offset_t
-vm_pager_map_page(vm_page_t m)
-{
-       vm_offset_t kva;
-
-       kva = kmem_alloc_wait(pager_map, PAGE_SIZE);
-       pmap_kenter(kva, VM_PAGE_TO_PHYS(m));
-       return (kva);
-}
-
-void
-vm_pager_unmap_page(vm_offset_t kva)
-{
-       pmap_kremove(kva);
-       kmem_free_wakeup(pager_map, kva, PAGE_SIZE);
-}
-
 vm_object_t
 vm_pager_object_lookup(struct pagerlst *pg_list, void *handle)
 {
index fb0bd2a..2801a82 100644 (file)
@@ -37,7 +37,7 @@
  *
  *     @(#)vm_pager.h  8.4 (Berkeley) 1/12/94
  * $FreeBSD: src/sys/vm/vm_pager.h,v 1.24.2.2 2002/12/31 09:34:51 dillon Exp $
- * $DragonFly: src/sys/vm/vm_pager.h,v 1.3 2003/08/20 08:03:01 rob Exp $
+ * $DragonFly: src/sys/vm/vm_pager.h,v 1.4 2004/05/13 17:40:19 dillon Exp $
  */
 
 /*
@@ -102,11 +102,7 @@ static __inline int vm_pager_get_pages (vm_object_t, vm_page_t *, int, int);
 static __inline boolean_t vm_pager_has_page (vm_object_t, vm_pindex_t, int *, int *);
 void vm_pager_init (void);
 vm_object_t vm_pager_object_lookup (struct pagerlst *, void *);
-vm_offset_t vm_pager_map_pages (vm_page_t *, int, boolean_t);
-vm_offset_t vm_pager_map_page (vm_page_t);
 void vm_pager_sync (void);
-void vm_pager_unmap_pages (vm_offset_t, int);
-void vm_pager_unmap_page (vm_offset_t);
 void vm_pager_strategy (vm_object_t object, struct buf *bp);
 struct buf *getchainbuf(struct buf *bp, struct vnode *vp, int flags);
 void flushchainbuf(struct buf *nbp);
index 35a86f5..e64bbcb 100644 (file)
@@ -39,7 +39,7 @@
  *
  *     from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91
  * $FreeBSD: src/sys/vm/vnode_pager.c,v 1.116.2.7 2002/12/31 09:34:51 dillon Exp $
- * $DragonFly: src/sys/vm/vnode_pager.c,v 1.13 2004/05/08 04:11:45 dillon Exp $
+ * $DragonFly: src/sys/vm/vnode_pager.c,v 1.14 2004/05/13 17:40:19 dillon Exp $
  */
 
 /*
@@ -60,6 +60,7 @@
 #include <sys/buf.h>
 #include <sys/vmmeter.h>
 #include <sys/conf.h>
+#include <sys/sfbuf.h>
 
 #include <vm/vm.h>
 #include <vm/vm_object.h>
@@ -299,14 +300,16 @@ vnode_pager_setsize(struct vnode *vp, vm_ooffset_t nsize)
                        if (m && m->valid) {
                                int base = (int)nsize & PAGE_MASK;
                                int size = PAGE_SIZE - base;
+                               struct sf_buf *sf;
 
                                /*
                                 * Clear out partial-page garbage in case
                                 * the page has been mapped.
                                 */
-                               kva = vm_pager_map_page(m);
+                               sf = sf_buf_alloc(m, SFBA_QUICK);
+                               kva = sf_buf_kva(sf);
                                bzero((caddr_t)kva + base, size);
-                               vm_pager_unmap_page(kva);
+                               sf_buf_free(sf);
 
                                /*
                                 * XXX work around SMP data integrity race
@@ -413,6 +416,7 @@ vnode_pager_input_smlfs(vm_object_t object, vm_page_t m)
        struct vnode *dp, *vp;
        struct buf *bp;
        vm_offset_t kva;
+       struct sf_buf *sf;
        int fileaddr;
        vm_offset_t bsize;
        int error = 0;
@@ -426,7 +430,8 @@ vnode_pager_input_smlfs(vm_object_t object, vm_page_t m)
 
        VOP_BMAP(vp, 0, &dp, 0, NULL, NULL);
 
-       kva = vm_pager_map_page(m);
+       sf = sf_buf_alloc(m, 0);
+       kva = sf_buf_kva(sf);
 
        for (i = 0; i < PAGE_SIZE / bsize; i++) {
                vm_ooffset_t address;
@@ -480,7 +485,7 @@ vnode_pager_input_smlfs(vm_object_t object, vm_page_t m)
                        bzero((caddr_t) kva + i * bsize, bsize);
                }
        }
-       vm_pager_unmap_page(kva);
+       sf_buf_free(sf);
        pmap_clear_modify(m);
        vm_page_flag_clear(m, PG_ZERO);
        if (error) {
@@ -502,6 +507,7 @@ vnode_pager_input_old(vm_object_t object, vm_page_t m)
        int error;
        int size;
        vm_offset_t kva;
+       struct sf_buf *sf;
 
        error = 0;
 
@@ -519,7 +525,8 @@ vnode_pager_input_old(vm_object_t object, vm_page_t m)
                 * Allocate a kernel virtual address and initialize so that
                 * we can use VOP_READ/WRITE routines.
                 */
-               kva = vm_pager_map_page(m);
+               sf = sf_buf_alloc(m, 0);
+               kva = sf_buf_kva(sf);
 
                aiov.iov_base = (caddr_t) kva;
                aiov.iov_len = size;
@@ -540,7 +547,7 @@ vnode_pager_input_old(vm_object_t object, vm_page_t m)
                        else if (count != PAGE_SIZE)
                                bzero((caddr_t) kva + count, PAGE_SIZE - count);
                }
-               vm_pager_unmap_page(kva);
+               sf_buf_free(sf);
        }
        pmap_clear_modify(m);
        vm_page_undirty(m);
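The vnode_pager.c hunks above replace each vm_pager_map_page()/vm_pager_unmap_page() pair with an sf_buf, which supplies the temporary kernel mapping.  The resulting pattern, with the bzero() standing in for whatever access the caller needs:

        struct sf_buf *sf;
        vm_offset_t kva;

        sf = sf_buf_alloc(m, 0);        /* replaces vm_pager_map_page(m) */
        kva = sf_buf_kva(sf);
        bzero((caddr_t)kva, PAGE_SIZE); /* access the page through its kva */
        sf_buf_free(sf);                /* replaces vm_pager_unmap_page(kva) */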