Fix many bugs and issues in the VM system, particularly related to
heavy paging.

author     Matthew Dillon <dillon@dragonflybsd.org>
           Fri, 9 May 2008 07:24:48 +0000 (07:24 +0000)
committer  Matthew Dillon <dillon@dragonflybsd.org>
           Fri, 9 May 2008 07:24:48 +0000 (07:24 +0000)

* (cleanup) PG_WRITEABLE is now set by the low level pmap code and not by
  high level code.  It means 'this page may have a writeable managed page
  table mapping', i.e. the hardware can dirty the page at any time.  Such a
  page must be tested via the appropriate pmap calls before being disposed
  of (a sketch of this check appears after the change list below).

* (cleanup) PG_MAPPED is now handled by the low level pmap code and only
  applies to managed mappings.  There is still a bit of cruft left over
  related to the pmap code's page table pages but the high level code is now
  clean.

* (bug) Various XIO, SFBUF, and MSFBUF routines which bypass normal paging
  operations were not properly dirtying pages when the caller intended
  to write to them.

* (bug) vfs_busy_pages() in kern/vfs_bio.c had a busy race.  Separate the
  code into two passes to ensure that all the pages have been marked as
  undergoing I/O before vm_page_protect() is called (sketched after the
  change list below).  vm_page_protect(... VM_PROT_NONE) can block under
  very heavy paging conditions, and if the pages have not been marked for
  I/O that could blow up the code.

* (optimization) Minor optimization: when busying pages for write I/O,
  downgrade the page table mappings to read-only instead of removing them
  entirely.

* (bug) In platform/pc32/i386/pmap.c fix various places where
  pmap_inval_add() was being called at the wrong point.  Only one was
  critical, in pmap_enter(), where pmap_inval_add() was being called so far
  away from the modification of the pmap entry that it could wind up being
  flushed out prior to the modification, breaking the required cpusync.

  pmap.c also contains most of the work involved in the PG_MAPPED and
  PG_WRITEABLE changes.

* (bug) Close numerous pte updating races against the hardware setting the
  modified bit (the atomic retry used is sketched after the change list
  below).  There is still one race left (in pmap_enter()).

* (bug) Disable pmap_copy() entirely.   Fix most of the bugs anyway, but
  there is still one left in the handling of the srcmpte variable.

* (cleanup) Change vm_page_dirty() from an inline to a real procedure, and
  move the code which sets the object to writeable/maybedirty into
  vm_page_dirty().

* (bug) Calls to vm_page_protect(... VM_PROT_NONE) can block.  Fix all cases
  where this call was made with a non-busied page.  All such calls are
  now made with a busied page, preventing blocking races from re-dirtying
  or remapping the page unexpectedly.

  (Such blockages could only occur during heavy paging activity where the
  underlying page table pages are being actively recycled).

* (bug) Fix the pageout code to properly mark pages as undergoing I/O before
  changing their protection bits.

* (bug) Busy the pages undergoing zeroing or partial zeroing in the vnode
  pager (vm/vnode_pager.c) to avoid unexpected side effects.
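
The three user-space sketches below illustrate the patterns referenced in the
change list.  All structs, helper names, and flag values in them are
stand-ins, not the kernel's actual API; only the order of operations is the
point.

First, the disposal rule implied by the PG_WRITEABLE cleanup: a page that may
still carry a writeable managed mapping has to be checked for hardware-set
dirty state before it is freed or cached.

    /*
     * Hedged sketch of the PG_WRITEABLE disposal rule.  The field and
     * helper names are stand-ins for the kernel's vm_page/pmap calls.
     */
    #include <stdbool.h>
    #include <stdio.h>

    struct page {
        bool writeable_mapping;   /* stand-in for PG_WRITEABLE */
        bool mmu_dirty;           /* stand-in for a hardware-set modified bit */
        bool dirty;               /* stand-in for m->dirty */
    };

    static void
    dispose_page(struct page *p)
    {
        if (p->writeable_mapping) {
            /* stand-in for pmap_is_modified()/vm_page_test_dirty() */
            if (p->mmu_dirty)
                p->dirty = true;
            /* stand-in for vm_page_protect(m, VM_PROT_NONE); may block */
            p->writeable_mapping = false;
        }
        if (p->dirty)
            printf("page must be laundered, not freed\n");
        else
            printf("page can be freed\n");
    }

    int
    main(void)
    {
        struct page p = { .writeable_mapping = true, .mmu_dirty = true };
        dispose_page(&p);
        return (0);
    }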
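
Second, the two-pass shape of the vfs_busy_pages() fix: every page is flagged
as undergoing I/O before anything that can block runs, and for writes the
mapping is only downgraded to read-only.  The real code uses
vm_page_io_start() and vm_page_protect(), as shown in the kern/vfs_bio.c hunk
below.

    /* User-space stand-ins for the vm_page soft-busy and protection state. */
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    struct page {
        bool io_busy;   /* stand-in for the state set by vm_page_io_start() */
        int  prot;      /* stand-in for the page's current pmap protection */
    };

    enum { PROT_NONE, PROT_READ, PROT_ALL };

    static void page_io_start(struct page *p) { p->io_busy = true; }

    /* Stand-in for vm_page_protect(); may sleep when dropping to PROT_NONE. */
    static void page_protect(struct page *p, int prot) { p->prot = prot; }

    static void
    busy_pages_for_write(struct page **pages, size_t npages)
    {
        size_t i;

        /* Pass 1: flag every page as undergoing I/O; nothing here blocks. */
        for (i = 0; i < npages; i++)
            page_io_start(pages[i]);

        /*
         * Pass 2: adjust protections.  page_protect() may block, but the
         * pages are already flagged for I/O, so a blocked call can no
         * longer race the setup of the remaining pages.  For a write,
         * read-only is sufficient; the mapping need not be removed.
         */
        for (i = 0; i < npages; i++)
            page_protect(pages[i], PROT_READ);
    }

    int
    main(void)
    {
        struct page a = { false, PROT_ALL }, b = { false, PROT_ALL };
        struct page *pages[] = { &a, &b };

        busy_pages_for_write(pages, 2);
        printf("a: io_busy=%d prot=%d\n", a.io_busy, a.prot);
        return (0);
    }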
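
Third, the atomic retry used to close the pte-update races against the
hardware-set modified bit.  PTE_RW/PTE_M and the C11 atomics stand in for the
kernel's PG_RW/PG_M bits and atomic_cmpset_int(); the corresponding kernel
change is in pmap_clearbit() in the pc32 pmap hunk below.

    #include <stdatomic.h>
    #include <stdio.h>

    #define PTE_RW  0x02u   /* stand-in for PG_RW */
    #define PTE_M   0x40u   /* stand-in for PG_M  */

    /* A second agent (the MMU in the kernel) may set PTE_M at any time. */
    static _Atomic unsigned pte = PTE_RW;

    static void
    clear_writable(void)
    {
        unsigned old, new;

        old = atomic_load(&pte);
        for (;;) {
            if (old & PTE_M) {
                /* Already dirty: record it, then clear both bits. */
                /* (the kernel calls vm_page_dirty(m) here) */
                atomic_fetch_and(&pte, ~(PTE_M | PTE_RW));
                break;
            }
            /*
             * Not dirty yet.  PTE_M may be set between our read and our
             * write, so only a compare-and-swap that fails on any change
             * (including a newly set PTE_M) is safe.
             */
            new = old & ~PTE_RW;
            if (atomic_compare_exchange_weak(&pte, &old, new))
                break;
            /* 'old' was reloaded by the failed CAS; retry. */
        }
    }

    int
    main(void)
    {
        clear_writable();
        printf("pte = %#x\n", (unsigned)atomic_load(&pte));
        return (0);
    }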

16 files changed:
sys/kern/kern_slaballoc.c
sys/kern/kern_umtx.c
sys/kern/kern_xio.c
sys/kern/sys_pipe.c
sys/kern/vfs_bio.c
sys/platform/pc32/i386/pmap.c
sys/platform/pc32/i386/trap.c
sys/platform/vkernel/platform/copyio.c
sys/platform/vkernel/platform/pmap.c
sys/vm/swap_pager.c
sys/vm/vm_fault.c
sys/vm/vm_object.c
sys/vm/vm_page.c
sys/vm/vm_page.h
sys/vm/vm_pageout.c
sys/vm/vnode_pager.c

index db4abfa..90c42e1 100644 (file)
@@ -33,7 +33,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/kern/kern_slaballoc.c,v 1.51 2007/11/18 09:53:19 sephe Exp $
+ * $DragonFly: src/sys/kern/kern_slaballoc.c,v 1.52 2008/05/09 07:24:45 dillon Exp $
  *
  * This module implements a slab allocator drop-in replacement for the
  * kernel malloc().
@@ -1148,6 +1148,7 @@ kmem_slab_alloc(vm_size_t size, vm_offset_t align, int flags)
            while (i != 0) {
                i -= PAGE_SIZE;
                m = vm_page_lookup(&kernel_object, OFF_TO_IDX(addr + i));
+               /* page should already be busy */
                vm_page_free(m);
            }
            vm_map_delete(&kernel_map, addr, addr + size, &count);
@@ -1164,6 +1165,8 @@ kmem_slab_alloc(vm_size_t size, vm_offset_t align, int flags)
      *
      * Mark the map entry as non-pageable using a routine that allows us to
      * populate the underlying pages.
+     *
+     * The pages were busied by the allocations above.
      */
     vm_map_set_wired_quick(&kernel_map, addr, size, &count);
     crit_exit();
@@ -1176,13 +1179,15 @@ kmem_slab_alloc(vm_size_t size, vm_offset_t align, int flags)
 
        m = vm_page_lookup(&kernel_object, OFF_TO_IDX(addr + i));
        m->valid = VM_PAGE_BITS_ALL;
+       /* page should already be busy */
        vm_page_wire(m);
        vm_page_wakeup(m);
        pmap_enter(&kernel_pmap, addr + i, m, VM_PROT_ALL, 1);
        if ((m->flags & PG_ZERO) == 0 && (flags & M_ZERO))
            bzero((char *)addr + i, PAGE_SIZE);
        vm_page_flag_clear(m, PG_ZERO);
-       vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE | PG_REFERENCED);
+       KKASSERT(m->flags & (PG_WRITEABLE | PG_MAPPED));
+       vm_page_flag_set(m, PG_REFERENCED);
     }
     vm_map_unlock(&kernel_map);
     vm_map_entry_release(count);
index 64b51b2..743f716 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/kern/kern_umtx.c,v 1.8 2008/04/14 20:00:28 dillon Exp $
+ * $DragonFly: src/sys/kern/kern_umtx.c,v 1.9 2008/05/09 07:24:45 dillon Exp $
  */
 
 /*
@@ -150,6 +150,7 @@ sys_umtx_sleep(struct umtx_sleep_args *uap)
     }
 
     sf_buf_free(sf);
+    /*vm_page_dirty(m); we don't actually dirty the page */
     vm_page_unhold(m);
     return(error);
 }
index 4d36a6b..2024627 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/kern/kern_xio.c,v 1.15 2007/08/13 17:20:04 dillon Exp $
+ * $DragonFly: src/sys/kern/kern_xio.c,v 1.16 2008/05/09 07:24:45 dillon Exp $
  */
 /*
  * Kernel XIO interface.  An initialized XIO is basically a collection of
@@ -142,9 +142,10 @@ xio_init_ubuf(xio_t xio, void *ubase, size_t ubytes, int flags)
 
        /*
         * If a failure occured clean out what we loaded and return EFAULT.
-        * Return 0 on success.
+        * Return 0 on success.  Do not dirty the pages.
         */
        if (i < XIO_INTERNAL_PAGES && n) {
+           xio->xio_flags &= ~XIOF_WRITE;
            xio_release(xio);
            xio->xio_error = EFAULT;
        }
@@ -203,7 +204,8 @@ xio_init_kbuf(xio_t xio, void *kbase, size_t kbytes)
 }
 
 /*
- * Initialize an XIO given an array of vm_page pointers.
+ * Initialize an XIO given an array of vm_page pointers.  The caller is
+ * responsible for any modified state changes for the pages.
  */
 int
 xio_init_pages(xio_t xio, struct vm_page **mbase, int npages, int xflags)
@@ -240,6 +242,8 @@ xio_release(xio_t xio)
     crit_enter();
     for (i = 0; i < xio->xio_npages; ++i) {
        m = xio->xio_pages[i];
+       if (xio->xio_flags & XIOF_WRITE)
+               vm_page_dirty(m);
        vm_page_unhold(m);
     }
     crit_exit();
index 7adb44b..168fa6d 100644 (file)
@@ -17,7 +17,7 @@
  *    are met.
  *
  * $FreeBSD: src/sys/kern/sys_pipe.c,v 1.60.2.13 2002/08/05 15:05:15 des Exp $
- * $DragonFly: src/sys/kern/sys_pipe.c,v 1.46 2008/05/08 01:31:01 dillon Exp $
+ * $DragonFly: src/sys/kern/sys_pipe.c,v 1.47 2008/05/09 07:24:45 dillon Exp $
  */
 
 /*
@@ -464,7 +464,8 @@ pipe_read(struct file *fp, struct uio *uio, struct ucred *cred, int fflags)
                        if (size > (u_int) uio->uio_resid)
                                size = (u_int) uio->uio_resid;
 
-                       error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
+                       error = uiomove(&rpipe->pipe_buffer.buffer
+                                         [rpipe->pipe_buffer.out],
                                        size, uio);
                        if (error)
                                break;
@@ -1052,7 +1053,8 @@ pipe_write(struct file *fp, struct uio *uio, struct ucred *cred, int fflags)
                                
                                /* Transfer first segment */
 
-                               error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 
+                               error = uiomove(&wpipe->pipe_buffer.buffer
+                                                 [wpipe->pipe_buffer.in], 
                                                segsize, uio);
                                
                                if (error == 0 && segsize < size) {
@@ -1065,7 +1067,8 @@ pipe_write(struct file *fp, struct uio *uio, struct ucred *cred, int fflags)
                                            wpipe->pipe_buffer.size)
                                                panic("Expected pipe buffer wraparound disappeared");
                                                
-                                       error = uiomove(&wpipe->pipe_buffer.buffer[0],
+                                       error = uiomove(&wpipe->pipe_buffer.
+                                                         buffer[0],
                                                        size - segsize, uio);
                                }
                                if (error == 0) {
index 42fb7d3..1c3f0c7 100644 (file)
@@ -12,7 +12,7 @@
  *             John S. Dyson.
  *
  * $FreeBSD: src/sys/kern/vfs_bio.c,v 1.242.2.20 2003/05/28 18:38:10 alc Exp $
- * $DragonFly: src/sys/kern/vfs_bio.c,v 1.101 2008/05/06 00:13:53 dillon Exp $
+ * $DragonFly: src/sys/kern/vfs_bio.c,v 1.102 2008/05/09 07:24:45 dillon Exp $
  */
 
 /*
@@ -146,6 +146,8 @@ SYSCTL_INT(_vfs, OID_AUTO, hirunningspace, CTLFLAG_RW, &hirunningspace, 0,
 /*
  * Sysctls determining current state of the buffer cache.
  */
+SYSCTL_INT(_vfs, OID_AUTO, nbuf, CTLFLAG_RD, &nbuf, 0,
+       "Total number of buffers in buffer cache");
 SYSCTL_INT(_vfs, OID_AUTO, numdirtybuffers, CTLFLAG_RD, &numdirtybuffers, 0,
        "Pending number of dirty buffers (all)");
 SYSCTL_INT(_vfs, OID_AUTO, numdirtybuffershw, CTLFLAG_RD, &numdirtybuffershw, 0,
@@ -3318,14 +3320,21 @@ vfs_busy_pages(struct vnode *vp, struct buf *bp)
                        ("vfs_busy_pages: no buffer offset"));
                vfs_setdirty(bp);
 
+               /*
+                * Loop until none of the pages are busy.
+                */
 retry:
                for (i = 0; i < bp->b_xio.xio_npages; i++) {
                        vm_page_t m = bp->b_xio.xio_pages[i];
+
                        if (vm_page_sleep_busy(m, FALSE, "vbpage"))
                                goto retry;
                }
 
-               bogus = 0;
+               /*
+                * Setup for I/O, soft-busy the page right now because
+                * the next loop may block.
+                */
                for (i = 0; i < bp->b_xio.xio_npages; i++) {
                        vm_page_t m = bp->b_xio.xio_pages[i];
 
@@ -3334,6 +3343,16 @@ retry:
                                vm_object_pip_add(obj, 1);
                                vm_page_io_start(m);
                        }
+               }
+
+               /*
+                * Adjust protections for I/O and do bogus-page mapping.
+                * Assume that vm_page_protect() can block (it can block
+                * if VM_PROT_NONE, don't take any chances regardless).
+                */
+               bogus = 0;
+               for (i = 0; i < bp->b_xio.xio_npages; i++) {
+                       vm_page_t m = bp->b_xio.xio_pages[i];
 
                        /*
                         * When readying a vnode-backed buffer for a write
@@ -3350,12 +3369,14 @@ retry:
                         * Bogus page replacement is, uh, bogus.  We need
                         * to find a better way.
                         */
-                       vm_page_protect(m, VM_PROT_NONE);
                        if (bp->b_cmd == BUF_CMD_WRITE) {
+                               vm_page_protect(m, VM_PROT_READ);
                                vfs_page_set_valid(bp, foff, i, m);
                        } else if (m->valid == VM_PAGE_BITS_ALL) {
                                bp->b_xio.xio_pages[i] = bogus_page;
                                bogus++;
+                       } else {
+                               vm_page_protect(m, VM_PROT_NONE);
                        }
                        foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
                }
index 341d850..64758e1 100644 (file)
@@ -40,7 +40,7 @@
  *
  *     from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
  * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $
- * $DragonFly: src/sys/platform/pc32/i386/pmap.c,v 1.83 2008/04/28 07:05:06 dillon Exp $
+ * $DragonFly: src/sys/platform/pc32/i386/pmap.c,v 1.84 2008/05/09 07:24:46 dillon Exp $
  */
 
 /*
@@ -199,8 +199,7 @@ static void i386_protection_init (void);
 static __inline void   pmap_clearbit (vm_page_t m, int bit);
 
 static void    pmap_remove_all (vm_page_t m);
-static vm_page_t pmap_enter_quick (pmap_t pmap, vm_offset_t va,
-                                     vm_page_t m, vm_page_t mpte);
+static void    pmap_enter_quick (pmap_t pmap, vm_offset_t va, vm_page_t m);
 static int pmap_remove_pte (struct pmap *pmap, unsigned *ptq, 
                                vm_offset_t sva, pmap_inval_info_t info);
 static void pmap_remove_page (struct pmap *pmap, 
@@ -670,9 +669,9 @@ pmap_kenter(vm_offset_t va, vm_paddr_t pa)
        pmap_inval_info info;
 
        pmap_inval_init(&info);
-       pmap_inval_add(&info, &kernel_pmap, va);
        npte = pa | PG_RW | PG_V | pgeflag;
        pte = (unsigned *)vtopte(va);
+       pmap_inval_add(&info, &kernel_pmap, va);
        *pte = npte;
        pmap_inval_flush(&info);
 }
@@ -721,8 +720,8 @@ pmap_kremove(vm_offset_t va)
        pmap_inval_info info;
 
        pmap_inval_init(&info);
-       pmap_inval_add(&info, &kernel_pmap, va);
        pte = (unsigned *)vtopte(va);
+       pmap_inval_add(&info, &kernel_pmap, va);
        *pte = 0;
        pmap_inval_flush(&info);
 }
@@ -872,10 +871,10 @@ pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
 {
        vm_page_t m;
 
-retry:
-       m = vm_page_lookup(object, pindex);
-       if (m && vm_page_sleep_busy(m, FALSE, "pplookp"))
-               goto retry;
+       do {
+               m = vm_page_lookup(object, pindex);
+       } while (m && vm_page_sleep_busy(m, FALSE, "pplookp"));
+
        return(m);
 }
 
@@ -921,9 +920,15 @@ pmap_dispose_proc(struct proc *p)
 static int 
 _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, pmap_inval_info_t info) 
 {
-       pmap_inval_flush(info);
-       while (vm_page_sleep_busy(m, FALSE, "pmuwpt"))
-               ;
+       /* 
+        * Wait until we can busy the page ourselves.  We cannot have
+        * any active flushes if we block.
+        */
+       if (m->flags & PG_BUSY) {
+               pmap_inval_flush(info);
+               while (vm_page_sleep_busy(m, FALSE, "pmuwpt"))
+                       ;
+       }
        KASSERT(m->queue == PQ_NONE,
                ("_pmap_unwire_pte_hold: %p->queue != PQ_NONE", m));
 
@@ -952,11 +957,15 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, pmap_inval_info_t info)
                --m->wire_count;
                KKASSERT(m->wire_count == 0);
                --vmstats.v_wire_count;
+               vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
                vm_page_flash(m);
                vm_page_free_zero(m);
                return 1;
+       } else {
+               KKASSERT(m->hold_count > 1);
+               vm_page_unhold(m);
+               return 0;
        }
-       return 0;
 }
 
 static PMAP_INLINE int
@@ -1554,7 +1563,7 @@ pmap_remove_entry(struct pmap *pmap, vm_page_t m,
        if (pv) {
                TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
                m->md.pv_list_count--;
-               if (TAILQ_FIRST(&m->md.pv_list) == NULL)
+               if (TAILQ_EMPTY(&m->md.pv_list))
                        vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
                TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
                ++pmap->pm_generation;
@@ -1783,8 +1792,8 @@ pmap_remove_all(vm_page_t m)
 
                pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
                pmap_inval_add(&info, pv->pv_pmap, pv->pv_va);
-
                tpte = loadandclear(pte);
+
                if (tpte & PG_W)
                        pv->pv_pmap->pm_stats.wired_count--;
 
@@ -1809,12 +1818,13 @@ pmap_remove_all(vm_page_t m)
                TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
                ++pv->pv_pmap->pm_generation;
                m->md.pv_list_count--;
+               if (TAILQ_EMPTY(&m->md.pv_list))
+                       vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
                pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem, &info);
                free_pv_entry(pv);
        }
-
-       vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
        crit_exit();
+       KKASSERT((m->flags & (PG_MAPPED|PG_WRITEABLE)) == 0);
        pmap_inval_flush(&info);
 }
 
@@ -1883,7 +1893,12 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
                        unsigned pbits;
                        vm_page_t m;
 
-                       /* XXX this isn't optimal */
+                       /*
+                        * XXX non-optimal.  Note also that there can be
+                        * no pmap_inval_flush() calls until after we modify
+                        * ptbase[sindex] (or otherwise we have to do another
+                        * pmap_inval_add() call).
+                        */
                        pmap_inval_add(&info, pmap, i386_ptob(sindex));
                        pbits = ptbase[sindex];
 
@@ -1960,14 +1975,14 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 #endif
        }
 
-       mpte = NULL;
        /*
         * In the case that a page table page is not
         * resident, we are creating it here.
         */
-       if (va < UPT_MIN_ADDRESS) {
+       if (va < UPT_MIN_ADDRESS)
                mpte = pmap_allocpte(pmap, va);
-       }
+       else
+               mpte = NULL;
 
        pmap_inval_init(&info);
        pte = pmap_pte(pmap, va);
@@ -1981,7 +1996,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
        }
 
        pa = VM_PAGE_TO_PHYS(m) & PG_FRAME;
-       pmap_inval_add(&info, pmap, va); /* XXX non-optimal */
        origpte = *(vm_offset_t *)pte;
        opa = origpte & PG_FRAME;
 
@@ -2031,6 +2045,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
                                vm_page_dirty(om);
                        }
                        pa |= PG_MANAGED;
+                       KKASSERT(m->flags & PG_MAPPED);
                }
                goto validate;
        } 
@@ -2054,6 +2069,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
            (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
                pmap_insert_entry(pmap, va, mpte, m);
                pa |= PG_MANAGED;
+               vm_page_flag_set(m, PG_MAPPED);
        }
 
        /*
@@ -2081,27 +2097,30 @@ validate:
         * to update the pte.
         */
        if ((origpte & ~(PG_M|PG_A)) != newpte) {
+               pmap_inval_add(&info, pmap, va);
                *pte = newpte | PG_A;
+               if (newpte & PG_RW)
+                       vm_page_flag_set(m, PG_WRITEABLE);
        }
+       KKASSERT((newpte & VPTE_MANAGED) == 0 || m->flags & PG_MAPPED);
        pmap_inval_flush(&info);
 }
 
 /*
- * this code makes some *MAJOR* assumptions:
- * 1. Current pmap & pmap exists.
- * 2. Not wired.
- * 3. Read access.
- * 4. No page table pages.
- * 5. Tlbflush is deferred to calling procedure.
- * 6. Page IS managed.
- * but is *MUCH* faster than pmap_enter...
+ * This code works like pmap_enter() but assumes VM_PROT_READ and not-wired.
+ * This code also assumes that the pmap has no pre-existing entry for this
+ * VA.
+ *
+ * This code currently may only be used on user pmaps, not kernel_pmap.
  */
-
-static vm_page_t
-pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)
+static void
+pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
 {
        unsigned *pte;
        vm_paddr_t pa;
+       vm_page_t mpte;
+       unsigned ptepindex;
+       vm_offset_t ptepa;
        pmap_inval_info info;
 
        pmap_inval_init(&info);
@@ -2119,22 +2138,21 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)
 #endif
        }
 
+       KKASSERT(va < UPT_MIN_ADDRESS); /* assert used on user pmaps only */
+
        /*
-        * In the case that a page table page is not
-        * resident, we are creating it here.
+        * Calculate the page table page (mpte), allocating it if necessary.
+        *
+        * A held page table page (mpte), or NULL, is passed onto the
+        * section following.
         */
        if (va < UPT_MIN_ADDRESS) {
-               unsigned ptepindex;
-               vm_offset_t ptepa;
-
                /*
                 * Calculate pagetable page index
                 */
                ptepindex = va >> PDRSHIFT;
-               if (mpte && (mpte->pindex == ptepindex)) {
-                       mpte->hold_count++;
-               } else {
-retry:
+
+               do {
                        /*
                         * Get the page directory entry
                         */
@@ -2148,43 +2166,44 @@ retry:
                                if (ptepa & PG_PS)
                                        panic("pmap_enter_quick: unexpected mapping into 4MB page");
                                if (pmap->pm_ptphint &&
-                                       (pmap->pm_ptphint->pindex == ptepindex)) {
+                                   (pmap->pm_ptphint->pindex == ptepindex)) {
                                        mpte = pmap->pm_ptphint;
                                } else {
                                        mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
                                        pmap->pm_ptphint = mpte;
                                }
-                               if (mpte == NULL)
-                                       goto retry;
-                               mpte->hold_count++;
+                               if (mpte)
+                                       mpte->hold_count++;
                        } else {
                                mpte = _pmap_allocpte(pmap, ptepindex);
                        }
-               }
+               } while (mpte == NULL);
        } else {
                mpte = NULL;
+               /* this code path is not yet used */
        }
 
        /*
-        * This call to vtopte makes the assumption that we are
-        * entering the page into the current pmap.  In order to support
-        * quick entry into any pmap, one would likely use pmap_pte_quick.
-        * But that isn't as quick as vtopte.
+        * With a valid (and held) page directory page, we can just use
+        * vtopte() to get to the pte.  If the pte is already present
+        * we do not disturb it.
         */
        pte = (unsigned *)vtopte(va);
-       if (*pte) {
+       if (*pte & PG_V) {
                if (mpte)
                        pmap_unwire_pte_hold(pmap, mpte, &info);
-               return 0;
+               pa = VM_PAGE_TO_PHYS(m);
+               KKASSERT(((*pte ^ pa) & PG_FRAME) == 0);
+               return;
        }
 
        /*
-        * Enter on the PV list if part of our managed memory. Note that we
-        * raise IPL while manipulating pv_table since pmap_enter can be
-        * called at interrupt time.
+        * Enter on the PV list if part of our managed memory
         */
-       if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
+       if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
                pmap_insert_entry(pmap, va, mpte, m);
+               vm_page_flag_set(m, PG_MAPPED);
+       }
 
        /*
         * Increment counters
@@ -2200,8 +2219,8 @@ retry:
                *pte = pa | PG_V | PG_U;
        else
                *pte = pa | PG_V | PG_U | PG_MANAGED;
-
-       return mpte;
+/*     pmap_inval_add(&info, pmap, va); shouldn't be needed inval->valid */
+       pmap_inval_flush(&info);
 }
 
 /*
@@ -2304,10 +2323,8 @@ pmap_object_init_pt_callback(vm_page_t p, void *data)
                        vm_page_deactivate(p);
                vm_page_busy(p);
                rel_index = p->pindex - info->start_pindex;
-               info->mpte = pmap_enter_quick(info->pmap,
-                                             info->addr + i386_ptob(rel_index),
-                                             p, info->mpte);
-               vm_page_flag_set(p, PG_MAPPED);
+               pmap_enter_quick(info->pmap,
+                                info->addr + i386_ptob(rel_index), p);
                vm_page_wakeup(p);
        }
        return(0);
@@ -2336,7 +2353,7 @@ pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
        vm_offset_t starta;
        vm_offset_t addr;
        vm_pindex_t pindex;
-       vm_page_t m, mpte;
+       vm_page_t m;
        vm_object_t object;
        struct lwp *lp;
 
@@ -2363,7 +2380,6 @@ pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
         * page/object association, interrupts can free pages and remove 
         * them from their objects.
         */
-       mpte = NULL;
        crit_enter();
        for (i = 0; i < PAGEORDER_SIZE; i++) {
                vm_object_t lobject;
@@ -2411,8 +2427,7 @@ pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
                                vm_page_deactivate(m);
                        }
                        vm_page_busy(m);
-                       mpte = pmap_enter_quick(pmap, addr, m, mpte);
-                       vm_page_flag_set(m, PG_MAPPED);
+                       pmap_enter_quick(pmap, addr, m);
                        vm_page_wakeup(m);
                }
        }
@@ -2483,6 +2498,14 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
 
        if (dst_addr != src_addr)
                return;
+       /*
+        * XXX BUGGY.  Amoung other things srcmpte is assumed to remain
+        * valid through blocking calls, and that's just not going to
+        * be the case.
+        *
+        * FIXME!
+        */
+       return;
 
        src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
        if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) {
@@ -2539,9 +2562,10 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
                }
 
                srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex);
-               if ((srcmpte == NULL) ||
-                       (srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY))
+               if ((srcmpte == NULL) || (srcmpte->hold_count == 0) ||
+                   (srcmpte->flags & PG_BUSY)) {
                        continue;
+               }
 
                if (pdnxt > end_addr)
                        pdnxt = end_addr;
@@ -2572,7 +2596,9 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
                                        kprintf("WARNING: pmap_copy: detected and corrected race\n");
                                        pmap_unwire_pte_hold(dst_pmap, dstmpte, &info);
                                        goto failed;
-                               } else if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
+                               } else if ((*dst_pte == 0) &&
+                                          (ptetemp = *src_pte) != 0 &&
+                                          (ptetemp & PG_MANAGED)) {
                                        /*
                                         * Clear the modified and
                                         * accessed (referenced) bits
@@ -2583,8 +2609,11 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
                                        ++dst_pmap->pm_stats.resident_count;
                                        pmap_insert_entry(dst_pmap, addr,
                                                dstmpte, m);
+                                       KKASSERT(m->flags & PG_MAPPED);
                                } else {
+                                       kprintf("WARNING: pmap_copy: dst_pte race detected and corrected\n");
                                        pmap_unwire_pte_hold(dst_pmap, dstmpte, &info);
+                                       goto failed;
                                }
                                if (dstmpte->hold_count >= srcmpte->hold_count)
                                        break;
@@ -2822,17 +2851,16 @@ pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
                        pte = pmap_pte_quick(pmap, pv->pv_va);
                if (pmap->pm_active)
                        pmap_inval_add(&info, pmap, pv->pv_va);
-               tpte = *pte;
 
                /*
                 * We cannot remove wired pages from a process' mapping
                 * at this time
                 */
-               if (tpte & PG_W) {
+               if (*pte & PG_W) {
                        npv = TAILQ_NEXT(pv, pv_plist);
                        continue;
                }
-               *pte = 0;
+               tpte = loadandclear(pte);
 
                m = PHYS_TO_VM_PAGE(tpte);
 
@@ -2855,9 +2883,8 @@ pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 
                m->md.pv_list_count--;
                TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
-               if (TAILQ_FIRST(&m->md.pv_list) == NULL) {
+               if (TAILQ_EMPTY(&m->md.pv_list))
                        vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
-               }
 
                pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem, &info);
                free_pv_entry(pv);
@@ -2968,16 +2995,26 @@ pmap_clearbit(vm_page_t m, int bit)
                 * because the virtual kernel will invalidate the pmap
                 * entry when/if it needs to resynchronize the Modify bit.
                 */
-               pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
                if (bit & PG_RW)
                        pmap_inval_add(&info, pv->pv_pmap, pv->pv_va);
-
+               pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
+again:
                pbits = *pte;
                if (pbits & bit) {
                        if (bit == PG_RW) {
-                               if (pbits & PG_M)
+                               if (pbits & PG_M) {
                                        vm_page_dirty(m);
-                               atomic_clear_int(pte, PG_M|PG_RW);
+                                       atomic_clear_int(pte, PG_M|PG_RW);
+                               } else {
+                                       /*
+                                        * The cpu may be trying to set PG_M
+                                        * simultaniously with our clearing
+                                        * of PG_RW.
+                                        */
+                                       if (!atomic_cmpset_int(pte, pbits,
+                                                              pbits & ~PG_RW))
+                                               goto again;
+                               }
                        } else if (bit == PG_M) {
                                /*
                                 * We could also clear PG_RW here to force
@@ -3008,6 +3045,7 @@ pmap_page_protect(vm_page_t m, vm_prot_t prot)
        if ((prot & VM_PROT_WRITE) == 0) {
                if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
                        pmap_clearbit(m, PG_RW);
+                       vm_page_flag_clear(m, PG_WRITEABLE);
                } else {
                        pmap_remove_all(m);
                }
index a66c3c4..f7dbbf0 100644 (file)
@@ -36,7 +36,7 @@
  *
  *     from: @(#)trap.c        7.4 (Berkeley) 5/13/91
  * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $
- * $DragonFly: src/sys/platform/pc32/i386/trap.c,v 1.111 2008/04/24 08:53:01 dillon Exp $
+ * $DragonFly: src/sys/platform/pc32/i386/trap.c,v 1.112 2008/05/09 07:24:46 dillon Exp $
  */
 
 /*
@@ -989,7 +989,8 @@ trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva)
                PRELE(lp->lwp_proc);
        } else {
                /*
-                * Don't have to worry about process locking or stacks in the kernel.
+                * Don't have to worry about process locking or stacks
+                * in the kernel.
                 */
                rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
        }
index 3893d40..904e240 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/platform/vkernel/platform/copyio.c,v 1.8 2007/07/02 16:52:25 dillon Exp $
+ * $DragonFly: src/sys/platform/vkernel/platform/copyio.c,v 1.9 2008/05/09 07:24:47 dillon Exp $
  */
 
 #include <sys/types.h>
@@ -186,6 +186,7 @@ copyout(const void *kaddr, void *udaddr, size_t len)
                len -= n;
                udaddr = (char *)udaddr + n;
                kaddr = (const char *)kaddr + n;
+               vm_page_dirty(m);
                vm_page_unhold(m);
                sf_buf_free(sf);
        }
index 6ef3401..80dd9d5 100644 (file)
@@ -38,7 +38,7 @@
  * 
  * from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
  * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $
- * $DragonFly: src/sys/platform/vkernel/platform/pmap.c,v 1.28 2008/04/28 07:05:08 dillon Exp $
+ * $DragonFly: src/sys/platform/vkernel/platform/pmap.c,v 1.29 2008/05/09 07:24:47 dillon Exp $
  */
 /*
  * NOTE: PMAP_INVAL_ADD: In pc32 this function is called prior to adjusting
@@ -1000,10 +1000,12 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
                --m->wire_count;
                KKASSERT(m->wire_count == 0);
                --vmstats.v_wire_count;
+               vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
                vm_page_flash(m);
                vm_page_free_zero(m);
                return 1;
        }
+       KKASSERT(m->hold_count > 1);
        vm_page_unhold(m);
        return 0;
 }
@@ -1346,9 +1348,9 @@ pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va)
        if (pv) {
                TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
                m->md.pv_list_count--;
-               if (TAILQ_FIRST(&m->md.pv_list) == NULL)
-                       vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
                TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
+               if (TAILQ_EMPTY(&m->md.pv_list))
+                       vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
                ++pmap->pm_generation;
                rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem);
                free_pv_entry(pv);
@@ -1607,11 +1609,12 @@ pmap_remove_all(vm_page_t m)
                TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
                ++pv->pv_pmap->pm_generation;
                m->md.pv_list_count--;
+               if (TAILQ_EMPTY(&m->md.pv_list))
+                       vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
                pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
                free_pv_entry(pv);
        }
-
-       vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
+       KKASSERT((m->flags & (PG_MAPPED | PG_WRITEABLE)) == 0);
        crit_exit();
 }
 
@@ -1813,6 +1816,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
                                vm_page_dirty(om);
                        }
                        pa |= VPTE_MANAGED;
+                       KKASSERT(m->flags & PG_MAPPED);
                }
                goto validate;
        } 
@@ -1836,6 +1840,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
            (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
                pmap_insert_entry(pmap, va, mpte, m);
                pa |= VPTE_MANAGED;
+               vm_page_flag_set(m, PG_MAPPED);
        }
 
        /*
@@ -1853,7 +1858,8 @@ validate:
 
        if (wired)
                newpte |= VPTE_WIRED;
-       newpte |= VPTE_U;
+       if (pmap != &kernel_pmap)
+               newpte |= VPTE_U;
 
        /*
         * If the mapping or permission bits are different from the
@@ -1866,26 +1872,23 @@ validate:
         */
        if ((origpte & ~(VPTE_W|VPTE_M|VPTE_A)) != newpte) {
                *pte = newpte | VPTE_A;
+               if (newpte & VPTE_W)
+                       vm_page_flag_set(m, PG_WRITEABLE);
        }
+       KKASSERT((newpte & VPTE_MANAGED) == 0 || m->flags & PG_MAPPED);
 }
 
 /*
- * This is a quick version of pmap_enter().  It is used only under the 
- * following conditions:
+ * This code works like pmap_enter() but assumes VM_PROT_READ and not-wired.
  *
- * (1) The pmap is not the kernel_pmap
- * (2) The page is not to be wired into the map
- * (3) The page is to mapped read-only in the pmap (initially that is)
- * (4) The calling procedure is responsible for flushing the TLB
- * (5) The page is always managed
- * (6) There is no prior mapping at the VA
+ * Currently this routine may only be used on user pmaps, not kernel_pmap.
  */
-
-static vm_page_t
-pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)
+static void
+pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
 {
        vpte_t *pte;
        vm_paddr_t pa;
+       vm_page_t mpte;
        unsigned ptepindex;
        vm_offset_t ptepa;
 
@@ -1894,17 +1897,14 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)
        KKASSERT(va >= VM_MIN_USER_ADDRESS && va < VM_MAX_USER_ADDRESS);
 
        /*
-        * Instantiate the page table page if required
-        */
-
-       /*
-        * Calculate pagetable page index
+        * Calculate pagetable page (mpte), allocating it if necessary.
+        *
+        * A held page table page (mpte), or NULL, is passed onto the 
+        * section following.
         */
        ptepindex = va >> PDRSHIFT;
-       if (mpte && (mpte->pindex == ptepindex)) {
-               mpte->hold_count++;
-       } else {
-retry:
+
+       do {
                /*
                 * Get the page directory entry
                 */
@@ -1924,13 +1924,12 @@ retry:
                                mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
                                pmap->pm_ptphint = mpte;
                        }
-                       if (mpte == NULL)
-                               goto retry;
-                       mpte->hold_count++;
+                       if (mpte)
+                               mpte->hold_count++;
                } else {
                        mpte = _pmap_allocpte(pmap, ptepindex);
                }
-       }
+       } while (mpte == NULL);
 
        /*
         * Ok, now that the page table page has been validated, get the pte.
@@ -1939,9 +1938,8 @@ retry:
         */
        pte = pmap_pte(pmap, va);
        if (*pte) {
-               if (mpte)
-                       pmap_unwire_pte_hold(pmap, mpte);
-               return 0;
+               pmap_unwire_pte_hold(pmap, mpte);
+               return;
        }
 
        /*
@@ -1949,8 +1947,10 @@ retry:
         * raise IPL while manipulating pv_table since pmap_enter can be
         * called at interrupt time.
         */
-       if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
+       if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
                pmap_insert_entry(pmap, va, mpte, m);
+               vm_page_flag_set(m, PG_MAPPED);
+       }
 
        /*
         * Increment counters
@@ -1966,8 +1966,8 @@ retry:
                *pte = (vpte_t)pa | VPTE_V | VPTE_U;
        else
                *pte = (vpte_t)pa | VPTE_V | VPTE_U | VPTE_MANAGED;
-
-       return mpte;
+       /*pmap_inval_add(&info, pmap, va); shouldn't be needed 0->valid */
+       /*pmap_inval_flush(&info); don't need for vkernel */
 }
 
 /*
@@ -2082,10 +2082,8 @@ pmap_object_init_pt_callback(vm_page_t p, void *data)
                        vm_page_deactivate(p);
                vm_page_busy(p);
                rel_index = p->pindex - info->start_pindex;
-               info->mpte = pmap_enter_quick(info->pmap,
-                                             info->addr + i386_ptob(rel_index),
-                                             p, info->mpte);
-               vm_page_flag_set(p, PG_MAPPED);
+               pmap_enter_quick(info->pmap,
+                                info->addr + i386_ptob(rel_index), p);
                vm_page_wakeup(p);
        }
        return(0);
@@ -2113,7 +2111,7 @@ pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
        vm_offset_t starta;
        vm_offset_t addr;
        vm_pindex_t pindex;
-       vm_page_t m, mpte;
+       vm_page_t m;
        vm_object_t object;
        struct lwp *lp;
        int i;
@@ -2141,7 +2139,6 @@ pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
         * page/object association, interrupts can free pages and remove 
         * them from their objects.
         */
-       mpte = NULL;
        crit_enter();
        for (i = 0; i < PAGEORDER_SIZE; i++) {
                vm_object_t lobject;
@@ -2204,8 +2201,7 @@ pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
                                vm_page_deactivate(m);
                        }
                        vm_page_busy(m);
-                       mpte = pmap_enter_quick(pmap, addr, m, mpte);
-                       vm_page_flag_set(m, PG_MAPPED);
+                       pmap_enter_quick(pmap, addr, m);
                        vm_page_wakeup(m);
                }
        }
@@ -2266,6 +2262,15 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
        vpte_t *dst_frame;
        vm_page_t m;
 
+        /*
+         * XXX BUGGY.  Amoung other things srcmpte is assumed to remain
+         * valid through blocking calls, and that's just not going to
+         * be the case.
+         *
+         * FIXME!
+         */
+       return;
+
        if (dst_addr != src_addr)
                return;
        if (dst_pmap->pm_pdir == NULL)
@@ -2317,9 +2322,10 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
                }
 
                srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex);
-               if ((srcmpte == NULL) ||
-                       (srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY))
+               if ((srcmpte == NULL) || (srcmpte->hold_count == 0) ||
+                   (srcmpte->flags & PG_BUSY)) {
                        continue;
+               }
 
                if (pdnxt > end_addr)
                        pdnxt = end_addr;
@@ -2328,6 +2334,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
                dst_pte = dst_frame + ((addr - src_addr) >> PAGE_SHIFT);
                while (addr < pdnxt) {
                        vpte_t ptetemp;
+
                        ptetemp = *src_pte;
                        /*
                         * we only virtual copy managed pages
@@ -2345,7 +2352,8 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
                                src_frame = get_ptbase1(src_pmap, src_addr);
                                dst_frame = get_ptbase2(dst_pmap, src_addr);
 
-                               if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
+                               if ((*dst_pte == 0) && (ptetemp = *src_pte) &&
+                                   (ptetemp & VPTE_MANAGED) != 0) {
                                        /*
                                         * Clear the modified and accessed
                                         * (referenced) bits during the copy.
@@ -2360,6 +2368,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
                                        ++dst_pmap->pm_stats.resident_count;
                                        pmap_insert_entry(dst_pmap, addr,
                                                dstmpte, m);
+                                       KKASSERT(m->flags & PG_MAPPED);
                                } else {
                                        pmap_unwire_pte_hold(dst_pmap, dstmpte);
                                }
@@ -2612,9 +2621,8 @@ pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 
                m->md.pv_list_count--;
                TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
-               if (TAILQ_FIRST(&m->md.pv_list) == NULL) {
+               if (TAILQ_FIRST(&m->md.pv_list) == NULL)
                        vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
-               }
 
                pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem);
                free_pv_entry(pv);
@@ -2777,6 +2785,7 @@ pmap_page_protect(vm_page_t m, vm_prot_t prot)
        if ((prot & VM_PROT_WRITE) == 0) {
                if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
                        pmap_clearbit(m, VPTE_W);
+                       vm_page_flag_clear(m, PG_WRITEABLE);
                } else {
                        pmap_remove_all(m);
                }
index f4b405d..8248a75 100644 (file)
@@ -96,7 +96,7 @@
  *     @(#)swap_pager.c        8.9 (Berkeley) 3/21/94
  *
  * $FreeBSD: src/sys/vm/swap_pager.c,v 1.130.2.12 2002/08/31 21:15:55 dillon Exp $
- * $DragonFly: src/sys/vm/swap_pager.c,v 1.30 2008/04/28 21:16:27 dillon Exp $
+ * $DragonFly: src/sys/vm/swap_pager.c,v 1.31 2008/05/09 07:24:48 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -1177,6 +1177,7 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage)
                    mreq->object
                );
        }
+
        /*
         * Calculate range to retrieve.  The pages have already been assigned
         * their swapblks.  We require a *contiguous* range that falls entirely
@@ -1187,7 +1188,6 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage)
         * The swp_*() calls must be made at splvm().  vm_page_free() does
         * not need to be, but it will go a little faster if it is.
         */
-
        crit_enter();
        blk = swp_pager_meta_ctl(mreq->object, mreq->pindex, 0);
 
@@ -1355,10 +1355,9 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage)
  *     those whos rtvals[] entry is not set to VM_PAGER_PEND on return.
  *     We need to unbusy the rest on I/O completion.
  */
-
 void
-swap_pager_putpages(vm_object_t object, vm_page_t *m, int count, boolean_t sync,
-    int *rtvals)
+swap_pager_putpages(vm_object_t object, vm_page_t *m, int count,
+                   boolean_t sync, int *rtvals)
 {
        int i;
        int n = 0;
@@ -1369,6 +1368,7 @@ swap_pager_putpages(vm_object_t object, vm_page_t *m, int count, boolean_t sync,
                    m[0]->object
                );
        }
+
        /*
         * Step 1
         *
index b4fbf64..eb3c6a6 100644 (file)
@@ -67,7 +67,7 @@
  * rights to redistribute these changes.
  *
  * $FreeBSD: src/sys/vm/vm_fault.c,v 1.108.2.8 2002/02/26 05:49:27 silby Exp $
- * $DragonFly: src/sys/vm/vm_fault.c,v 1.45 2008/04/14 20:00:29 dillon Exp $
+ * $DragonFly: src/sys/vm/vm_fault.c,v 1.46 2008/05/09 07:24:48 dillon Exp $
  */
 
 /*
@@ -131,8 +131,8 @@ static int vm_fault_ratelimit(struct vmspace *);
 static __inline void
 release_page(struct faultstate *fs)
 {
-       vm_page_wakeup(fs->m);
        vm_page_deactivate(fs->m);
+       vm_page_wakeup(fs->m);
        fs->m = NULL;
 }
 
@@ -378,7 +378,7 @@ RetryFault:
        }
 
        vm_page_flag_clear(fs.m, PG_ZERO);
-       vm_page_flag_set(fs.m, PG_MAPPED|PG_REFERENCED);
+       vm_page_flag_set(fs.m, PG_REFERENCED);
 
        /*
         * If the page is not wired down, then put it where the pageout daemon
@@ -434,14 +434,18 @@ vm_fault_page_quick(vm_offset_t va, vm_prot_t fault_type, int *errorp)
  *
  * The returned page will be properly dirtied if VM_PROT_WRITE was specified,
  * and marked PG_REFERENCED as well.
+ *
+ * If the page cannot be faulted writable and VM_PROT_WRITE was specified, an
+ * error will be returned.
  */
 vm_page_t
 vm_fault_page(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
              int fault_flags, int *errorp)
 {
-       int result;
        vm_pindex_t first_pindex;
        struct faultstate fs;
+       int result;
+       vm_prot_t orig_fault_type = fault_type;
 
        mycpu->gd_cnt.v_vm_faults++;
 
@@ -555,6 +559,13 @@ RetryFault:
                return(NULL);
        }
 
+       if ((orig_fault_type & VM_PROT_WRITE) &&
+           (fs.prot & VM_PROT_WRITE) == 0) {
+               *errorp = KERN_PROTECTION_FAILURE;
+               unlock_and_deallocate(&fs);
+               return(NULL);
+       }
+
        /*
         * On success vm_fault_object() does not unlock or deallocate, and fs.m
         * will contain a busied page.
@@ -579,7 +590,7 @@ RetryFault:
         * now just do it unconditionally. XXX
         */
        pmap_enter(fs.map->pmap, vaddr, fs.m, fs.prot, fs.wired);
-       vm_page_flag_set(fs.m, PG_REFERENCED|PG_MAPPED);
+       vm_page_flag_set(fs.m, PG_REFERENCED);
 
        /*
         * Unbusy the page by activating it.  It remains held and will not
@@ -606,7 +617,9 @@ RetryFault:
 }
 
 /*
- * Fault in the specified
+ * Fault in the specified (object,offset), dirty the returned page as
+ * needed.  If the requested fault_type cannot be done NULL and an
+ * error is returned.
  */
 vm_page_t
 vm_fault_object_page(vm_object_t object, vm_ooffset_t offset,
@@ -686,6 +699,12 @@ RetryFault:
                return(NULL);
        }
 
+       if ((fault_type & VM_PROT_WRITE) && (fs.prot & VM_PROT_WRITE) == 0) {
+               *errorp = KERN_PROTECTION_FAILURE;
+               unlock_and_deallocate(&fs);
+               return(NULL);
+       }
+
        /*
         * On success vm_fault_object() does not unlock or deallocate, and fs.m
         * will contain a busied page.
@@ -1087,8 +1106,10 @@ readrest:
                                        if (mt->dirty == 0)
                                                vm_page_test_dirty(mt);
                                        if (mt->dirty) {
+                                               vm_page_busy(mt);
                                                vm_page_protect(mt, VM_PROT_NONE);
                                                vm_page_deactivate(mt);
+                                               vm_page_wakeup(mt);
                                        } else {
                                                vm_page_cache(mt);
                                        }
@@ -1397,30 +1418,22 @@ readrest:
        }
 
        /*
-        * Put this page into the physical map. We had to do the unlock above
-        * because pmap_enter may cause other faults.   We don't put the page
-        * back on the active queue until later so that the page-out daemon
-        * won't find us (yet).
+        * If the fault is a write, we know that this page is being
+        * written NOW so dirty it explicitly to save on pmap_is_modified()
+        * calls later.
+        *
+        * If this is a NOSYNC mmap we do not want to set PG_NOSYNC
+        * if the page is already dirty to prevent data written with
+        * the expectation of being synced from not being synced.
+        * Likewise if this entry does not request NOSYNC then make
+        * sure the page isn't marked NOSYNC.  Applications sharing
+        * data should use the same flags to avoid ping ponging.
+        *
+        * Also tell the backing pager, if any, that it should remove
+        * any swap backing since the page is now dirty.
         */
        if (fs->prot & VM_PROT_WRITE) {
-               vm_page_flag_set(fs->m, PG_WRITEABLE);
                vm_object_set_writeable_dirty(fs->m->object);
-
-               /*
-                * If the fault is a write, we know that this page is being
-                * written NOW so dirty it explicitly to save on 
-                * pmap_is_modified() calls later.
-                *
-                * If this is a NOSYNC mmap we do not want to set PG_NOSYNC
-                * if the page is already dirty to prevent data written with
-                * the expectation of being synced from not being synced.
-                * Likewise if this entry does not request NOSYNC then make
-                * sure the page isn't marked NOSYNC.  Applications sharing
-                * data should use the same flags to avoid ping ponging.
-                *
-                * Also tell the backing pager, if any, that it should remove
-                * any swap backing since the page is now dirty.
-                */
                if (fs->entry->eflags & MAP_ENTRY_NOSYNC) {
                        if (fs->m->dirty == 0)
                                vm_page_flag_set(fs->m, PG_NOSYNC);
@@ -1659,7 +1672,6 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map,
 
                vm_page_flag_clear(dst_m, PG_ZERO);
                pmap_enter(dst_map->pmap, vaddr, dst_m, prot, FALSE);
-               vm_page_flag_set(dst_m, PG_WRITEABLE|PG_MAPPED);
 
                /*
                 * Mark it no longer busy, and put it on the active list.
@@ -1709,8 +1721,8 @@ vm_fault_additional_pages(vm_page_t m, int rbehind, int rahead,
        /*
         * if the requested page is not available, then give up now
         */
-
        if (!vm_pager_has_page(object, pindex, &cbehind, &cahead)) {
+               *reqpage = 0;   /* not used by caller, fix compiler warn */
                return 0;
        }
 
index fff70d1..e71b452 100644 (file)
@@ -62,7 +62,7 @@
  * rights to redistribute these changes.
  *
  * $FreeBSD: src/sys/vm/vm_object.c,v 1.171.2.8 2003/05/26 19:17:56 alc Exp $
- * $DragonFly: src/sys/vm/vm_object.c,v 1.32 2008/04/14 19:43:02 dillon Exp $
+ * $DragonFly: src/sys/vm/vm_object.c,v 1.33 2008/05/09 07:24:48 dillon Exp $
  */
 
 /*
@@ -598,7 +598,7 @@ vm_object_page_clean_pass1(struct vm_page *p, void *data)
        if ((info->limit & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC))
                info->error = 1;
        else
-               vm_page_protect(p, VM_PROT_READ);
+               vm_page_protect(p, VM_PROT_READ);       /* must not block */
        return(0);
 }
 
index 7afcad6..176bcb8 100644 (file)
@@ -35,7 +35,7 @@
  *
  *     from: @(#)vm_page.c     7.4 (Berkeley) 5/7/91
  * $FreeBSD: src/sys/vm/vm_page.c,v 1.147.2.18 2002/03/10 05:03:19 alc Exp $
- * $DragonFly: src/sys/vm/vm_page.c,v 1.37 2008/04/14 20:00:29 dillon Exp $
+ * $DragonFly: src/sys/vm/vm_page.c,v 1.38 2008/05/09 07:24:48 dillon Exp $
  */
 
 /*
@@ -374,7 +374,7 @@ vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
         * Since we are inserting a new and possibly dirty page,
         * update the object's OBJ_WRITEABLE and OBJ_MIGHTBEDIRTY flags.
         */
-       if (m->flags & PG_WRITEABLE)
+       if ((m->valid & m->dirty) || (m->flags & PG_WRITEABLE))
                vm_object_set_writeable_dirty(object);
 }
 
@@ -929,6 +929,8 @@ vm_page_free_toq(vm_page_t m)
        crit_enter();
        mycpu->gd_cnt.v_tfree++;
 
+       KKASSERT((m->flags & PG_MAPPED) == 0);
+
        if (m->busy || ((m->queue - m->pc) == PQ_FREE)) {
                kprintf(
                "vm_page_free: pindex(%lu), busy(%d), PG_BUSY(%d), hold(%d)\n",
@@ -1059,9 +1061,8 @@ vm_page_wire(vm_page_t m)
                }
                m->wire_count++;
                KASSERT(m->wire_count != 0,
-                   ("vm_page_wire: wire_count overflow m=%p", m));
+                       ("vm_page_wire: wire_count overflow m=%p", m));
        }
-       vm_page_flag_set(m, PG_MAPPED);
        crit_exit();
 }
 
@@ -1235,8 +1236,10 @@ vm_page_cache(vm_page_t m)
        /*
         * Already in the cache (and thus not mapped)
         */
-       if ((m->queue - m->pc) == PQ_CACHE)
+       if ((m->queue - m->pc) == PQ_CACHE) {
+               KKASSERT((m->flags & PG_MAPPED) == 0);
                return;
+       }
 
        /*
         * Caller is required to test m->dirty, but note that the act of
@@ -1250,11 +1253,17 @@ vm_page_cache(vm_page_t m)
 
        /*
         * Remove all pmaps and indicate that the page is not
-        * writeable or mapped.  Deal with the case where the page
-        * may have become dirty via a race.
+        * writeable or mapped.  Our vm_page_protect() call with
+        * VM_PROT_NONE may block, so recheck everything once it
+        * returns.
         */
+       vm_page_busy(m);
        vm_page_protect(m, VM_PROT_NONE);
-       if (m->dirty) {
+       vm_page_wakeup(m);
+       if ((m->flags & (PG_BUSY|PG_UNMANAGED|PG_MAPPED)) || m->busy ||
+                       m->wire_count || m->hold_count) {
+               /* do nothing */
+       } else if (m->dirty) {
                vm_page_deactivate(m);
        } else {
                vm_page_unqueue_nowakeup(m);
@@ -1506,6 +1515,27 @@ vm_page_clear_dirty(vm_page_t m, int base, int size)
 }
 
 /*
+ * Make the page all-dirty.
+ *
+ * Also make sure the related object and vnode reflect the fact that the
+ * object may now contain a dirty page.
+ */
+void
+vm_page_dirty(vm_page_t m)
+{
+#ifdef INVARIANTS
+        int pqtype = m->queue - m->pc;
+#endif
+        KASSERT(pqtype != PQ_CACHE && pqtype != PQ_FREE,
+                ("vm_page_dirty: page in free/cache queue!"));
+       if (m->dirty != VM_PAGE_BITS_ALL) {
+               m->dirty = VM_PAGE_BITS_ALL;
+               if (m->object)
+                       vm_object_set_writeable_dirty(m->object);
+       }
+}
+
+/*
  * Invalidates DEV_BSIZE'd chunks within a page.  Both the
  * valid and dirty bits for the affected areas are cleared.
  *
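
A minimal sketch of how the new vm_page_dirty() is intended to be used when a page is written through a kernel mapping that bypasses normal paging, using the same sf_buf calls that appear in the vnode_pager hunk further down.  The helper name is hypothetical, the page is assumed to be already valid, and the sf_buf header and sf_buf_free() call are assumed to be the usual ones:

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/sfbuf.h>              /* header name assumed */
    #include <vm/vm.h>
    #include <vm/vm_page.h>

    /*
     * Hypothetical helper: overwrite a page through a cpu-private kernel
     * mapping.  The pmap never sees the store, so no modified bit is set
     * anywhere and the page must be dirtied by hand; vm_page_dirty() also
     * marks the owning object writeable/might-be-dirty.
     */
    static void
    overwrite_page_sketch(vm_page_t m)
    {
            struct sf_buf *sf;

            vm_page_busy(m);
            sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
            bzero((caddr_t)sf_buf_kva(sf), PAGE_SIZE);
            sf_buf_free(sf);
            vm_page_dirty(m);
            vm_page_wakeup(m);
    }

Keeping the page busy across the write prevents it from being paged out or freed while the private mapping is in use.
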
index 07e68ff..b0405c6 100644 (file)
@@ -62,7 +62,7 @@
  * rights to redistribute these changes.
  *
  * $FreeBSD: src/sys/vm/vm_page.h,v 1.75.2.8 2002/03/06 01:07:09 dillon Exp $
- * $DragonFly: src/sys/vm/vm_page.h,v 1.27 2008/04/14 20:00:29 dillon Exp $
+ * $DragonFly: src/sys/vm/vm_page.h,v 1.28 2008/05/09 07:24:48 dillon Exp $
  */
 
 /*
@@ -297,19 +297,30 @@ extern struct vpgqueues vm_page_queues[PQ_COUNT];
 /*
  * These are the flags defined for vm_page.
  *
- * Note: PG_UNMANAGED (used by OBJT_PHYS) indicates that the page is
- *      not under PV management but otherwise should be treated as a
- *      normal page.  Pages not under PV management cannot be paged out
- *      via the object/vm_page_t because there is no knowledge of their
- *      pte mappings, nor can they be removed from their objects via 
- *      the object, and such pages are also not on any PQ queue.
+ *  PG_UNMANAGED (used by OBJT_PHYS) indicates that the page is
+ *  not under PV management but otherwise should be treated as a
+ *  normal page.  Pages not under PV management cannot be paged out
+ *  via the object/vm_page_t because there is no knowledge of their
+ *  pte mappings, nor can they be removed from their objects via 
+ *  the object, and such pages are also not on any PQ queue.  The
+ *  PG_MAPPED and PG_WRITEABLE flags are not applicable.
+ *
+ *  PG_MAPPED only applies to managed pages, indicating whether the page
+ *  is mapped into one or more pmaps.  A page may still be mapped in an
+ *  unmanaged fashion via special pmaps, for example when mapped into a
+ *  buffer cache buffer, without PG_MAPPED being set.
+ *
+ *  PG_WRITEABLE indicates that a writeable managed pmap entry may exist
+ *  somewhere, so hardware can dirty the page at any time and the page
+ *  may have to be tested for that.  The modified bit in unmanaged
+ *  mappings or in the special clean map is not tested.
  */
 #define        PG_BUSY         0x0001          /* page is in transit (O) */
 #define        PG_WANTED       0x0002          /* someone is waiting for page (O) */
 #define PG_WINATCFLS   0x0004          /* flush dirty page on inactive q */
 #define        PG_FICTITIOUS   0x0008          /* physical page doesn't exist (O) */
-#define        PG_WRITEABLE    0x0010          /* page is mapped writeable */
-#define PG_MAPPED      0x0020          /* page is mapped */
+#define        PG_WRITEABLE    0x0010          /* page is writeable */
+#define PG_MAPPED      0x0020          /* page is mapped (managed) */
 #define        PG_ZERO         0x0040          /* page is zeroed */
 #define PG_REFERENCED  0x0080          /* page has been referenced */
 #define PG_CLEANCHK    0x0100          /* page will be checked for cleaning */
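
A minimal sketch of what these flag semantics imply for code that wants to retire a page which may still carry a writeable managed mapping, mirroring the pageout changes further down.  The helper name is hypothetical and the caller is assumed to run inside the same critical section the real pageout code holds:

    #include <sys/param.h>
    #include <vm/vm.h>
    #include <vm/vm_page.h>

    /* Hypothetical helper: retire a possibly-mapped, possibly-dirty page. */
    static void
    retire_page_sketch(vm_page_t m)
    {
            /*
             * Hardware may have set the modified bit through a writeable
             * managed pte, so collect it before judging the page clean.
             */
            if (m->dirty == 0)
                    vm_page_test_dirty(m);

            /*
             * Removing all managed mappings clears PG_MAPPED and
             * PG_WRITEABLE in the pmap code.  The VM_PROT_NONE case can
             * block, so the page is busied across the call.
             */
            vm_page_busy(m);
            vm_page_protect(m, VM_PROT_NONE);
            vm_page_wakeup(m);

            if (m->dirty == 0)
                    vm_page_cache(m);
            else
                    vm_page_deactivate(m);
    }
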
@@ -494,6 +505,7 @@ void vm_page_free_toq(vm_page_t m);
 vm_offset_t vm_contig_pg_kmap(int, u_long, vm_map_t, int);
 void vm_contig_pg_free(int, u_long);
 void vm_page_event_internal(vm_page_t, vm_page_event_t);
+void vm_page_dirty(vm_page_t m);
 
 /*
  * Holding a page keeps it from being reused.  Other parts of the system
@@ -529,6 +541,10 @@ vm_page_hold(vm_page_t mem)
  *
  * Since 'prot' is usually a constant, this inline usually winds up optimizing
  * out the primary conditional.
+ *
+ * WARNING: A VM_PROT_NONE call can block, but it loops until all mappings
+ * have been cleared.  Callers should be aware that other page-related
+ * state may have changed while the call was blocked.
  */
 static __inline void
 vm_page_protect(vm_page_t mem, int prot)
@@ -536,11 +552,11 @@ vm_page_protect(vm_page_t mem, int prot)
        if (prot == VM_PROT_NONE) {
                if (mem->flags & (PG_WRITEABLE|PG_MAPPED)) {
                        pmap_page_protect(mem, VM_PROT_NONE);
-                       vm_page_flag_clear(mem, PG_WRITEABLE|PG_MAPPED);
+                       /* PG_WRITEABLE & PG_MAPPED cleared by call */
                }
        } else if ((prot == VM_PROT_READ) && (mem->flags & PG_WRITEABLE)) {
                pmap_page_protect(mem, VM_PROT_READ);
-               vm_page_flag_clear(mem, PG_WRITEABLE);
+               /* PG_WRITEABLE cleared by call */
        }
 }
 
@@ -564,6 +580,7 @@ vm_page_copy(vm_page_t src_m, vm_page_t dest_m)
 {
        pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dest_m));
        dest_m->valid = VM_PAGE_BITS_ALL;
+       dest_m->dirty = VM_PAGE_BITS_ALL;
 }
 
 /*
@@ -625,22 +642,6 @@ vm_page_sleep_busy(vm_page_t m, int also_m_busy, const char *msg)
 }
 
 /*
- * Make page all dirty
- */
-static __inline void
-_vm_page_dirty(vm_page_t m, const char *info)
-{
-#ifdef INVARIANTS
-       int pqtype = m->queue - m->pc;
-#endif
-       KASSERT(pqtype != PQ_CACHE && pqtype != PQ_FREE,
-               ("vm_page_dirty: page in free/cache queue!"));
-       m->dirty = VM_PAGE_BITS_ALL;
-}
-
-#define vm_page_dirty(m)       _vm_page_dirty(m, __FUNCTION__)
-
-/*
  * Set page to not be dirty.  Note: does not clear pmap modify bits.
  */
 static __inline void
index a4a64c1..17ac523 100644 (file)
@@ -66,7 +66,7 @@
  * rights to redistribute these changes.
  *
  * $FreeBSD: src/sys/vm/vm_pageout.c,v 1.151.2.15 2002/12/29 18:21:04 dillon Exp $
- * $DragonFly: src/sys/vm/vm_pageout.c,v 1.34 2008/04/28 21:16:27 dillon Exp $
+ * $DragonFly: src/sys/vm/vm_pageout.c,v 1.35 2008/05/09 07:24:48 dillon Exp $
  */
 
 /*
@@ -392,19 +392,21 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags)
        int i;
 
        /*
-        * Initiate I/O.  Bump the vm_page_t->busy counter and
-        * mark the pages read-only.
-        *
+        * Initiate I/O.  Bump the vm_page_t->busy counter.
+        */
+       for (i = 0; i < count; i++) {
+               KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL, ("vm_pageout_flush page %p index %d/%d: partially invalid page", mc[i], i, count));
+               vm_page_io_start(mc[i]);
+       }
+
+       /*
         * We must make the pages read-only.  This will also force the
         * modified bit in the related pmaps to be cleared.  The pager
         * cannot clear the bit for us since the I/O completion code
         * typically runs from an interrupt.  The act of making the page
         * read-only handles the case for us.
         */
-
        for (i = 0; i < count; i++) {
-               KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL, ("vm_pageout_flush page %p index %d/%d: partially invalid page", mc[i], i, count));
-               vm_page_io_start(mc[i]);
                vm_page_protect(mc[i], VM_PROT_READ);
        }
 
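
The same ordering can be sketched in isolation: every page is flagged as undergoing I/O before any protection change is made, so nothing that blocks during the protect pass can find a page that has not yet been marked.  This is only an illustration of the ordering, not the pager interface; the function name is hypothetical and the pages are assumed to be already busied by the caller:

    #include <sys/param.h>
    #include <vm/vm.h>
    #include <vm/vm_page.h>

    /* Hypothetical helper: prepare an array of busied pages for write I/O. */
    static void
    start_write_pages_sketch(vm_page_t *mc, int count)
    {
            int i;

            /* Pass 1: mark every page as undergoing I/O. */
            for (i = 0; i < count; ++i)
                    vm_page_io_start(mc[i]);

            /*
             * Pass 2: downgrade to read-only so hardware cannot set the
             * modified bit behind the pager's back while the write runs.
             */
            for (i = 0; i < count; ++i)
                    vm_page_protect(mc[i], VM_PROT_READ);
    }
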
@@ -555,7 +557,9 @@ vm_pageout_object_deactivate_pages_callback(vm_page_t p, void *data)
                if ((p->flags & PG_REFERENCED) == 0) {
                        p->act_count -= min(p->act_count, ACT_DECLINE);
                        if (!info->limit && (vm_pageout_algorithm || (p->act_count == 0))) {
+                               vm_page_busy(p);
                                vm_page_protect(p, VM_PROT_NONE);
+                               vm_page_wakeup(p);
                                vm_page_deactivate(p);
                        } else {
                                TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
@@ -570,7 +574,9 @@ vm_pageout_object_deactivate_pages_callback(vm_page_t p, void *data)
                        TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
                }
        } else if (p->queue == PQ_INACTIVE) {
+               vm_page_busy(p);
                vm_page_protect(p, VM_PROT_NONE);
+               vm_page_wakeup(p);
        }
        return(0);
 }
@@ -846,10 +852,8 @@ rescan0:
                 * address space of a process running on another cpu.  A
                 * user process (without holding the MP lock) running on
                 * another cpu may be able to touch the page while we are
-                * trying to remove it.  To prevent this from occuring we
-                * must call pmap_remove_all() or otherwise make the page
-                * read-only.  If the race occured pmap_remove_all() is
-                * responsible for setting m->dirty.
+                * trying to remove it.  vm_page_cache() will handle this
+                * case for us.
                 */
                if (m->dirty == 0) {
                        vm_page_test_dirty(m);
@@ -1105,7 +1109,9 @@ rescan0:
                            m->act_count < pass) {
                                page_shortage--;
                                if (m->object->ref_count == 0) {
+                                       vm_page_busy(m);
                                        vm_page_protect(m, VM_PROT_NONE);
+                                       vm_page_wakeup(m);
                                        if (m->dirty == 0)
                                                vm_page_cache(m);
                                        else
@@ -1145,6 +1151,8 @@ rescan0:
                        vm_page_deactivate(m);
                        continue;
                }
+               KKASSERT((m->flags & PG_MAPPED) == 0);
+               KKASSERT(m->dirty == 0);
                cache_rover = (cache_rover + PQ_PRIME2) & PQ_L2_MASK;
                vm_pageout_page_free(m);
                mycpu->gd_cnt.v_dfree++;
@@ -1325,7 +1333,9 @@ vm_pageout_page_stats(void)
                                 * operations would be higher than the value
                                 * of doing the operation.
                                 */
+                               vm_page_busy(m);
                                vm_page_protect(m, VM_PROT_NONE);
+                               vm_page_wakeup(m);
                                vm_page_deactivate(m);
                        } else {
                                m->act_count -= min(m->act_count, ACT_DECLINE);
index 9a3e380..ce49107 100644 (file)
@@ -39,7 +39,7 @@
  *
  *     from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91
  * $FreeBSD: src/sys/vm/vnode_pager.c,v 1.116.2.7 2002/12/31 09:34:51 dillon Exp $
- * $DragonFly: src/sys/vm/vnode_pager.c,v 1.41 2008/04/28 21:16:27 dillon Exp $
+ * $DragonFly: src/sys/vm/vnode_pager.c,v 1.42 2008/05/09 07:24:48 dillon Exp $
  */
 
 /*
@@ -292,7 +292,10 @@ vnode_pager_setsize(struct vnode *vp, vm_ooffset_t nsize)
                        vm_offset_t kva;
                        vm_page_t m;
 
-                       m = vm_page_lookup(object, OFF_TO_IDX(nsize));
+                       do {
+                               m = vm_page_lookup(object, OFF_TO_IDX(nsize));
+                       } while (m && vm_page_sleep_busy(m, TRUE, "vsetsz"));
+
                        if (m && m->valid) {
                                int base = (int)nsize & PAGE_MASK;
                                int size = PAGE_SIZE - base;
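
A minimal sketch of the lookup-and-wait pattern introduced above, assuming the caller holds a reference on the object and the usual serialization; the helper name and wait message are hypothetical.  vm_page_sleep_busy() returns non-zero if it had to sleep, in which case the page may have changed identity and must be looked up again:

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <vm/vm.h>
    #include <vm/vm_object.h>
    #include <vm/vm_page.h>

    /* Hypothetical helper: look up a page, waiting out any busy state. */
    static vm_page_t
    lookup_settled_page_sketch(vm_object_t object, vm_pindex_t pindex)
    {
            vm_page_t m;

            do {
                    m = vm_page_lookup(object, pindex);
            } while (m && vm_page_sleep_busy(m, TRUE, "pgwait"));
            return (m);
    }

As in the hunk above, the caller still re-busies the page itself (vm_page_busy()) before touching its contents.
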
@@ -302,6 +305,7 @@ vnode_pager_setsize(struct vnode *vp, vm_ooffset_t nsize)
                                 * Clear out partial-page garbage in case
                                 * the page has been mapped.
                                 */
+                               vm_page_busy(m);
                                sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
                                kva = sf_buf_kva(sf);
                                bzero((caddr_t)kva + base, size);
@@ -337,6 +341,7 @@ vnode_pager_setsize(struct vnode *vp, vm_ooffset_t nsize)
                                vm_page_set_validclean(m, base, size);
                                if (m->dirty != 0)
                                        m->dirty = VM_PAGE_BITS_ALL;
+                               vm_page_wakeup(m);
                        }
                }
        } else {