kernel - Add vm_page_need_commit() and vm_page_clear_commit()
authorMatthew Dillon <dillon@apollo.backplane.com>
Wed, 1 Feb 2012 04:41:57 +0000 (20:41 -0800)
committerMatthew Dillon <dillon@apollo.backplane.com>
Thu, 9 Feb 2012 03:27:27 +0000 (19:27 -0800)
* Add vm_page_need_commit() and vm_page_clear_commit().  This controls the
  PG_NEED_COMMIT flag on individual VM pages.

  When set, a clean VM page cannot be reused and is still subject to
  flushing by the pageout daemon.

* Refactor buwrite() to use the new mechanism instead of dirtying the
  pages underlying the buffer.

* This allows the normal page and buffer flushing mechanism to operate
  even in situations where the VM page is also the nominal backing store
  for the data (as in tmpfs).

* Will be used by tmpfs.

sys/kern/uipc_syscalls.c
sys/kern/vfs_bio.c
sys/vm/vm_contig.c
sys/vm/vm_fault.c
sys/vm/vm_object.c
sys/vm/vm_page.c
sys/vm/vm_page.h
sys/vm/vm_pageout.c
sys/vm/vnode_pager.c

index 0aa5bc1..2b65b48 100644 (file)
@@ -1413,7 +1413,9 @@ sf_buf_mfree(void *arg)
                /* sf invalid now */
                vm_page_busy_wait(m, FALSE, "sockpgf");
                vm_page_unwire(m, 0);
-               if (m->wire_count == 0 && m->object == NULL) {
+               if (m->object == NULL &&
+                   m->wire_count == 0 &&
+                   (m->flags & PG_NEED_COMMIT) == 0) {
                        vm_page_free(m);
                } else {
                        vm_page_wakeup(m);
index a335ebb..a52fec3 100644 (file)
@@ -97,7 +97,9 @@ struct buf *buf;              /* buffer header pool */
 
 static void vfs_clean_pages(struct buf *bp);
 static void vfs_clean_one_page(struct buf *bp, int pageno, vm_page_t m);
+#if 0
 static void vfs_dirty_one_page(struct buf *bp, int pageno, vm_page_t m);
+#endif
 static void vfs_vmio_release(struct buf *bp);
 static int flushbufqueues(bufq_type_t q);
 static vm_page_t bio_page_alloc(vm_object_t obj, vm_pindex_t pg, int deficit);
@@ -1148,11 +1150,11 @@ buwrite(struct buf *bp)
        }
 
        /*
-        * Set valid & dirty.
+        * Mark as needing a commit.
         */
        for (i = 0; i < bp->b_xio.xio_npages; i++) {
                m = bp->b_xio.xio_pages[i];
-               vfs_dirty_one_page(bp, i, m);
+               vm_page_need_commit(m);
        }
        bqrelse(bp);
 }
@@ -1800,7 +1802,9 @@ vfs_vmio_release(struct buf *bp)
 
                /*
                 * If the wire_count has dropped to 0 we may need to take
-                * further action before unbusying the page
+                * further action before unbusying the page.
+                *
+                * WARNING: vm_page_try_*() also checks PG_NEED_COMMIT for us.
                 */
                if (m->wire_count == 0) {
                        vm_page_flag_clear(m, PG_ZERO);
@@ -3915,8 +3919,10 @@ bpdone(struct buf *bp, int elseit)
                                        kprintf(" VDEV, loffset: %lld, flags: 0x%08x, npages: %d\n",
                                            (long long)bp->b_loffset,
                                            bp->b_flags, bp->b_xio.xio_npages);
-                               kprintf(" valid: 0x%x, dirty: 0x%x, wired: %d\n",
-                                   m->valid, m->dirty, m->wire_count);
+                               kprintf(" valid: 0x%x, dirty: 0x%x, "
+                                       "wired: %d\n",
+                                       m->valid, m->dirty,
+                                       m->wire_count);
                                panic("biodone: page busy < 0");
                        }
                        vm_page_io_finish(m);
@@ -4377,6 +4383,7 @@ vfs_clean_one_page(struct buf *bp, int pageno, vm_page_t m)
        vm_page_set_validclean(m, soff & PAGE_MASK, eoff - soff);
 }
 
+#if 0
 /*
  * Similar to vfs_clean_one_page() but sets the bits to valid and dirty.
  * The page data is assumed to be valid (there is no zeroing here).
@@ -4409,6 +4416,7 @@ vfs_dirty_one_page(struct buf *bp, int pageno, vm_page_t m)
                return;
        vm_page_set_validdirty(m, soff & PAGE_MASK, eoff - soff);
 }
+#endif
 
 /*
  * vfs_bio_clrbuf:
index f249db0..822f656 100644 (file)
@@ -202,7 +202,7 @@ vm_contig_pg_clean(int queue, int count)
                        continue;
                }
                vm_page_test_dirty(m);
-               if (m->dirty) {
+               if (m->dirty || (m->flags & PG_NEED_COMMIT)) {
                        vm_object_hold(object);
                        KKASSERT(m->object == object);
 
@@ -293,9 +293,9 @@ again:
                            ((phys & (alignment - 1)) == 0) &&
                            (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0) &&
                            m->busy == 0 && m->wire_count == 0 &&
-                           m->hold_count == 0 && (m->flags & PG_BUSY) == 0
-
-                       {
+                           m->hold_count == 0 &&
+                           (m->flags & (PG_BUSY | PG_NEED_COMMIT)) == 0)
+                       {
                                break;
                        }
                }
@@ -359,8 +359,9 @@ again:
                            (VM_PAGE_TO_PHYS(&m[-1]) + PAGE_SIZE)) ||
                            ((pqtype != PQ_FREE) && (pqtype != PQ_CACHE)) ||
                            m->busy || m->wire_count ||
-                           m->hold_count || (m->flags & PG_BUSY)
-                       ) {
+                           m->hold_count ||
+                           (m->flags & (PG_BUSY | PG_NEED_COMMIT)))
+                       {
                                start++;
                                goto again;
                        }
@@ -384,7 +385,7 @@ again:
                        if (pqtype == PQ_CACHE &&
                            m->hold_count == 0 &&
                            m->wire_count == 0 &&
-                           (m->flags & PG_UNMANAGED) == 0) {
+                           (m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) == 0) {
                                vm_page_protect(m, VM_PROT_NONE);
                                KKASSERT((m->flags & PG_MAPPED) == 0);
                                KKASSERT(m->dirty == 0);
index 1c01058..05dd8f7 100644 (file)
@@ -1335,7 +1335,8 @@ readrest:
                                                break;
                                        }
                                        if ((mt->flags &
-                                            (PG_FICTITIOUS | PG_UNMANAGED)) ||
+                                            (PG_FICTITIOUS | PG_UNMANAGED |
+                                             PG_NEED_COMMIT)) ||
                                            mt->hold_count ||
                                            mt->wire_count)  {
                                                vm_page_wakeup(mt);
index d99d859..c4f09c5 100644 (file)
@@ -998,6 +998,9 @@ vm_object_terminate_callback(vm_page_t p, void *data __unused)
                        "busied page %p on queue %d\n", p, p->queue);
                vm_page_wakeup(p);
        } else if (p->wire_count == 0) {
+               /*
+                * NOTE: PG_NEED_COMMIT is ignored.
+                */
                vm_page_free(p);
                mycpu->gd_cnt.v_pfree++;
        } else {
@@ -1524,10 +1527,8 @@ shadowlookup:
                 * mess with.  Things can break if we mess with pages in
                 * any of the below states.
                 */
-               if (
-                   /*m->hold_count ||*/
-                   m->wire_count ||
-                   (m->flags & PG_UNMANAGED) ||
+               if (m->wire_count ||
+                   (m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) ||
                    m->valid != VM_PAGE_BITS_ALL
                ) {
                        vm_page_wakeup(m);
@@ -1834,8 +1835,9 @@ vm_object_backing_scan_callback(vm_page_t p, void *data)
                }
 
                if (op & OBSC_COLLAPSE_NOWAIT) {
-                       if (p->valid == 0 /*|| p->hold_count*/ ||
-                           p->wire_count) {
+                       if (p->valid == 0 ||
+                           p->wire_count ||
+                           (p->flags & PG_NEED_COMMIT)) {
                                vm_page_wakeup(p);
                                return(0);
                        }
@@ -2370,6 +2372,8 @@ vm_object_page_remove_callback(vm_page_t p, void *data)
         *           cache buffer, and the buffer might be marked B_CACHE.
         *           This is fine as part of a truncation but VFSs must be
         *           sure to fix the buffer up when re-extending the file.
+        *
+        * NOTE!     PG_NEED_COMMIT is ignored.
         */
        if (p->wire_count != 0) {
                vm_page_protect(p, VM_PROT_NONE);
index 90e5f6e..c47761f 100644 (file)
@@ -1336,13 +1336,18 @@ vm_page_select_cache(u_short pg_color)
                        /*
                         * We successfully busied the page
                         */
-                       if ((m->flags & PG_UNMANAGED) == 0 &&
+                       if ((m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) == 0 &&
                            m->hold_count == 0 &&
-                           m->wire_count == 0) {
+                           m->wire_count == 0 &&
+                           (m->dirty & m->valid) == 0) {
                                vm_page_spin_unlock(m);
                                pagedaemon_wakeup();
                                return(m);
                        }
+
+                       /*
+                        * The page cannot be recycled, deactivate it.
+                        */
                        _vm_page_deactivate_locked(m, 0);
                        if (_vm_page_wakeup(m)) {
                                vm_page_spin_unlock(m);
@@ -1402,7 +1407,8 @@ vm_page_select_free(u_short pg_color, boolean_t prefer_zero)
                         * lock) nobody else should be able to mess with the
                         * page before us.
                         */
-                       KKASSERT((m->flags & PG_UNMANAGED) == 0);
+                       KKASSERT((m->flags & (PG_UNMANAGED |
+                                             PG_NEED_COMMIT)) == 0);
                        KKASSERT(m->hold_count == 0);
                        KKASSERT(m->wire_count == 0);
                        vm_page_spin_unlock(m);
@@ -1999,10 +2005,12 @@ vm_page_free_toq(vm_page_t m)
 
        /*
         * Clear the UNMANAGED flag when freeing an unmanaged page.
+        * Clear the NEED_COMMIT flag
         */
-       if (m->flags & PG_UNMANAGED) {
+       if (m->flags & PG_UNMANAGED)
                vm_page_flag_clear(m, PG_UNMANAGED);
-       }
+       if (m->flags & PG_NEED_COMMIT)
+               vm_page_flag_clear(m, PG_NEED_COMMIT);
 
        if (m->hold_count != 0) {
                vm_page_flag_clear(m, PG_ZERO);
@@ -2070,7 +2078,8 @@ vm_page_free_fromq_fast(void)
                                 * The page is not PG_ZERO'd so return it.
                                 */
                                vm_page_spin_unlock(m);
-                               KKASSERT((m->flags & PG_UNMANAGED) == 0);
+                               KKASSERT((m->flags & (PG_UNMANAGED |
+                                                     PG_NEED_COMMIT)) == 0);
                                KKASSERT(m->hold_count == 0);
                                KKASSERT(m->wire_count == 0);
                                break;
@@ -2271,7 +2280,7 @@ vm_page_try_to_cache(vm_page_t m)
                return(0);
        }
        if (m->dirty || m->hold_count || m->wire_count ||
-           (m->flags & PG_UNMANAGED)) {
+           (m->flags & (PG_UNMANAGED | PG_NEED_COMMIT))) {
                if (_vm_page_wakeup(m)) {
                        vm_page_spin_unlock(m);
                        wakeup(m);
@@ -2317,7 +2326,8 @@ vm_page_try_to_free(vm_page_t m)
        if (m->dirty ||                         /* can't free if it is dirty */
            m->hold_count ||                    /* or held (XXX may be wrong) */
            m->wire_count ||                    /* or wired */
-           (m->flags & PG_UNMANAGED) ||        /* or unmanaged */
+           (m->flags & (PG_UNMANAGED |         /* or unmanaged */
+                        PG_NEED_COMMIT)) ||    /* or needs a commit */
            m->queue - m->pc == PQ_FREE ||      /* already on PQ_FREE */
            m->queue - m->pc == PQ_HOLD) {      /* already on PQ_HOLD */
                if (_vm_page_wakeup(m)) {
@@ -2362,8 +2372,8 @@ vm_page_try_to_free(vm_page_t m)
 void
 vm_page_cache(vm_page_t m)
 {
-       if ((m->flags & PG_UNMANAGED) || m->busy ||
-           m->wire_count || m->hold_count) {
+       if ((m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) ||
+           m->busy || m->wire_count || m->hold_count) {
                kprintf("vm_page_cache: attempting to cache busy/held page\n");
                vm_page_wakeup(m);
                return;
@@ -2395,10 +2405,10 @@ vm_page_cache(vm_page_t m)
         * everything.
         */
        vm_page_protect(m, VM_PROT_NONE);
-       if ((m->flags & (PG_UNMANAGED|PG_MAPPED)) || m->busy ||
-                       m->wire_count || m->hold_count) {
+       if ((m->flags & (PG_UNMANAGED | PG_MAPPED)) ||
+           m->busy || m->wire_count || m->hold_count) {
                vm_page_wakeup(m);
-       } else if (m->dirty) {
+       } else if (m->dirty || (m->flags & PG_NEED_COMMIT)) {
                vm_page_deactivate(m);
                vm_page_wakeup(m);
        } else {
@@ -2517,6 +2527,22 @@ vm_page_io_finish(vm_page_t m)
                vm_page_flag_clear(m, PG_SBUSY);
 }
 
+/*
+ * Indicate that a clean VM page requires a filesystem commit and cannot
+ * be reused.  Used by tmpfs.
+ */
+void
+vm_page_need_commit(vm_page_t m)
+{
+       vm_page_flag_set(m, PG_NEED_COMMIT);
+}
+
+void
+vm_page_clear_commit(vm_page_t m)
+{
+       vm_page_flag_clear(m, PG_NEED_COMMIT);
+}
+
 /*
  * Grab a page, blocking if it is busy and allocating a page if necessary.
  * A busy page is returned or NULL.  The page may or may not be valid and
@@ -2741,7 +2767,7 @@ vm_page_set_validclean(vm_page_t m, int base, int size)
  *
  * WARNING: Page must be busied?  But vfs_dirty_one_page() will
  *         call this function in buwrite() so for now vm_token must
- *         be held.
+ *         be held.
  *
  * No other requirements.
  */
@@ -2754,7 +2780,7 @@ vm_page_set_validdirty(vm_page_t m, int base, int size)
        m->valid |= pagebits;
        m->dirty |= pagebits;
        if (m->object)
-               vm_object_set_writeable_dirty(m->object);
+              vm_object_set_writeable_dirty(m->object);
 }
 
 /*
index a985cdc..2c2fe77 100644 (file)
@@ -318,6 +318,7 @@ extern struct vpgqueues vm_page_queues[PQ_COUNT];
 #define PG_NOTMETA     0x00008000      /* do not back with swap */
 #define PG_ACTIONLIST  0x00010000      /* lookaside action list present */
 #define PG_SBUSY       0x00020000      /* soft-busy also set */
+#define PG_NEED_COMMIT 0x00040000      /* clean page requires commit */
 
 /*
  * Misc constants.
@@ -432,6 +433,8 @@ void vm_page_and_queue_spin_unlock(vm_page_t m);
 
 void vm_page_io_finish(vm_page_t m);
 void vm_page_io_start(vm_page_t m);
+void vm_page_need_commit(vm_page_t m);
+void vm_page_clear_commit(vm_page_t m);
 void vm_page_wakeup(vm_page_t m);
 void vm_page_hold(vm_page_t);
 void vm_page_unhold(vm_page_t);
index 9bebe31..d7de1ff 100644 (file)
@@ -295,7 +295,8 @@ more:
                        break;
                }
                vm_page_test_dirty(p);
-               if ((p->dirty & p->valid) == 0 ||
+               if (((p->dirty & p->valid) == 0 &&
+                    (p->flags & PG_NEED_COMMIT) == 0) ||
                    p->queue - p->pc != PQ_INACTIVE ||
                    p->wire_count != 0 ||       /* may be held by buf cache */
                    p->hold_count != 0) {       /* may be undergoing I/O */
@@ -327,7 +328,8 @@ more:
                        break;
                }
                vm_page_test_dirty(p);
-               if ((p->dirty & p->valid) == 0 ||
+               if (((p->dirty & p->valid) == 0 &&
+                    (p->flags & PG_NEED_COMMIT) == 0) ||
                    p->queue - p->pc != PQ_INACTIVE ||
                    p->wire_count != 0 ||       /* may be held by buf cache */
                    p->hold_count != 0) {       /* may be undergoing I/O */
@@ -922,14 +924,14 @@ vm_pageout_scan_inactive(int pass, int q, int avail_shortage,
                        vm_page_dirty(m);
                }
 
-               if (m->valid == 0) {
+               if (m->valid == 0 && (m->flags & PG_NEED_COMMIT) == 0) {
                        /*
                         * Invalid pages can be easily freed
                         */
                        vm_pageout_page_free(m);
                        mycpu->gd_cnt.v_dfree++;
                        ++delta;
-               } else if (m->dirty == 0) {
+               } else if (m->dirty == 0 && (m->flags & PG_NEED_COMMIT) == 0) {
                        /*
                         * Clean pages can be placed onto the cache queue.
                         * This effectively frees them.
@@ -1299,6 +1301,7 @@ vm_pageout_scan_active(int pass, int q,
                                                ++*recycle_countp;
                                        vm_page_protect(m, VM_PROT_NONE);
                                        if (m->dirty == 0 &&
+                                           (m->flags & PG_NEED_COMMIT) == 0 &&
                                            avail_shortage - delta > 0) {
                                                vm_page_cache(m);
                                        } else {
@@ -1398,7 +1401,7 @@ vm_pageout_scan_cache(int avail_shortage, int vnodes_skipped, int recycle_count)
                 * Page has been successfully busied and it and its queue
                 * is no longer spinlocked.
                 */
-               if ((m->flags & PG_UNMANAGED) ||
+               if ((m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) ||
                    m->hold_count ||
                    m->wire_count) {
                        vm_page_deactivate(m);
index b4047d1..1fa22e1 100644 (file)
@@ -484,7 +484,7 @@ vnode_pager_setsize(struct vnode *vp, vm_ooffset_t nsize)
 void
 vnode_pager_freepage(vm_page_t m)
 {
-       if (m->busy || m->wire_count) {
+       if (m->busy || m->wire_count || (m->flags & PG_NEED_COMMIT)) {
                vm_page_activate(m);
                vm_page_wakeup(m);
        } else {