kernel - Remove unneeded critical sections from VM code, add pmap asserts
author    Matthew Dillon <dillon@apollo.backplane.com>
          Fri, 3 Dec 2010 16:48:05 +0000 (08:48 -0800)
committer Matthew Dillon <dillon@apollo.backplane.com>
          Fri, 3 Dec 2010 16:51:14 +0000 (08:51 -0800)
* Various bits of VM code now need only vm_token and no longer require
  a critical section (see the condensed before/after sketch following
  this list).

* Add pmap_page_assertzero() assertions in a couple of places when
  PMAP_DEBUG is enabled.

* Add PMAP_DEBUG as a global kernel config option.

* Be a bit more conservative and clear PG_ZERO for all page table
  pages, not just terminal page table pages.  When such pages are
  finally freed they will be zero again due to the way the pmap code
  works, but don't rely on that assumption.
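
The mechanical change is the same everywhere: code that bracketed page
lookups and queue manipulation with both a critical section and the VM
token now relies on vm_token alone.  A condensed before/after sketch,
lifted from the sendfile path in the first hunk below (obj, pindex and
pg are assumed to be in scope; the calls are the real kernel APIs):

	/* Before: belt-and-suspenders serialization. */
	crit_enter();
	lwkt_gettoken(&vm_token);
	pg = vm_page_lookup(obj, pindex);
	if (pg != NULL)
		vm_page_wire(pg);
	lwkt_reltoken(&vm_token);
	crit_exit();

	/* After: the token alone serializes the lookup and wire. */
	lwkt_gettoken(&vm_token);
	pg = vm_page_lookup(obj, pindex);
	if (pg != NULL)
		vm_page_wire(pg);
	lwkt_reltoken(&vm_token);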

sys/conf/options
sys/kern/uipc_syscalls.c
sys/platform/pc64/x86_64/pmap.c
sys/vm/vm_fault.c
sys/vm/vm_page.c

diff --git a/sys/conf/options b/sys/conf/options
index fdf9cae..5409cb1 100644
@@ -483,6 +483,7 @@ DEBUG_INTERRUPTS    opt_global.h
 SOCKBUF_DEBUG          opt_global.h
 PANIC_DEBUG            opt_global.h
 MBUF_DEBUG             opt_global.h
+PMAP_DEBUG             opt_global.h
 
 # Sample system/interrupt PC
 DEBUG_PCTRACK          opt_pctrack.h
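
With PMAP_DEBUG registered against opt_global.h, it can be turned on
from a kernel configuration file like any other global option.  A
minimal sketch; the config name MYKERNEL is hypothetical, the stock
X86_64_GENERIC name and the include directive are assumptions, and if
your config(8) lacks include, add the options line to a copy of the
stock configuration instead:

	# MYKERNEL - hypothetical config enabling the new pmap assertions
	include		X86_64_GENERIC
	ident		MYKERNEL
	options		PMAP_DEBUG	# verify PG_ZERO pages are really zero
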
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index a038d48..3ccc466 100644
@@ -1643,7 +1643,6 @@ retry_lookup:
                 *      interrupt can free the page) through to the
                 *      vm_page_wire() call.
                 */
-               crit_enter();
                lwkt_gettoken(&vm_token);
                pg = vm_page_lookup(obj, pindex);
                if (pg == NULL) {
@@ -1662,7 +1661,6 @@ retry_lookup:
                }
                vm_page_wire(pg);
                lwkt_reltoken(&vm_token);
-               crit_exit();
 
                /*
                 * If page is not valid for what we need, initiate I/O
diff --git a/sys/platform/pc64/x86_64/pmap.c b/sys/platform/pc64/x86_64/pmap.c
index 5594467..ecf1967 100644
@@ -1371,6 +1371,10 @@ pmap_pinit(struct pmap *pmap)
        }
        if ((ptdpg->flags & PG_ZERO) == 0)
                bzero(pmap->pm_pml4, PAGE_SIZE);
+#ifdef PMAP_DEBUG
+       else
+               pmap_page_assertzero(VM_PAGE_TO_PHYS(ptdpg));
+#endif
 
        pmap->pm_pml4[KPML4I] = KPDPphys | PG_RW | PG_V | PG_U;
        pmap->pm_pml4[DMPML4I] = DMPDPphys | PG_RW | PG_V | PG_U;
@@ -1571,6 +1575,11 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex)
        if ((m->flags & PG_ZERO) == 0) {
                pmap_zero_page(VM_PAGE_TO_PHYS(m));
        }
+#ifdef PMAP_DEBUG
+       else {
+               pmap_page_assertzero(VM_PAGE_TO_PHYS(m));
+       }
+#endif
 
        KASSERT(m->queue == PQ_NONE,
                ("_pmap_allocpte: %p->queue != PQ_NONE", m));
@@ -1582,6 +1591,8 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex)
        m->hold_count++;
        if (m->wire_count++ == 0)
                vmstats.v_wire_count++;
+       m->valid = VM_PAGE_BITS_ALL;
+       vm_page_flag_clear(m, PG_ZERO);
 
        /*
         * Map the pagetable page into the process address space, if
@@ -1720,8 +1731,10 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex)
        pmap->pm_ptphint = m;
        ++pmap->pm_stats.resident_count;
 
+#if 0
        m->valid = VM_PAGE_BITS_ALL;
        vm_page_flag_clear(m, PG_ZERO);
+#endif
        vm_page_flag_set(m, PG_MAPPED);
        vm_page_wakeup(m);
 
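
The two hunks above implement the last bullet: m->valid and the
PG_ZERO clear now happen as soon as the page table page is held and
wired, rather than after it has been entered into the pmap (the old
location is kept under #if 0 for reference).  Condensed ordering
sketch; the statements come from the hunks, the comments are
annotation:

	m->hold_count++;
	if (m->wire_count++ == 0)
		vmstats.v_wire_count++;
	m->valid = VM_PAGE_BITS_ALL;	/* moved up: mark valid early */
	vm_page_flag_clear(m, PG_ZERO);	/* moved up: page is consumed */
	/* ... map the page table page, bump resident_count ... */
	vm_page_flag_set(m, PG_MAPPED);
	vm_page_wakeup(m);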
@@ -3187,13 +3200,14 @@ pmap_zero_page(vm_paddr_t phys)
 void
 pmap_page_assertzero(vm_paddr_t phys)
 {
-       vm_offset_t virt = PHYS_TO_DMAP(phys);
-       int i;
+       vm_offset_t va = PHYS_TO_DMAP(phys);
+       size_t i;
 
        for (i = 0; i < PAGE_SIZE; i += sizeof(long)) {
-           if (*(long *)((char *)virt + i) != 0) {
-               panic("pmap_page_assertzero() @ %p not zero!\n", (void *)virt);
-           }
+               if (*(long *)((char *)va + i) != 0) {
+                       panic("pmap_page_assertzero() @ %p not zero!\n",
+                             (void *)(intptr_t)va);
+               }
        }
 }
 
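
pmap_page_assertzero() walks the page a long at a time through its
DMAP alias and panics on the first nonzero word.  The callers added in
this commit all follow the same trust-but-verify shape; a condensed
sketch, where check_zero_page() is a hypothetical wrapper name and
everything inside it comes from the hunks above:

	static void
	check_zero_page(vm_page_t m)
	{
		if ((m->flags & PG_ZERO) == 0) {
			/* Not pre-zeroed; zero it now. */
			pmap_zero_page(VM_PAGE_TO_PHYS(m));
		} else {
	#ifdef PMAP_DEBUG
			/* Claimed pre-zeroed; panic if the flag lied. */
			pmap_page_assertzero(VM_PAGE_TO_PHYS(m));
	#endif
		}
	}
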
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 67333ee..8597199 100644
@@ -1002,12 +1002,8 @@ vm_fault_object(struct faultstate *fs,
                }
 
                /*
-                * See if page is resident.  spl protection is required
-                * to avoid an interrupt unbusy/free race against our
-                * lookup.  We must hold the protection through a page
-                * allocation or busy.
+                * See if the page is resident.
                 */
-               crit_enter();
                fs->m = vm_page_lookup(fs->object, pindex);
                if (fs->m != NULL) {
                        int queue;
@@ -1034,7 +1030,6 @@ vm_fault_object(struct faultstate *fs,
                                vm_object_deallocate(fs->first_object);
                                fs->first_object = NULL;
                                lwkt_reltoken(&vm_token);
-                               crit_exit();
                                return (KERN_TRY_AGAIN);
                        }
 
@@ -1051,7 +1046,6 @@ vm_fault_object(struct faultstate *fs,
                                unlock_and_deallocate(fs);
                                vm_waitpfault();
                                lwkt_reltoken(&vm_token);
-                               crit_exit();
                                return (KERN_TRY_AGAIN);
                        }
 
@@ -1066,7 +1060,6 @@ vm_fault_object(struct faultstate *fs,
                         * page busy.
                         */
                        vm_page_busy(fs->m);
-                       crit_exit();
 
                        if (fs->m->object != &kernel_object) {
                                if ((fs->m->valid & VM_PAGE_BITS_ALL) !=
@@ -1086,8 +1079,6 @@ vm_fault_object(struct faultstate *fs,
                /*
                 * Page is not resident, If this is the search termination
                 * or the pager might contain the page, allocate a new page.
-                *
-                * NOTE: We are still in a critical section.
                 */
                if (TRYPAGER(fs) || fs->object == fs->first_object) {
                        /*
@@ -1095,7 +1086,6 @@ vm_fault_object(struct faultstate *fs,
                         */
                        if (pindex >= fs->object->size) {
                                lwkt_reltoken(&vm_token);
-                               crit_exit();
                                unlock_and_deallocate(fs);
                                return (KERN_PROTECTION_FAILURE);
                        }
@@ -1109,7 +1099,6 @@ vm_fault_object(struct faultstate *fs,
                                limticks = vm_fault_ratelimit(curproc->p_vmspace);
                                if (limticks) {
                                        lwkt_reltoken(&vm_token);
-                                       crit_exit();
                                        unlock_and_deallocate(fs);
                                        tsleep(curproc, 0, "vmrate", limticks);
                                        fs->didlimit = 1;
@@ -1127,13 +1116,11 @@ vm_fault_object(struct faultstate *fs,
                        }
                        if (fs->m == NULL) {
                                lwkt_reltoken(&vm_token);
-                               crit_exit();
                                unlock_and_deallocate(fs);
                                vm_waitpfault();
                                return (KERN_TRY_AGAIN);
                        }
                }
-               crit_exit();
 
 readrest:
                /*
@@ -1191,7 +1178,6 @@ readrest:
                                                scan_count = 16;
                                }
 
-                               crit_enter();
                                while (scan_count) {
                                        vm_page_t mt;
 
@@ -1222,7 +1208,6 @@ skip:
                                        --scan_count;
                                        --scan_pindex;
                                }
-                               crit_exit();
 
                                seqaccess = 1;
                        }
@@ -1361,6 +1346,10 @@ skip:
                        if ((fs->m->flags & PG_ZERO) == 0) {
                                vm_page_zero_fill(fs->m);
                        } else {
+#ifdef PMAP_DEBUG
+                               pmap_page_assertzero(VM_PAGE_TO_PHYS(fs->m));
+#endif
+                               vm_page_flag_clear(fs->m, PG_ZERO);
                                mycpu->gd_cnt.v_ozfod++;
                        }
                        mycpu->gd_cnt.v_zfod++;
@@ -1531,10 +1520,8 @@ skip:
                vm_object_set_writeable_dirty(fs->m->object);
                vm_set_nosync(fs->m, fs->entry);
                if (fs->fault_flags & VM_FAULT_DIRTY) {
-                       crit_enter();
                        vm_page_dirty(fs->m);
                        swap_pager_unswapped(fs->m);
-                       crit_exit();
                }
        }
 
@@ -1758,7 +1745,7 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map,
                 * memory.)
                 */
                src_m = vm_page_lookup(src_object,
-                       OFF_TO_IDX(dst_offset + src_offset));
+                                      OFF_TO_IDX(dst_offset + src_offset));
                if (src_m == NULL)
                        panic("vm_fault_copy_wired: page missing");
 
@@ -1867,7 +1854,6 @@ vm_fault_additional_pages(vm_page_t m, int rbehind, int rahead,
                        startpindex = pindex - rbehind;
                }
 
-               crit_enter();
                lwkt_gettoken(&vm_token);
                for (tpindex = pindex; tpindex > startpindex; --tpindex) {
                        if (vm_page_lookup(object, tpindex - 1))
@@ -1879,7 +1865,6 @@ vm_fault_additional_pages(vm_page_t m, int rbehind, int rahead,
                        rtm = vm_page_alloc(object, tpindex, VM_ALLOC_SYSTEM);
                        if (rtm == NULL) {
                                lwkt_reltoken(&vm_token);
-                               crit_exit();
                                for (j = 0; j < i; j++) {
                                        vm_page_free(marray[j]);
                                }
@@ -1892,7 +1877,6 @@ vm_fault_additional_pages(vm_page_t m, int rbehind, int rahead,
                        ++tpindex;
                }
                lwkt_reltoken(&vm_token);
-               crit_exit();
        } else {
                i = 0;
        }
@@ -1912,7 +1896,6 @@ vm_fault_additional_pages(vm_page_t m, int rbehind, int rahead,
        if (endpindex > object->size)
                endpindex = object->size;
 
-       crit_enter();
        lwkt_gettoken(&vm_token);
        while (tpindex < endpindex) {
                if (vm_page_lookup(object, tpindex))
@@ -1925,7 +1908,6 @@ vm_fault_additional_pages(vm_page_t m, int rbehind, int rahead,
                ++tpindex;
        }
        lwkt_reltoken(&vm_token);
-       crit_exit();
 
        return (i);
 }
@@ -2016,12 +1998,6 @@ vm_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry, int prot)
        else if (starta > addra)
                starta = 0;
 
-       /*
-        * critical section protection is required to maintain the
-        * page/object association, interrupts can free pages and remove
-        * them from their objects.
-        */
-       crit_enter();
        lwkt_gettoken(&vm_token);
        for (i = 0; i < PAGEORDER_SIZE; i++) {
                vm_object_t lobject;
@@ -2073,6 +2049,9 @@ vm_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry, int prot)
                                if ((m->flags & PG_ZERO) == 0) {
                                        vm_page_zero_fill(m);
                                } else {
+#ifdef PMAP_DEBUG
+                                       pmap_page_assertzero(VM_PAGE_TO_PHYS(m));
+#endif
                                        vm_page_flag_clear(m, PG_ZERO);
                                        mycpu->gd_cnt.v_ozfod++;
                                }
@@ -2141,5 +2120,4 @@ vm_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry, int prot)
                }
        }
        lwkt_reltoken(&vm_token);
-       crit_exit();
 }
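
The zero-fill branches in vm_fault_object() and vm_prefault() also
clear PG_ZERO once the page is handed out: the page is about to be
mapped and possibly dirtied, so the flag must not survive.  The
counters distinguish ordinary zero-fill-on-demand faults (v_zfod) from
the optimized case satisfied by a pre-zeroed page (v_ozfod).  The
branch, condensed from the hunks above with annotation added:

	if ((m->flags & PG_ZERO) == 0) {
		vm_page_zero_fill(m);			/* zero on demand */
	} else {
	#ifdef PMAP_DEBUG
		pmap_page_assertzero(VM_PAGE_TO_PHYS(m));
	#endif
		vm_page_flag_clear(m, PG_ZERO);		/* page is consumed */
		mycpu->gd_cnt.v_ozfod++;		/* optimized ZFOD */
	}
	mycpu->gd_cnt.v_zfod++;				/* all ZFOD faults */
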
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index f98a7cc..6bf0aaf 100644
@@ -112,8 +112,6 @@ struct vm_page_action_list  action_list[VMACTION_HSIZE];
 static volatile int vm_pages_waiting;
 
 
-#define ASSERT_IN_CRIT_SECTION()       KKASSERT(crit_test(curthread));
-
 RB_GENERATE2(vm_page_rb_tree, vm_page, rb_entry, rb_vm_page_compare,
             vm_pindex_t, pindex);
 
@@ -420,7 +418,6 @@ vm_page_unhold(vm_page_t m)
 void
 vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
 {
-       ASSERT_IN_CRIT_SECTION();
        ASSERT_LWKT_TOKEN_HELD(&vm_token);
        if (m->object != NULL)
                panic("vm_page_insert: already inserted");
@@ -479,11 +476,9 @@ vm_page_remove(vm_page_t m)
 {
        vm_object_t object;
 
-       crit_enter();
        lwkt_gettoken(&vm_token);
        if (m->object == NULL) {
                lwkt_reltoken(&vm_token);
-               crit_exit();
                return;
        }
 
@@ -502,7 +497,6 @@ vm_page_remove(vm_page_t m)
        m->object = NULL;
 
        lwkt_reltoken(&vm_token);
-       crit_exit();
 }
 
 /*
@@ -520,9 +514,7 @@ vm_page_lookup(vm_object_t object, vm_pindex_t pindex)
         * Search the hash table for this object/offset pair
         */
        ASSERT_LWKT_TOKEN_HELD(&vm_token);
-       crit_enter();
        m = vm_page_rb_tree_RB_LOOKUP(&object->rb_memq, pindex);
-       crit_exit();
        KKASSERT(m == NULL || (m->object == object && m->pindex == pindex));
        return(m);
 }
@@ -553,7 +545,6 @@ vm_page_lookup(vm_object_t object, vm_pindex_t pindex)
 void
 vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
 {
-       crit_enter();
        lwkt_gettoken(&vm_token);
        vm_page_remove(m);
        vm_page_insert(m, new_object, new_pindex);
@@ -562,7 +553,6 @@ vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
        vm_page_dirty(m);
        vm_page_wakeup(m);
        lwkt_reltoken(&vm_token);
-       crit_exit();
 }
 
 /*
@@ -759,7 +749,6 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int page_req)
 {
        vm_page_t m = NULL;
 
-       crit_enter();
        lwkt_gettoken(&vm_token);
        
        KKASSERT(object != NULL);
@@ -822,7 +811,6 @@ loop:
                 * On failure return NULL
                 */
                lwkt_reltoken(&vm_token);
-               crit_exit();
 #if defined(DIAGNOSTIC)
                if (vmstats.v_cache_count > 0)
                        kprintf("vm_page_alloc(NORMAL): missing pages on cache queue: %d\n", vmstats.v_cache_count);
@@ -835,7 +823,6 @@ loop:
                 * No pages available, wakeup the pageout daemon and give up.
                 */
                lwkt_reltoken(&vm_token);
-               crit_exit();
                vm_pageout_deficit++;
                pagedaemon_wakeup();
                return (NULL);
@@ -871,7 +858,7 @@ loop:
        m->valid = 0;
 
        /*
-        * vm_page_insert() is safe prior to the crit_exit().  Note also that
+        * vm_page_insert() is safe while holding vm_token.  Note also that
         * inserting a page here does not insert it into the pmap (which
         * could cause us to block allocating memory).  We cannot block 
         * anywhere.
@@ -885,7 +872,6 @@ loop:
        pagedaemon_wakeup();
 
        lwkt_reltoken(&vm_token);
-       crit_exit();
 
        /*
         * A PG_BUSY page is returned.
@@ -992,7 +978,6 @@ vm_waitpfault(void)
 void
 vm_page_activate(vm_page_t m)
 {
-       crit_enter();
        lwkt_gettoken(&vm_token);
        if (m->queue != PQ_ACTIVE) {
                if ((m->queue - m->pc) == PQ_CACHE)
@@ -1014,7 +999,6 @@ vm_page_activate(vm_page_t m)
                        m->act_count = ACT_INIT;
        }
        lwkt_reltoken(&vm_token);
-       crit_exit();
 }
 
 /*
@@ -1087,7 +1071,6 @@ vm_page_free_toq(vm_page_t m)
 {
        struct vpgqueues *pq;
 
-       crit_enter();
        lwkt_gettoken(&vm_token);
        mycpu->gd_cnt.v_tfree++;
 
@@ -1120,7 +1103,6 @@ vm_page_free_toq(vm_page_t m)
        if ((m->flags & PG_FICTITIOUS) != 0) {
                vm_page_wakeup(m);
                lwkt_reltoken(&vm_token);
-               crit_exit();
                return;
        }
 
@@ -1166,7 +1148,6 @@ vm_page_free_toq(vm_page_t m)
        vm_page_wakeup(m);
        vm_page_free_wakeup();
        lwkt_reltoken(&vm_token);
-       crit_exit();
 }
 
 /*
@@ -1184,12 +1165,12 @@ vm_page_free_fromq_fast(void)
        vm_page_t m;
        int i;
 
-       crit_enter();
        lwkt_gettoken(&vm_token);
        for (i = 0; i < PQ_L2_SIZE; ++i) {
                m = vm_page_list_find(PQ_FREE, qi, FALSE);
                qi = (qi + PQ_PRIME2) & PQ_L2_MASK;
                if (m && (m->flags & PG_ZERO) == 0) {
+                       KKASSERT(m->busy == 0 && (m->flags & PG_BUSY) == 0);
                        vm_page_unqueue_nowakeup(m);
                        vm_page_busy(m);
                        break;
@@ -1197,7 +1178,6 @@ vm_page_free_fromq_fast(void)
                m = NULL;
        }
        lwkt_reltoken(&vm_token);
-       crit_exit();
        return (m);
 }
 
@@ -1225,7 +1205,6 @@ vm_page_free_fromq_fast(void)
 void
 vm_page_unmanage(vm_page_t m)
 {
-       ASSERT_IN_CRIT_SECTION();
        ASSERT_LWKT_TOKEN_HELD(&vm_token);
        if ((m->flags & PG_UNMANAGED) == 0) {
                if (m->wire_count == 0)
@@ -1250,7 +1229,6 @@ vm_page_wire(vm_page_t m)
         * it is already off the queues).  Don't do anything with fictitious
         * pages because they are always wired.
         */
-       crit_enter();
        lwkt_gettoken(&vm_token);
        if ((m->flags & PG_FICTITIOUS) == 0) {
                if (m->wire_count == 0) {
@@ -1263,7 +1241,6 @@ vm_page_wire(vm_page_t m)
                        ("vm_page_wire: wire_count overflow m=%p", m));
        }
        lwkt_reltoken(&vm_token);
-       crit_exit();
 }
 
 /*
@@ -1294,7 +1271,6 @@ vm_page_wire(vm_page_t m)
 void
 vm_page_unwire(vm_page_t m, int activate)
 {
-       crit_enter();
        lwkt_gettoken(&vm_token);
        if (m->flags & PG_FICTITIOUS) {
                /* do nothing */
@@ -1323,7 +1299,6 @@ vm_page_unwire(vm_page_t m, int activate)
                }
        }
        lwkt_reltoken(&vm_token);
-       crit_exit();
 }
 
 
@@ -1374,11 +1349,9 @@ _vm_page_deactivate(vm_page_t m, int athead)
 void
 vm_page_deactivate(vm_page_t m)
 {
-       crit_enter();
        lwkt_gettoken(&vm_token);
        _vm_page_deactivate(m, 0);
        lwkt_reltoken(&vm_token);
-       crit_exit();
 }
 
 /*
@@ -1390,23 +1363,19 @@ vm_page_deactivate(vm_page_t m)
 int
 vm_page_try_to_cache(vm_page_t m)
 {
-       crit_enter();
        lwkt_gettoken(&vm_token);
        if (m->dirty || m->hold_count || m->busy || m->wire_count ||
            (m->flags & (PG_BUSY|PG_UNMANAGED))) {
                lwkt_reltoken(&vm_token);
-               crit_exit();
                return(0);
        }
        vm_page_test_dirty(m);
        if (m->dirty) {
                lwkt_reltoken(&vm_token);
-               crit_exit();
                return(0);
        }
        vm_page_cache(m);
        lwkt_reltoken(&vm_token);
-       crit_exit();
        return(1);
 }
 
@@ -1419,25 +1388,21 @@ vm_page_try_to_cache(vm_page_t m)
 int
 vm_page_try_to_free(vm_page_t m)
 {
-       crit_enter();
        lwkt_gettoken(&vm_token);
        if (m->dirty || m->hold_count || m->busy || m->wire_count ||
            (m->flags & (PG_BUSY|PG_UNMANAGED))) {
                lwkt_reltoken(&vm_token);
-               crit_exit();
                return(0);
        }
        vm_page_test_dirty(m);
        if (m->dirty) {
                lwkt_reltoken(&vm_token);
-               crit_exit();
                return(0);
        }
        vm_page_busy(m);
        vm_page_protect(m, VM_PROT_NONE);
        vm_page_free(m);
        lwkt_reltoken(&vm_token);
-       crit_exit();
        return(1);
 }
 
@@ -1452,7 +1417,6 @@ vm_page_try_to_free(vm_page_t m)
 void
 vm_page_cache(vm_page_t m)
 {
-       ASSERT_IN_CRIT_SECTION();
        ASSERT_LWKT_TOKEN_HELD(&vm_token);
 
        if ((m->flags & (PG_BUSY|PG_UNMANAGED)) || m->busy ||
@@ -1538,7 +1502,6 @@ vm_page_dontneed(vm_page_t m)
        /*
         * occassionally leave the page alone
         */
-       crit_enter();
        lwkt_gettoken(&vm_token);
        if ((dnw & 0x01F0) == 0 ||
            m->queue == PQ_INACTIVE || 
@@ -1547,7 +1510,6 @@ vm_page_dontneed(vm_page_t m)
                if (m->act_count >= ACT_INIT)
                        --m->act_count;
                lwkt_reltoken(&vm_token);
-               crit_exit();
                return;
        }
 
@@ -1569,7 +1531,6 @@ vm_page_dontneed(vm_page_t m)
        }
        _vm_page_deactivate(m, head);
        lwkt_reltoken(&vm_token);
-       crit_exit();
 }
 
 /*
@@ -1599,7 +1560,6 @@ vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
 
        KKASSERT(allocflags &
                (VM_ALLOC_NORMAL|VM_ALLOC_INTERRUPT|VM_ALLOC_SYSTEM));
-       crit_enter();
        lwkt_gettoken(&vm_token);
 retrylookup:
        if ((m = vm_page_lookup(object, pindex)) != NULL) {
@@ -1630,7 +1590,6 @@ retrylookup:
        }
 done:
        lwkt_reltoken(&vm_token);
-       crit_exit();
        return(m);
 }
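
With ASSERT_IN_CRIT_SECTION() removed, the precondition for the
queue-manipulating functions is simply that the caller holds vm_token,
and that is what vm_page_insert(), vm_page_unmanage() and
vm_page_cache() now assert.  A condensed sketch of the convention;
my_page_op() is a hypothetical example, while the assertion macro and
token calls are the real APIs:

	/*
	 * Callee: assert the locking precondition instead of requiring
	 * a critical section.
	 */
	static void
	my_page_op(vm_page_t m)
	{
		ASSERT_LWKT_TOKEN_HELD(&vm_token);
		/* ... manipulate m and the page queues ... */
	}

	/* Caller: taking the token alone satisfies the precondition. */
	lwkt_gettoken(&vm_token);
	my_page_op(m);
	lwkt_reltoken(&vm_token);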