vkernel64 - Fix pmap panic
author     Matthew Dillon <dillon@apollo.backplane.com>
Fri, 28 Nov 2014 06:57:36 +0000 (22:57 -0800)
committer  Matthew Dillon <dillon@apollo.backplane.com>
Fri, 28 Nov 2014 06:57:36 +0000 (22:57 -0800)
* Fix a pmap panic complaining about the pm_active cpumask not being zero
  when the pmap is released/destroyed.

* The vkernel64 swtch code was improperly setting the pm_active bits from
  the gd_other_cpus field instead of the gd_cpumask field, which wound
  up setting lots of bits instead of the one bit we wanted to set (see
  the sketch below).

* This long-known bug was difficult to reproduce consistently enough to
  locate until Robert found a quick way to trigger it.
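
  For reference, a minimal userland sketch (not part of this commit; the
  field names are borrowed loosely from the kernel's globaldata and the
  64-bit masks stand in for cpumask_t) of why ORing gd_other_cpus instead
  of gd_cpumask into pm_active leaves stale bits behind by the time
  pmap_release() runs:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		/* pretend 4 cpus, we are cpu 2 */
		uint64_t gd_cpumask = 1ULL << 2;		/* just our bit  */
		uint64_t gd_other_cpus = 0xfULL & ~gd_cpumask;	/* everyone else */
		uint64_t pm_active = 0;

		pm_active |= gd_other_cpus;	/* buggy switch-in: many wrong bits */
		pm_active &= ~gd_cpumask;	/* switch-out clears only our bit   */
		printf("buggy: pm_active = %#jx\n", (uintmax_t)pm_active);
		/* -> nonzero, so the new KASSERT in pmap_release() would fire */

		pm_active = 0;
		pm_active |= gd_cpumask;	/* fixed switch-in: only our bit    */
		pm_active &= ~gd_cpumask;	/* switch-out clears it again       */
		printf("fixed: pm_active = %#jx\n", (uintmax_t)pm_active);
		/* -> zero, the pmap can be released cleanly */
		return 0;
	}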

Reproduced-by: Robert Garrett
sys/platform/vkernel64/platform/pmap.c
sys/platform/vkernel64/x86_64/swtch.s

diff --git a/sys/platform/vkernel64/platform/pmap.c b/sys/platform/vkernel64/platform/pmap.c
index a1400c7..8bdf4ef 100644
--- a/sys/platform/vkernel64/platform/pmap.c
+++ b/sys/platform/vkernel64/platform/pmap.c
@@ -1549,6 +1549,7 @@ pmap_release(struct pmap *pmap)
 
        KKASSERT(pmap != &kernel_pmap);
 
+       lwkt_gettoken(&vm_token);
 #if defined(DIAGNOSTIC)
        if (object->ref_count != 1)
                panic("pmap_release: pteobj reference count != 1");
@@ -1557,6 +1558,11 @@ pmap_release(struct pmap *pmap)
        info.pmap = pmap;
        info.object = object;
 
+       KASSERT(CPUMASK_TESTZERO(pmap->pm_active),
+               ("pmap %p still active! %016jx",
+               pmap,
+               (uintmax_t)CPUMASK_LOWMASK(pmap->pm_active)));
+
        spin_lock(&pmap_spin);
        TAILQ_REMOVE(&pmap_list, pmap, pm_pmnode);
        spin_unlock(&pmap_spin);
@@ -1575,6 +1581,7 @@ pmap_release(struct pmap *pmap)
                }
        } while (info.error);
        vm_object_drop(object);
+       lwkt_reltoken(&vm_token);
 }
 
 static int
@@ -1815,7 +1822,8 @@ pmap_collect(void)
        pmap_pagedaemon_waken = 0;
 
        if (warningdone < 5) {
-               kprintf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
+               kprintf("pmap_collect: collecting pv entries -- "
+                       "suggest increasing PMAP_SHPGPERPROC\n");
                warningdone++;
        }
 
@@ -3329,14 +3337,14 @@ pmap_replacevm(struct proc *p, struct vmspace *newvm, int adjrefs)
        crit_enter();
        oldvm = p->p_vmspace;
        if (oldvm != newvm) {
+               if (adjrefs)
+                       vmspace_ref(newvm);
                p->p_vmspace = newvm;
                KKASSERT(p->p_nthreads == 1);
                lp = RB_ROOT(&p->p_lwp_tree);
                pmap_setlwpvm(lp, newvm);
-               if (adjrefs) {
-                       vmspace_ref(newvm);
+               if (adjrefs)
                        vmspace_rel(oldvm);
-               }
        }
        crit_exit();
 }
@@ -3353,25 +3361,23 @@ pmap_setlwpvm(struct lwp *lp, struct vmspace *newvm)
        struct pmap *pmap;
 
        oldvm = lp->lwp_vmspace;
-       if (oldvm == newvm)
-               return;
-       lp->lwp_vmspace = newvm;
-       if (curthread->td_lwp != lp)
-               return;
-       /*
-        * NOTE: We don't have to worry about the CPULOCK here because
-        *       the virtual kernel doesn't call this function when VMM
-        *       is enabled (and depends on the host kernel when it isn't).
-        */
-       crit_enter();
-       pmap = vmspace_pmap(newvm);
-       ATOMIC_CPUMASK_ORBIT(pmap->pm_active, mycpu->gd_cpuid);
+       if (oldvm != newvm) {
+               crit_enter();
+               lp->lwp_vmspace = newvm;
+               if (curthread->td_lwp == lp) {
+                       pmap = vmspace_pmap(newvm);
+                       ATOMIC_CPUMASK_ORBIT(pmap->pm_active, mycpu->gd_cpuid);
+                       if (pmap->pm_active_lock & CPULOCK_EXCL)
+                               pmap_interlock_wait(newvm);
 #if defined(SWTCH_OPTIM_STATS)
-       tlb_flush_count++;
+                       tlb_flush_count++;
 #endif
-       pmap = vmspace_pmap(oldvm);
-       ATOMIC_CPUMASK_NANDBIT(pmap->pm_active, mycpu->gd_cpuid);
-       crit_exit();
+                       pmap = vmspace_pmap(oldvm);
+                       ATOMIC_CPUMASK_NANDBIT(pmap->pm_active,
+                                              mycpu->gd_cpuid);
+               }
+               crit_exit();
+       }
 }
 
 /*
@@ -3384,8 +3390,14 @@ pmap_interlock_wait (struct vmspace *vm)
 {
        pmap_t pmap = vmspace_pmap(vm);
 
-       while (pmap->pm_active_lock & CPULOCK_EXCL)
-               pthread_yield();
+       if (pmap->pm_active_lock & CPULOCK_EXCL) {
+               crit_enter();
+               while (pmap->pm_active_lock & CPULOCK_EXCL) {
+                       cpu_ccfence();
+                       pthread_yield();
+               }
+               crit_exit();
+       }
 }
 
 vm_offset_t
diff --git a/sys/platform/vkernel64/x86_64/swtch.s b/sys/platform/vkernel64/x86_64/swtch.s
index d92a7bd..340c368 100644
--- a/sys/platform/vkernel64/x86_64/swtch.s
+++ b/sys/platform/vkernel64/x86_64/swtch.s
@@ -283,7 +283,7 @@ ENTRY(cpu_exit_switch)
        ret
 
 /*
- * cpu_heavy_restore() (current thread in %rax on entry)
+ * cpu_heavy_restore() (current thread in %rax on entry, %rbx is old thread)
  *
  *     Restore the thread after an LWKT switch.  This entry is normally
  *     called via the LWKT switch restore function, which was pulled
@@ -322,13 +322,13 @@ ENTRY(cpu_heavy_restore)
        movq    TD_LWP(%rax),%rcx
        movq    LWP_VMSPACE(%rcx), %rcx         /* RCX = vmspace */
 
-       movq    PCPU(other_cpus)+0, %rsi
+       movq    PCPU(cpumask)+0, %rsi
        MPLOCKED orq    %rsi, VM_PMAP+PM_ACTIVE+0(%rcx)
-       movq    PCPU(other_cpus)+8, %rsi
+       movq    PCPU(cpumask)+8, %rsi
        MPLOCKED orq    %rsi, VM_PMAP+PM_ACTIVE+8(%rcx)
-       movq    PCPU(other_cpus)+16, %rsi
+       movq    PCPU(cpumask)+16, %rsi
        MPLOCKED orq    %rsi, VM_PMAP+PM_ACTIVE+16(%rcx)
-       movq    PCPU(other_cpus)+24, %rsi
+       movq    PCPU(cpumask)+24, %rsi
        MPLOCKED orq    %rsi, VM_PMAP+PM_ACTIVE+24(%rcx)
 
        movl    VM_PMAP+PM_ACTIVE_LOCK(%rcx),%esi
@@ -580,6 +580,7 @@ ENTRY(savectx)
 
 /*
  * cpu_idle_restore()  (current thread in %rax on entry) (one-time execution)
+ *                     (old thread is %rbx on entry)
  *
  *     Don't bother setting up any regs other than %rbp so backtraces
  *     don't die.  This restore function is used to bootstrap into the
@@ -624,6 +625,7 @@ ENTRY(cpu_idle_restore)
 
 /*
  * cpu_kthread_restore() (current thread is %rax on entry) (one-time execution)
+ *                      (old thread is %rbx on entry)
  *
  *     Don't bother setting up any regs other then %rbp so backtraces
  *     don't die.  This restore function is used to bootstrap into an