X-Git-Url: https://gitweb.dragonflybsd.org/dragonfly.git/blobdiff_plain/be71787bd3c8d2d199af9bb30ffb6df730c6b858..03ac22da0ce225ca12694a9b80003ea708a64320:/sys/kern/lwkt_thread.c

diff --git a/sys/kern/lwkt_thread.c b/sys/kern/lwkt_thread.c
index 55b5ab6b07..82014c3d1e 100644
--- a/sys/kern/lwkt_thread.c
+++ b/sys/kern/lwkt_thread.c
@@ -141,6 +141,9 @@ SYSCTL_INT(_lwkt, OID_AUTO, spin_delay, CTLFLAG_RW,
 static int lwkt_spin_method = 1;
 SYSCTL_INT(_lwkt, OID_AUTO, spin_method, CTLFLAG_RW,
         &lwkt_spin_method, 0, "LWKT scheduler behavior when contended");
+static int lwkt_spin_fatal = 0;        /* disabled */
+SYSCTL_INT(_lwkt, OID_AUTO, spin_fatal, CTLFLAG_RW,
+        &lwkt_spin_fatal, 0, "LWKT scheduler spin loops till fatal panic");
 static int preempt_enable = 1;
 SYSCTL_INT(_lwkt, OID_AUTO, preempt_enable, CTLFLAG_RW,
         &preempt_enable, 0, "Enable preemption");
@@ -250,6 +253,7 @@ lwkt_init(void)
 void
 lwkt_schedule_self(thread_t td)
 {
+    KKASSERT((td->td_flags & TDF_MIGRATING) == 0);
     crit_enter_quick(td);
     KASSERT(td != &td->td_gd->gd_idlethread,
             ("lwkt_schedule_self(): scheduling gd_idlethread is illegal!"));
@@ -430,14 +434,14 @@ lwkt_set_comm(thread_t td, const char *ctl, ...)
 void
 lwkt_hold(thread_t td)
 {
-    ++td->td_refs;
+    atomic_add_int(&td->td_refs, 1);
 }
 
 void
 lwkt_rele(thread_t td)
 {
     KKASSERT(td->td_refs > 0);
-    --td->td_refs;
+    atomic_add_int(&td->td_refs, -1);
 }
 
 void
@@ -450,6 +454,7 @@ lwkt_wait_free(thread_t td)
 void
 lwkt_free_thread(thread_t td)
 {
+    KKASSERT(td->td_refs == 0);
     KKASSERT((td->td_flags & (TDF_RUNNING|TDF_PREEMPT_LOCK|TDF_RUNQ)) == 0);
     if (td->td_flags & TDF_ALLOCATED_THREAD) {
         objcache_put(thread_cache, td);
@@ -480,15 +485,18 @@ lwkt_free_thread(thread_t td)
  * different beast and LWKT priorities should not be confused with
  * user process priorities.
  *
- * Note that the td_switch() function cannot do anything that requires
- * the MP lock since the MP lock will have already been setup for
- * the target thread (not the current thread).  It's nice to have a scheduler
- * that does not need the MP lock to work because it allows us to do some
- * really cool high-performance MP lock optimizations.
- *
  * PREEMPTION NOTE: Preemption occurs via lwkt_preempt().  lwkt_switch()
  * is not called by the current thread in the preemption case, only when
  * the preempting thread blocks (in order to return to the original thread).
+ *
+ * SPECIAL NOTE ON SWITCH ATOMICY: Certain operations such as thread
+ * migration and tsleep deschedule the current lwkt thread and call
+ * lwkt_switch().  In particular, the target cpu of the migration fully
+ * expects the thread to become non-runnable and can deadlock against
+ * cpusync operations if we run any IPIs prior to switching the thread out.
+ *
+ * WE MUST BE VERY CAREFUL NOT TO RUN SPLZ DIRECTLY OR INDIRECTLY IF
+ * THE CURRENT THREAD HAS BEEN DESCHEDULED!
  */
 void
 lwkt_switch(void)
@@ -501,6 +509,7 @@ lwkt_switch(void)
     int reqflags;
     int cseq;
     int oseq;
+    int fatal_count;
 
     /*
      * Switching from within a 'fast' (non thread switched) interrupt or IPI
@@ -604,14 +613,11 @@ lwkt_switch(void)
      * Implement round-robin fairq with priority insertion.  The priority
      * insertion is handled by _lwkt_enqueue()
      *
-     * We have to adjust the MP lock for the target thread.  If we
-     * need the MP lock and cannot obtain it we try to locate a
-     * thread that does not need the MP lock.  If we cannot, we spin
-     * instead of HLT.
-     *
-     * A similar issue exists for the tokens held by the target thread.
      * If we cannot obtain ownership of the tokens we cannot immediately
-     * schedule the thread.
+     * schedule the target thread.
+     *
+     * Reminder: Again, we cannot afford to run any IPIs in this path if
+     * the current thread has been descheduled.
      */
     for (;;) {
         /*
@@ -655,7 +661,7 @@ lwkt_switch(void)
 
             if (ntd->td_fairq_accum >= 0)
                 break;
-            splz_check();
+            /*splz_check(); cannot do this here, see above */
             lwkt_fairq_accumulate(gd, ntd);
             TAILQ_REMOVE(&gd->gd_tdrunq, ntd, td_threadq);
             TAILQ_INSERT_TAIL(&gd->gd_tdrunq, ntd, td_threadq);
@@ -815,8 +821,12 @@ skip:
      * idle thread will check for pending reschedules already set
      * (RQF_AST_LWKT_RESCHED) before actually halting so we don't have
      * to here.
+     *
+     * Also, if TDF_RUNQ is not set the current thread is trying to
+     * deschedule, possibly in an atomic fashion.  We cannot afford to
+     * stay here.
      */
-    if (spinning <= 0) {
+    if (spinning <= 0 || (td->td_flags & TDF_RUNQ) == 0) {
         atomic_clear_int(&gd->gd_reqflags, RQF_WAKEUP);
         goto haveidle;
     }
@@ -847,15 +857,27 @@ skip:
      * WARNING!  We can't call splz_check() or anything else here as
      *           it could cause a deadlock.
      */
+#if defined(INVARIANTS) && defined(__amd64__)
+    if ((read_rflags() & PSL_I) == 0) {
+        cpu_enable_intr();
+        panic("lwkt_switch() called with interrupts disabled");
+    }
+#endif
     cseq = atomic_fetchadd_int(&lwkt_cseq_windex, 1);
+    fatal_count = lwkt_spin_fatal;
     while ((oseq = lwkt_cseq_rindex) != cseq) {
         cpu_ccfence();
+#if !defined(_KERNEL_VIRTUAL)
         if (cpu_mi_feature & CPU_MI_MONITOR) {
             cpu_mmw_pause_int(&lwkt_cseq_rindex, oseq);
-        } else {
+        } else
+#endif
+        {
             DELAY(1);
             cpu_lfence();
         }
+        if (fatal_count && --fatal_count == 0)
+            panic("lwkt_switch: fatal spin wait");
     }
     cseq = lwkt_spin_delay;    /* don't trust the system operator */
     cpu_ccfence();
@@ -865,7 +887,7 @@ skip:
         cseq = 1000;
     DELAY(cseq);
     atomic_add_int(&lwkt_cseq_rindex, 1);
-    splz_check();
+    splz_check();    /* ok, we already checked that td is still scheduled */
     /* highest level for(;;) loop */
 }
 
@@ -1225,6 +1247,7 @@ _lwkt_schedule(thread_t td, int reschedok)
 
     KASSERT(td != &td->td_gd->gd_idlethread,
             ("lwkt_schedule(): scheduling gd_idlethread is illegal!"));
+    KKASSERT((td->td_flags & TDF_MIGRATING) == 0);
     crit_enter_gd(mygd);
     KKASSERT(td->td_lwp == NULL || (td->td_lwp->lwp_flag & LWP_ONRUNQ) == 0);
     if (td == mygd->gd_curthread) {
@@ -1326,12 +1349,14 @@ lwkt_acquire(thread_t td)
         cpu_lfence();
         KKASSERT((td->td_flags & TDF_RUNQ) == 0);
         crit_enter_gd(mygd);
+        DEBUG_PUSH_INFO("lwkt_acquire");
         while (td->td_flags & (TDF_RUNNING|TDF_PREEMPT_LOCK)) {
 #ifdef SMP
             lwkt_process_ipiq();
 #endif
             cpu_lfence();
         }
+        DEBUG_POP_INFO();
         cpu_mfence();
         td->td_gd = mygd;
         TAILQ_INSERT_TAIL(&mygd->gd_tdallq, td, td_allq);
@@ -1533,14 +1558,22 @@ lwkt_setcpu_remote(void *arg)
 {
     thread_t td = arg;
     globaldata_t gd = mycpu;
+    int retry = 10000000;
 
+    DEBUG_PUSH_INFO("lwkt_setcpu_remote");
     while (td->td_flags & (TDF_RUNNING|TDF_PREEMPT_LOCK)) {
 #ifdef SMP
         lwkt_process_ipiq();
 #endif
         cpu_lfence();
         cpu_pause();
+        if (--retry == 0) {
+            kprintf("lwkt_setcpu_remote: td->td_flags %08x\n",
+                    td->td_flags);
+            retry = 10000000;
+        }
     }
+    DEBUG_POP_INFO();
     td->td_gd = gd;
     cpu_mfence();
     td->td_flags &= ~TDF_MIGRATING;
@@ -1641,6 +1674,7 @@ lwkt_exit(void)
     tsleep_remove(td);
     lwkt_deschedule_self(td);
     lwkt_remove_tdallq(td);
+    KKASSERT(td->td_refs == 0);
 
     /*
      * Final cleanup
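
For illustration only, below is a minimal standalone sketch of the bounded spin-wait pattern the patch introduces via lwkt_spin_fatal and fatal_count: a contender takes a ticket from a write index, spins until the read index catches up, and optionally aborts after a configurable number of iterations so a wedged spin becomes a hard failure instead of a silent hang. This is userland C11, not DragonFly kernel code; the names bounded_spin_enter/exit, seq_windex, seq_rindex and spin_fatal are hypothetical stand-ins, not identifiers from the source.

/*
 * Standalone sketch -- NOT kernel code.  Mirrors the shape of the
 * lwkt_cseq_windex/lwkt_cseq_rindex spin with an optional fatal count,
 * using C11 atomics and hypothetical names.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

static atomic_uint seq_windex = 0;      /* tickets handed out (cf. lwkt_cseq_windex) */
static atomic_uint seq_rindex = 0;      /* tickets completed  (cf. lwkt_cseq_rindex) */
static unsigned spin_fatal = 0;         /* 0 = disabled, like the lwkt.spin_fatal default */

static void
bounded_spin_enter(void)
{
        unsigned cseq = atomic_fetch_add(&seq_windex, 1);
        unsigned fatal_count = spin_fatal;

        /* Spin until our ticket comes up; give up loudly if it never does. */
        while (atomic_load(&seq_rindex) != cseq) {
                /* a real implementation would pause/monitor or DELAY() here */
                if (fatal_count && --fatal_count == 0) {
                        fprintf(stderr, "bounded_spin_enter: fatal spin wait\n");
                        abort();
                }
        }
}

static void
bounded_spin_exit(void)
{
        atomic_fetch_add(&seq_rindex, 1);       /* release the next ticket */
}

int
main(void)
{
        spin_fatal = 10000000;          /* fail hard rather than hang forever */
        bounded_spin_enter();           /* ticket 0 is satisfied immediately */
        bounded_spin_exit();
        printf("critical section completed\n");
        return 0;
}

The kernel path in the hunk above additionally interleaves cpu_mmw_pause_int()/DELAY() between polls and leaves lwkt_spin_fatal disabled (0) by default, so the panic only triggers when the operator opts in via the sysctl.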