X-Git-Url: https://gitweb.dragonflybsd.org/dragonfly.git/blobdiff_plain/be71787bd3c8d2d199af9bb30ffb6df730c6b858..03ac22da0ce225ca12694a9b80003ea708a64320:/sys/kern/lwkt_thread.c

diff --git a/sys/kern/lwkt_thread.c b/sys/kern/lwkt_thread.c
index 55b5ab6b07..82014c3d1e 100644
--- a/sys/kern/lwkt_thread.c
+++ b/sys/kern/lwkt_thread.c
@@ -141,6 +141,9 @@ SYSCTL_INT(_lwkt, OID_AUTO, spin_delay, CTLFLAG_RW,
 static int lwkt_spin_method = 1;
 SYSCTL_INT(_lwkt, OID_AUTO, spin_method, CTLFLAG_RW,
         &lwkt_spin_method, 0, "LWKT scheduler behavior when contended");
+static int lwkt_spin_fatal = 0;        /* disabled */
+SYSCTL_INT(_lwkt, OID_AUTO, spin_fatal, CTLFLAG_RW,
+        &lwkt_spin_fatal, 0, "LWKT scheduler spin loops till fatal panic");
 static int preempt_enable = 1;
 SYSCTL_INT(_lwkt, OID_AUTO, preempt_enable, CTLFLAG_RW,
         &preempt_enable, 0, "Enable preemption");
@@ -250,6 +253,7 @@ lwkt_init(void)
 void
 lwkt_schedule_self(thread_t td)
 {
+    KKASSERT((td->td_flags & TDF_MIGRATING) == 0);
     crit_enter_quick(td);
     KASSERT(td != &td->td_gd->gd_idlethread,
             ("lwkt_schedule_self(): scheduling gd_idlethread is illegal!"));
@@ -430,14 +434,14 @@ lwkt_set_comm(thread_t td, const char *ctl, ...)
 void
 lwkt_hold(thread_t td)
 {
-    ++td->td_refs;
+    atomic_add_int(&td->td_refs, 1);
 }
 
 void
 lwkt_rele(thread_t td)
 {
     KKASSERT(td->td_refs > 0);
-    --td->td_refs;
+    atomic_add_int(&td->td_refs, -1);
 }
 
 void
@@ -450,6 +454,7 @@ lwkt_wait_free(thread_t td)
 void
 lwkt_free_thread(thread_t td)
 {
+    KKASSERT(td->td_refs == 0);
     KKASSERT((td->td_flags & (TDF_RUNNING|TDF_PREEMPT_LOCK|TDF_RUNQ)) == 0);
     if (td->td_flags & TDF_ALLOCATED_THREAD) {
         objcache_put(thread_cache, td);
@@ -480,15 +485,18 @@ lwkt_free_thread(thread_t td)
  * different beast and LWKT priorities should not be confused with
  * user process priorities.
  *
- * Note that the td_switch() function cannot do anything that requires
- * the MP lock since the MP lock will have already been setup for
- * the target thread (not the current thread).  It's nice to have a scheduler
- * that does not need the MP lock to work because it allows us to do some
- * really cool high-performance MP lock optimizations.
- *
  * PREEMPTION NOTE: Preemption occurs via lwkt_preempt().  lwkt_switch()
  * is not called by the current thread in the preemption case, only when
  * the preempting thread blocks (in order to return to the original thread).
+ *
+ * SPECIAL NOTE ON SWITCH ATOMICY: Certain operations such as thread
+ * migration and tsleep deschedule the current lwkt thread and call
+ * lwkt_switch().  In particular, the target cpu of the migration fully
+ * expects the thread to become non-runnable and can deadlock against
+ * cpusync operations if we run any IPIs prior to switching the thread out.
+ *
+ * WE MUST BE VERY CAREFUL NOT TO RUN SPLZ DIRECTLY OR INDIRECTLY IF
+ * THE CURRENT THREAD HAS BEEN DESCHEDULED!
  */
 void
 lwkt_switch(void)
@@ -501,6 +509,7 @@ lwkt_switch(void)
     int reqflags;
     int cseq;
     int oseq;
+    int fatal_count;
 
     /*
      * Switching from within a 'fast' (non thread switched) interrupt or IPI
@@ -604,14 +613,11 @@ lwkt_switch(void)
      * Implement round-robin fairq with priority insertion.  The priority
      * insertion is handled by _lwkt_enqueue()
      *
-     * We have to adjust the MP lock for the target thread.  If we
-     * need the MP lock and cannot obtain it we try to locate a
-     * thread that does not need the MP lock.  If we cannot, we spin
-     * instead of HLT.
-     *
-     * A similar issue exists for the tokens held by the target thread.
      * If we cannot obtain ownership of the tokens we cannot immediately
-     * schedule the thread.
+     * schedule the target thread.
+     *
+     * Reminder: Again, we cannot afford to run any IPIs in this path if
+     * the current thread has been descheduled.
      */
     for (;;) {
         /*
@@ -655,7 +661,7 @@ lwkt_switch(void)
 
             if (ntd->td_fairq_accum >= 0)
                 break;
-            splz_check();
+            /*splz_check(); cannot do this here, see above */
             lwkt_fairq_accumulate(gd, ntd);
             TAILQ_REMOVE(&gd->gd_tdrunq, ntd, td_threadq);
             TAILQ_INSERT_TAIL(&gd->gd_tdrunq, ntd, td_threadq);
@@ -815,8 +821,12 @@ skip:
      * idle thread will check for pending reschedules already set
      * (RQF_AST_LWKT_RESCHED) before actually halting so we don't have
      * to here.
+     *
+     * Also, if TDF_RUNQ is not set the current thread is trying to
+     * deschedule, possibly in an atomic fashion.  We cannot afford to
+     * stay here.
      */
-    if (spinning <= 0) {
+    if (spinning <= 0 || (td->td_flags & TDF_RUNQ) == 0) {
         atomic_clear_int(&gd->gd_reqflags, RQF_WAKEUP);
         goto haveidle;
     }
@@ -847,15 +857,27 @@ skip:
      * WARNING!  We can't call splz_check() or anything else here as
      *           it could cause a deadlock.
      */
+#if defined(INVARIANTS) && defined(__amd64__)
+    if ((read_rflags() & PSL_I) == 0) {
+        cpu_enable_intr();
+        panic("lwkt_switch() called with interrupts disabled");
+    }
+#endif
     cseq = atomic_fetchadd_int(&lwkt_cseq_windex, 1);
+    fatal_count = lwkt_spin_fatal;
     while ((oseq = lwkt_cseq_rindex) != cseq) {
         cpu_ccfence();
+#if !defined(_KERNEL_VIRTUAL)
         if (cpu_mi_feature & CPU_MI_MONITOR) {
             cpu_mmw_pause_int(&lwkt_cseq_rindex, oseq);
-        } else {
+        } else
+#endif
+        {
             DELAY(1);
             cpu_lfence();
         }
+        if (fatal_count && --fatal_count == 0)
+            panic("lwkt_switch: fatal spin wait");
     }
     cseq = lwkt_spin_delay;    /* don't trust the system operator */
     cpu_ccfence();
@@ -865,7 +887,7 @@ skip:
         cseq = 1000;
     DELAY(cseq);
     atomic_add_int(&lwkt_cseq_rindex, 1);
-    splz_check();
+    splz_check();    /* ok, we already checked that td is still scheduled */
     /* highest level for(;;) loop */
 }
 
@@ -1225,6 +1247,7 @@ _lwkt_schedule(thread_t td, int reschedok)
 
     KASSERT(td != &td->td_gd->gd_idlethread,
             ("lwkt_schedule(): scheduling gd_idlethread is illegal!"));
+    KKASSERT((td->td_flags & TDF_MIGRATING) == 0);
     crit_enter_gd(mygd);
     KKASSERT(td->td_lwp == NULL || (td->td_lwp->lwp_flag & LWP_ONRUNQ) == 0);
     if (td == mygd->gd_curthread) {
@@ -1326,12 +1349,14 @@ lwkt_acquire(thread_t td)
         cpu_lfence();
         KKASSERT((td->td_flags & TDF_RUNQ) == 0);
         crit_enter_gd(mygd);
+        DEBUG_PUSH_INFO("lwkt_acquire");
         while (td->td_flags & (TDF_RUNNING|TDF_PREEMPT_LOCK)) {
 #ifdef SMP
             lwkt_process_ipiq();
 #endif
             cpu_lfence();
         }
+        DEBUG_POP_INFO();
         cpu_mfence();
         td->td_gd = mygd;
         TAILQ_INSERT_TAIL(&mygd->gd_tdallq, td, td_allq);
@@ -1533,14 +1558,22 @@ lwkt_setcpu_remote(void *arg)
 {
     thread_t td = arg;
     globaldata_t gd = mycpu;
+    int retry = 10000000;
 
+    DEBUG_PUSH_INFO("lwkt_setcpu_remote");
     while (td->td_flags & (TDF_RUNNING|TDF_PREEMPT_LOCK)) {
 #ifdef SMP
         lwkt_process_ipiq();
 #endif
         cpu_lfence();
         cpu_pause();
+        if (--retry == 0) {
+            kprintf("lwkt_setcpu_remote: td->td_flags %08x\n",
+                    td->td_flags);
+            retry = 10000000;
+        }
     }
+    DEBUG_POP_INFO();
     td->td_gd = gd;
     cpu_mfence();
     td->td_flags &= ~TDF_MIGRATING;
@@ -1641,6 +1674,7 @@ lwkt_exit(void)
     tsleep_remove(td);
     lwkt_deschedule_self(td);
     lwkt_remove_tdallq(td);
+    KKASSERT(td->td_refs == 0);
 
     /*
      * Final cleanup
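
For illustration only, below is a minimal standalone sketch of the bounded spin-wait pattern the patch introduces via lwkt_spin_fatal and fatal_count: a contender takes a ticket from a write index, spins until the read index catches up, and optionally aborts after a configurable number of iterations so a wedged spin becomes a hard failure instead of a silent hang. This is userland C11, not DragonFly kernel code; the names bounded_spin_enter/exit, seq_windex, seq_rindex and spin_fatal are hypothetical stand-ins, not identifiers from the source.

/*
 * Standalone sketch -- NOT kernel code.  Mirrors the shape of the
 * lwkt_cseq_windex/lwkt_cseq_rindex spin with an optional fatal count,
 * using C11 atomics and hypothetical names.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

static atomic_uint seq_windex = 0;      /* tickets handed out (cf. lwkt_cseq_windex) */
static atomic_uint seq_rindex = 0;      /* tickets completed  (cf. lwkt_cseq_rindex) */
static unsigned spin_fatal = 0;         /* 0 = disabled, like the lwkt.spin_fatal default */

static void
bounded_spin_enter(void)
{
        unsigned cseq = atomic_fetch_add(&seq_windex, 1);
        unsigned fatal_count = spin_fatal;

        /* Spin until our ticket comes up; give up loudly if it never does. */
        while (atomic_load(&seq_rindex) != cseq) {
                /* a real implementation would pause/monitor or DELAY() here */
                if (fatal_count && --fatal_count == 0) {
                        fprintf(stderr, "bounded_spin_enter: fatal spin wait\n");
                        abort();
                }
        }
}

static void
bounded_spin_exit(void)
{
        atomic_fetch_add(&seq_rindex, 1);       /* release the next ticket */
}

int
main(void)
{
        spin_fatal = 10000000;          /* fail hard rather than hang forever */
        bounded_spin_enter();           /* ticket 0 is satisfied immediately */
        bounded_spin_exit();
        printf("critical section completed\n");
        return 0;
}

The kernel path in the hunk above additionally interleaves cpu_mmw_pause_int()/DELAY() between polls and leaves lwkt_spin_fatal disabled (0) by default, so the panic only triggers when the operator opts in via the sysctl.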