* Replace the cpu_contention_mask global with a per-token contention mask.
* Fold the lwkt.user_pri_sched feature into the scheduler and remove the
sysctl. The feature is now always on. The feature allows a lower
priority non-contending thread to be scheduled in the face of a
high-priority contending thread that would normally spin in the scheduler.
* A reschedule IPI is now performed when a high-priority contending thread
might possibly resolve, which will kick the user process back into the
kernel and allow rescheduling of the higher priority thread.
* Change the idle-cpu check semantics. When a cpu's scheduler finds only
contending threads it used to loop in the scheduler and the idle thread
would be flagged to not halt. We now allow the idle thread to halt in
this case and expect to receive an IPI when any of the contending threads
might possibly resolve.
As a fringe benefit this should also benefit vkernels.
* lwkt_schedule() has been significantly simplified. Or as I would say,
decomplexified.
#else
splz();
#ifdef SMP
- if (!lwkt_runnable())
+ if ((mycpu->gd_reqflags & RQF_IDLECHECK_WK_MASK) == 0)
__asm __volatile("sti; hlt");
else
__asm __volatile("sti; pause");
#else
- if (!lwkt_runnable())
+ if ((mycpu->gd_reqflags & RQF_IDLECHECK_WK_MASK) == 0)
__asm __volatile("sti; hlt");
else
__asm __volatile("sti");
static int fairq_enable = 1;
SYSCTL_INT(_lwkt, OID_AUTO, fairq_enable, CTLFLAG_RW, &fairq_enable, 0,
"Turn on fairq priority accumulators");
-static int user_pri_sched = 0;
+static int user_pri_sched = 1;
SYSCTL_INT(_lwkt, OID_AUTO, user_pri_sched, CTLFLAG_RW, &user_pri_sched, 0,
"");
static int preempt_enable = 1;
xtd = TAILQ_FIRST(&gd->gd_tdrunq);
if (xtd == NULL) {
TAILQ_INSERT_TAIL(&gd->gd_tdrunq, td, td_threadq);
- atomic_set_int_nonlocked(&gd->gd_reqflags, RQF_RUNNING);
+ atomic_set_int_nonlocked(&gd->gd_reqflags,
+ RQF_RUNNING | RQF_WAKEUP);
} else {
+ atomic_set_int_nonlocked(&gd->gd_reqflags, RQF_WAKEUP);
while (xtd && xtd->td_pri > td->td_pri)
xtd = TAILQ_NEXT(xtd, td_threadq);
if (xtd)
thread_t ntd;
thread_t xtd;
thread_t nlast;
- int nquserok;
int didaccumulate;
/*
* schedule the thread.
*/
for (;;) {
+ atomic_clear_int_nonlocked(&mycpu->gd_reqflags, RQF_WAKEUP);
clear_lwkt_resched();
didaccumulate = 0;
ntd = TAILQ_FIRST(&gd->gd_tdrunq);
*/
if (ntd == NULL) {
ntd = &gd->gd_idlethread;
- if (gd->gd_reqflags & RQF_IDLECHECK_MASK)
- ntd->td_flags |= TDF_IDLE_NOHLT;
#ifdef SMP
if (gd->gd_trap_nesting_level == 0 && panicstr == NULL)
ASSERT_NO_TOKENS_HELD(ntd);
- clr_cpu_contention_mask(gd);
#endif
cpu_time.cp_msg[0] = 0;
cpu_time.cp_stallpc = 0;
if (ntd->td_fairq_accum >= 0 &&
(TD_TOKS_NOT_HELD(ntd) || lwkt_getalltokens(ntd))
) {
-#ifdef SMP
- clr_cpu_contention_mask(gd);
-#endif
goto havethread;
}
-#ifdef SMP
- if (ntd->td_fairq_accum >= 0)
- set_cpu_contention_mask(gd);
-#endif
-
/*
* Coldpath - unable to schedule ntd, continue looking for threads
- * to schedule. This is only allowed of the (presumably) kernel
- * thread exhausted its fair share. A kernel thread stuck on
- * resources does not currently allow a user thread to get in
- * front of it.
+ * to schedule.
*/
-#ifdef SMP
- nquserok = ((ntd->td_pri < TDPRI_KERN_LPSCHED) ||
- (ntd->td_fairq_accum < 0));
-#else
- nquserok = 1;
-#endif
nlast = NULL;
for (;;) {
/*
* If we exhausted the run list switch to the idle thread.
- * Since one or more threads had resource acquisition issues
- * we do not allow the idle thread to halt.
*
* NOTE: nlast can be NULL.
*/
if (ntd == nlast) {
cpu_pause();
ntd = &gd->gd_idlethread;
- ntd->td_flags |= TDF_IDLE_NOHLT;
#ifdef SMP
if (gd->gd_trap_nesting_level == 0 && panicstr == NULL)
ASSERT_NO_TOKENS_HELD(ntd);
* NOTE: For UP there is no mplock and lwkt_getalltokens()
* always succeeds.
*/
- if ((ntd->td_pri >= TDPRI_KERN_LPSCHED || nquserok ||
- user_pri_sched) && ntd->td_fairq_accum >= 0 &&
+ if (ntd->td_fairq_accum >= 0 &&
(TD_TOKS_NOT_HELD(ntd) || lwkt_getalltokens(ntd))
) {
-#ifdef SMP
- clr_cpu_contention_mask(gd);
-#endif
goto havethread;
}
/*
* Thread was runnable but we were unable to get the required
- * resources (tokens and/or mplock).
+ * resources (tokens and/or mplock), continue the scan.
*/
-#ifdef SMP
- if (ntd->td_fairq_accum >= 0)
- set_cpu_contention_mask(gd);
- if (ntd->td_pri >= TDPRI_KERN_LPSCHED && ntd->td_fairq_accum >= 0)
- nquserok = 0;
-#endif
+ /* */
}
/*
KTR_LOG(tokens_ ## name, ref, ref->tr_tok, curthread)
/*
- * Cpu contention mask for directed wakeups.
- */
-cpumask_t cpu_contention_mask;
-
-/*
* Global tokens. These replace the MP lock for major subsystem locking.
* These tokens are initially used to lockup both global and individual
* operations.
}
/*
+ * Force a LWKT reschedule on the target cpu when a requested token
+ * becomes available.
+ *
+ * This is the callback handed to lwkt_send_ipiq3_mask() by
+ * _lwkt_reltoken_mask(). None of its arguments are used; it only
+ * calls need_lwkt_resched().
+ */
+static
+void
+lwkt_reltoken_mask_remote(void *arg, int arg2, struct intrframe *frame)
+{
+ need_lwkt_resched();
+}
+
+static __inline
+void
+_lwkt_reltoken_mask(lwkt_token_t tok) /* notify cpus recorded in tok->t_collmask */
+{
+#ifdef SMP
+ cpumask_t mask; /* snapshot of tok->t_collmask */
+
+ while ((mask = tok->t_collmask) != 0) { /* nothing to do once the mask is empty */
+ if (atomic_cmpset_cpumask(&tok->t_collmask, mask, 0)) { /* zero the mask; presumably fails and retries if it changed */
+ lwkt_send_ipiq3_mask(mask, lwkt_reltoken_mask_remote,
+ NULL, 0); /* callback calls need_lwkt_resched() */
+ break;
+ }
+ }
+#endif
+} /* body is empty when SMP is not defined */
+
+/*
* Obtain all the tokens required by the specified thread on the current
* cpu, return 0 on failure and non-zero on success. If a failure occurs
* any partially acquired tokens will be released prior to return.
if (ref >= &td->td_toks_base && ref < td->td_toks_stop)
break;
+#ifdef SMP
/*
* Otherwise we failed to acquire all the tokens.
- * Undo and return.
+ * Undo and return. We have to try once more after
+ * setting cpumask to cover possible races.
*/
+ atomic_set_cpumask(&tok->t_collmask,
+ td->td_gd->gd_cpumask);
+ if (atomic_cmpset_ptr(&tok->t_ref, NULL, scan)) {
+ atomic_clear_cpumask(&tok->t_collmask,
+ td->td_gd->gd_cpumask);
+ break;
+ }
+#endif
td->td_wmesg = tok->t_desc;
atomic_add_long(&tok->t_collisions, 1);
lwkt_relalltokens(td);
for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
tok = scan->tr_tok;
- if (tok->t_ref == scan)
+ if (tok->t_ref == scan) {
tok->t_ref = NULL;
+ _lwkt_reltoken_mask(tok);
+ }
}
}
* return tr_tok->t_ref should be assigned to this specific
* ref.
*/
- atomic_add_long(&ref->tr_tok->t_collisions, 1);
+#ifdef SMP
+ atomic_set_cpumask(&tok->t_collmask, td->td_gd->gd_cpumask);
+ if (atomic_cmpset_ptr(&tok->t_ref, NULL, ref)) {
+ atomic_clear_cpumask(&tok->t_collmask,
+ td->td_gd->gd_cpumask);
+ return;
+ }
+#endif
+ td->td_wmesg = tok->t_desc;
+ atomic_add_long(&tok->t_collisions, 1);
logtoken(fail, ref);
lwkt_switch();
logtoken(succ, ref);
- KKASSERT(ref->tr_tok->t_ref == ref);
+ KKASSERT(tok->t_ref == ref);
}
}
* return tr_tok->t_ref should be assigned to this specific
* ref.
*/
- atomic_add_long(&ref->tr_tok->t_collisions, 1);
+#ifdef SMP
+ atomic_set_cpumask(&tok->t_collmask, td->td_gd->gd_cpumask);
+ if (atomic_cmpset_ptr(&tok->t_ref, NULL, ref)) {
+ atomic_clear_cpumask(&tok->t_collmask,
+ td->td_gd->gd_cpumask);
+ goto success;
+ }
+#endif
+ td->td_wmesg = tok->t_desc;
+ atomic_add_long(&tok->t_collisions, 1);
logtoken(fail, ref);
lwkt_switch();
logtoken(succ, ref);
- KKASSERT(ref->tr_tok->t_ref == ref);
+ KKASSERT(tok->t_ref == ref);
}
+success:
crit_enter_hard_gd(td->td_gd);
}
* return tr_tok->t_ref should be assigned to this specific
* ref.
*/
- atomic_add_long(&ref->tr_tok->t_collisions, 1);
+#ifdef SMP
+ atomic_set_cpumask(&tok->t_collmask, td->td_gd->gd_cpumask);
+ if (atomic_cmpset_ptr(&tok->t_ref, NULL, ref)) {
+ atomic_clear_cpumask(&tok->t_collmask,
+ td->td_gd->gd_cpumask);
+ goto success;
+ }
+#endif
+ td->td_wmesg = tok->t_desc;
+ atomic_add_long(&tok->t_collisions, 1);
logtoken(fail, ref);
lwkt_switch();
logtoken(succ, ref);
- KKASSERT(ref->tr_tok->t_ref == ref);
+ KKASSERT(tok->t_ref == ref);
}
+success:
return(tok);
}
*
* NOTE: The mplock is a token also so sequencing is a bit complex.
*/
- if (tok->t_ref == ref)
+ if (tok->t_ref == ref) {
tok->t_ref = NULL;
+ _lwkt_reltoken_mask(tok);
+ }
cpu_sfence();
if ((ref->tr_flags & LWKT_TOKEN_MPSAFE) == 0) {
cpu_ccfence();
tok->t_ref = NULL;
tok->t_flags = mpsafe ? LWKT_TOKEN_MPSAFE : 0;
tok->t_collisions = 0;
+ tok->t_collmask = 0;
tok->t_desc = desc;
}
* NOTE: On an SMP system we rely on a scheduler IPI to wake a HLTed cpu up.
* However, there are cases where the idlethread will be entered with
* the possibility that no IPI will occur and in such cases
- * lwkt_switch() sets TDF_IDLE_NOHLT.
+ * lwkt_switch() sets RQF_WAKEUP. We usually check
+ * RQF_IDLECHECK_WK_MASK.
*
* NOTE: cpu_idle_hlt again defaults to 2 (use ACPI sleep states). Set to
* 1 to just use hlt and for debugging purposes.
* CLIing to catch any interrupt races. Note that we are
* at SPL0 and interrupts are enabled.
*/
- if (cpu_idle_hlt && !lwkt_runnable() &&
- (td->td_flags & TDF_IDLE_NOHLT) == 0) {
+ if (cpu_idle_hlt &&
+ (td->td_gd->gd_reqflags & RQF_IDLECHECK_WK_MASK) == 0) {
__asm __volatile("cli");
splz();
- if (!lwkt_runnable()) {
+ if ((td->td_gd->gd_reqflags & RQF_IDLECHECK_WK_MASK) == 0) {
if (cpu_idle_hlt == 1)
cpu_idle_default_hook();
else
cpu_idle_hook();
}
-#ifdef SMP
- else
- handle_cpu_contention_mask();
-#endif
__asm __volatile("sti");
++cpu_idle_hltcnt;
} else {
- td->td_flags &= ~TDF_IDLE_NOHLT;
splz();
-#ifdef SMP
__asm __volatile("sti");
- handle_cpu_contention_mask();
-#else
- __asm __volatile("sti");
-#endif
++cpu_idle_spincnt;
}
}
#ifdef SMP
/*
- * This routine is called when the only runnable threads require
- * the MP lock, and the scheduler couldn't get it. On a real cpu
- * we let the scheduler spin.
- */
-void
-handle_cpu_contention_mask(void)
-{
- cpumask_t mask;
-
- mask = cpu_contention_mask;
- cpu_ccfence();
- if (mask && BSFCPUMASK(mask) != mycpu->gd_cpuid)
- DELAY(2);
-}
-
-/*
* This routine is called if a spinlock has been held through the
* exponential backoff period and is seriously contested. On a real cpu
* we let it spin.
* CLIing to catch any interrupt races. Note that we are
* at SPL0 and interrupts are enabled.
*/
- if (cpu_idle_hlt && !lwkt_runnable() &&
- (td->td_flags & TDF_IDLE_NOHLT) == 0) {
+ if (cpu_idle_hlt &&
+ (td->td_gd->gd_reqflags & RQF_IDLECHECK_WK_MASK) == 0) {
__asm __volatile("cli");
splz();
- if (!lwkt_runnable()) {
+ if ((td->td_gd->gd_reqflags & RQF_IDLECHECK_WK_MASK) == 0) {
if (cpu_idle_hlt == 1)
cpu_idle_default_hook();
else
cpu_idle_hook();
}
-#ifdef SMP
- else
- handle_cpu_contention_mask();
-#endif
__asm __volatile("sti");
++cpu_idle_hltcnt;
} else {
- td->td_flags &= ~TDF_IDLE_NOHLT;
splz();
-#ifdef SMP
__asm __volatile("sti");
- handle_cpu_contention_mask();
-#else
- __asm __volatile("sti");
-#endif
++cpu_idle_spincnt;
}
}
#ifdef SMP
/*
- * This routine is called when the only runnable threads require
- * the MP lock, and the scheduler couldn't get it. On a real cpu
- * we let the scheduler spin.
- */
-void
-handle_cpu_contention_mask(void)
-{
- cpumask_t mask;
-
- mask = cpu_contention_mask;
- cpu_ccfence();
- if (mask && BSFCPUMASK(mask) != mycpu->gd_cpuid)
- DELAY(2);
-}
-
-/*
* This routine is called if a spinlock has been held through the
* exponential backoff period and is seriously contested. On a real cpu
* we let it spin.
* Note on cpu_idle_hlt: On an SMP system we rely on a scheduler IPI
* to wake a HLTed cpu up. However, there are cases where the idlethread
* will be entered with the possibility that no IPI will occur and in such
- * cases lwkt_switch() sets TDF_IDLE_NOHLT.
+ * cases lwkt_switch() sets RQF_WAKEUP. We nominally check
+ * RQF_IDLECHECK_WK_MASK.
*/
static int cpu_idle_hlt = 1;
static int cpu_idle_hltcnt;
* The idle loop halts only if no threads are scheduleable
* and no signals have occured.
*/
- if (cpu_idle_hlt && !lwkt_runnable() &&
- (td->td_flags & TDF_IDLE_NOHLT) == 0) {
+ if (cpu_idle_hlt &&
+ (td->td_gd->gd_reqflags & RQF_IDLECHECK_WK_MASK) == 0) {
splz();
#ifdef SMP
KKASSERT(MP_LOCK_HELD() == 0);
#endif
- if (!lwkt_runnable()) {
+ if ((td->td_gd->gd_reqflags & RQF_IDLECHECK_WK_MASK) == 0) {
#ifdef DEBUGIDLE
struct timeval tv1, tv2;
gettimeofday(&tv1, NULL);
#endif
reqflags = gd->mi.gd_reqflags &
- ~RQF_IDLECHECK_MASK;
+ ~RQF_IDLECHECK_WK_MASK;
umtx_sleep(&gd->mi.gd_reqflags, reqflags,
1000000);
#ifdef DEBUGIDLE
}
#endif
}
-#ifdef SMP
- else {
- handle_cpu_contention_mask();
- }
-#endif
++cpu_idle_hltcnt;
} else {
- td->td_flags &= ~TDF_IDLE_NOHLT;
splz();
#ifdef SMP
- handle_cpu_contention_mask();
__asm __volatile("pause");
#endif
++cpu_idle_spincnt;
#ifdef SMP
/*
- * Called by the LWKT switch core with a critical section held if the only
- * schedulable thread needs the MP lock and we couldn't get it. On
- * a real cpu we just spin in the scheduler. In the virtual kernel
- * we sleep for a bit.
- */
-void
-handle_cpu_contention_mask(void)
-{
- cpumask_t mask;
-
- mask = cpu_contention_mask;
- cpu_ccfence();
- if (mask && BSFCPUMASK(mask) != mycpu->gd_cpuid)
- pthread_yield();
-}
-
-/*
* Called by the spinlock code with or without a critical section held
* when a spinlock is found to be seriously constested.
*
* Note on cpu_idle_hlt: On an SMP system we rely on a scheduler IPI
* to wake a HLTed cpu up. However, there are cases where the idlethread
* will be entered with the possibility that no IPI will occur and in such
- * cases lwkt_switch() sets TDF_IDLE_NOHLT.
+ * cases lwkt_switch() sets RQF_WAKEUP and we nominally check
+ * RQF_IDLECHECK_WK_MASK.
*/
static int cpu_idle_hlt = 1;
static int cpu_idle_hltcnt;
* The idle loop halts only if no threads are scheduleable
* and no signals have occured.
*/
- if (cpu_idle_hlt && !lwkt_runnable() &&
- (td->td_flags & TDF_IDLE_NOHLT) == 0) {
+ if (cpu_idle_hlt &&
+ (td->td_gd->gd_reqflags & RQF_IDLECHECK_WK_MASK) == 0) {
splz();
- if (!lwkt_runnable()) {
+ if ((td->td_gd->gd_reqflags & RQF_IDLECHECK_WK_MASK) == 0) {
#ifdef DEBUGIDLE
struct timeval tv1, tv2;
gettimeofday(&tv1, NULL);
#endif
reqflags = gd->mi.gd_reqflags &
- ~RQF_IDLECHECK_MASK;
+ ~RQF_IDLECHECK_WK_MASK;
umtx_sleep(&gd->mi.gd_reqflags, reqflags,
1000000);
#ifdef DEBUGIDLE
}
#endif
}
-#ifdef SMP
- else {
- handle_cpu_contention_mask();
- }
-#endif
++cpu_idle_hltcnt;
} else {
- td->td_flags &= ~TDF_IDLE_NOHLT;
splz();
#ifdef SMP
- handle_cpu_contention_mask();
__asm __volatile("pause");
#endif
++cpu_idle_spincnt;
#ifdef SMP
/*
- * Called by the LWKT switch core with a critical section held if the only
- * schedulable thread needs the MP lock and we couldn't get it. On
- * a real cpu we just spin in the scheduler. In the virtual kernel
- * we sleep for a bit.
- */
-void
-handle_cpu_contention_mask(void)
-{
- cpumask_t mask;
-
- mask = cpu_contention_mask;
- cpu_ccfence();
- if (mask && BSFCPUMASK(mask) != mycpu->gd_cpuid)
- pthread_yield();
-}
-
-/*
* Called by the spinlock code with or without a critical section held
* when a spinlock is found to be seriously constested.
*
#define RQB_AST_UPCALL 6
#define RQB_TIMER 7
#define RQB_RUNNING 8
+#define RQB_WAKEUP 9
#define RQF_IPIQ (1 << RQB_IPIQ)
#define RQF_INTPEND (1 << RQB_INTPEND)
#define RQF_AST_LWKT_RESCHED (1 << RQB_AST_LWKT_RESCHED)
#define RQF_AST_UPCALL (1 << RQB_AST_UPCALL)
#define RQF_RUNNING (1 << RQB_RUNNING)
+#define RQF_WAKEUP (1 << RQB_WAKEUP)
#define RQF_AST_MASK (RQF_AST_OWEUPC|RQF_AST_SIGNAL|\
RQF_AST_USER_RESCHED|RQF_AST_LWKT_RESCHED|\
RQF_AST_UPCALL)
#define RQF_IDLECHECK_MASK (RQF_IPIQ|RQF_INTPEND|RQF_TIMER)
+#define RQF_IDLECHECK_WK_MASK (RQF_IDLECHECK_MASK|RQF_WAKEUP)
/*
* globaldata flags
#define get_mplock_count(td) lwkt_cnttoken(&mp_token, td)
void cpu_get_initial_mplock(void);
-void handle_cpu_contention_mask(void);
-
-extern cpumask_t cpu_contention_mask;
-
-/*
- * A cpu wanted the MP lock but could not get it. This function is also
- * called directly from the LWKT scheduler.
- *
- * Reentrant, may be called even if the cpu is already contending the MP
- * lock.
- */
-static __inline
-void
-set_cpu_contention_mask(globaldata_t gd)
-{
- atomic_set_cpumask(&cpu_contention_mask, gd->gd_cpumask);
-}
-
-/*
- * A cpu is no longer contending for the MP lock after previously contending
- * for it.
- *
- * Reentrant, may be called even if the cpu was not previously contending
- * the MP lock.
- */
-static __inline
-void
-clr_cpu_contention_mask(globaldata_t gd)
-{
- atomic_clear_cpumask(&cpu_contention_mask, gd->gd_cpumask);
-}
#define MP_LOCK_HELD() LWKT_TOKEN_HELD(&mp_token)
#define ASSERT_MP_LOCK_HELD() ASSERT_LWKT_TOKEN_HELD(&mp_token)
-
#else
/*
struct lwkt_tokref *t_ref; /* Owning ref or NULL */
intptr_t t_flags; /* MP lock required */
long t_collisions; /* Collision counter */
+ cpumask_t t_collmask; /* Collision cpu mask for resched */
const char *t_desc; /* Descriptive name */
} lwkt_token;
.t_ref = NULL, \
.t_flags = 0, \
.t_collisions = 0, \
+ .t_collmask = 0, \
.t_desc = #name \
}
.t_ref = NULL, \
.t_flags = LWKT_TOKEN_MPSAFE, \
.t_collisions = 0, \
+ .t_collmask = 0, \
.t_desc = #name \
}
#define TDF_RUNQ 0x0002 /* on an LWKT run queue */
#define TDF_PREEMPT_LOCK 0x0004 /* I have been preempted */
#define TDF_PREEMPT_DONE 0x0008 /* acknowledge preemption complete */
-#define TDF_IDLE_NOHLT 0x0010 /* we need to spin */
+#define TDF_UNUSED00000010 0x0010
#define TDF_MIGRATING 0x0020 /* thread is being migrated */
#define TDF_SINTR 0x0040 /* interruptability hint for 'ps' */
#define TDF_TSLEEPQ 0x0080 /* on a tsleep wait queue */