if (db_more(&nl) < 0)
return;
- db_printf("cpu %d tdrunqmask %08x curthread %p reqflags %04x\n",
- gd->gd_cpuid, gd->gd_runqmask,
- gd->gd_curthread, gd->gd_reqflags);
+ db_printf("cpu %d curthread %p reqflags %04x\n",
+ gd->gd_cpuid, gd->gd_curthread, gd->gd_reqflags);
if (gd->gd_curthread && gd->gd_curthread->td_preempted) {
db_printf(" PREEMPTING THREAD %p\n",
gd->gd_curthread->td_preempted);
if (db_more(&nl) < 0)
return;
db_printf(" tdq thread pid flags pri/cs/mp sp wmesg comm\n");
- for (np = 0; np < 32; ++np) {
- TAILQ_FOREACH(td, &gd->gd_tdrunq[np], td_threadq) {
- if (db_more(&nl) < 0)
- return;
- db_printf(" %3d %p %3d %08x %2d/%02d/%02d %p %8.8s %s\n",
- np, td,
- (td->td_proc ? td->td_proc->p_pid : -1),
- td->td_flags,
- td->td_pri & TDPRI_MASK,
- td->td_pri / TDPRI_CRIT,
+ TAILQ_FOREACH(td, &gd->gd_tdrunq, td_threadq) {
+ if (db_more(&nl) < 0)
+ return;
+ db_printf(" %p %3d %08x %2d/%02d/%02d %p %8.8s %s\n",
+ td,
+ (td->td_proc ? td->td_proc->p_pid : -1),
+ td->td_flags,
+ td->td_pri,
+ td->td_critcount,
#ifdef SMP
- td->td_mpcount,
+ td->td_mpcount,
#else
- 0,
+ 0,
#endif
- td->td_sp,
- td->td_wmesg ? td->td_wmesg : "-",
- td->td_proc ? td->td_proc->p_comm : td->td_comm);
- if (td->td_preempted)
- db_printf(" PREEMPTING THREAD %p\n", td->td_preempted);
- db_dump_td_tokens(td);
- }
+ td->td_sp,
+ td->td_wmesg ? td->td_wmesg : "-",
+ td->td_proc ? td->td_proc->p_comm : td->td_comm);
+ if (td->td_preempted)
+ db_printf(" PREEMPTING THREAD %p\n", td->td_preempted);
+ db_dump_td_tokens(td);
}
if (db_more(&nl) < 0)
return;
np, td,
(td->td_proc ? td->td_proc->p_pid : -1),
td->td_flags,
- td->td_pri & TDPRI_MASK,
- td->td_pri / TDPRI_CRIT,
+ td->td_pri,
+ td->td_critcount,
#ifdef SMP
td->td_mpcount,
#else
db_printf(" %p[tok=%p", ref, ref->tr_tok);
#ifdef SMP
- if (td == tok->t_ref->tr_owner)
+ if (tok->t_ref && td == tok->t_ref->tr_owner)
db_printf(",held");
#endif
db_printf("]");
}
/*
+ * lwkt thread scheduler fair queueing
+ */
+ lwkt_fairq_schedulerclock(curthread);
+
+ /*
* softticks are handled for all cpus
*/
hardclock_softtick(gd);
td = curthread;
/* We must be in critical section. */
- KKASSERT(td->td_pri >= TDPRI_CRIT);
+ KKASSERT(td->td_critcount);
info = &intr_info_ary[intr];
kp->kp_lwp.kl_tdprio = td->td_pri;
kp->kp_lwp.kl_rtprio.type = RTP_PRIO_THREAD;
- kp->kp_lwp.kl_rtprio.prio = td->td_pri & TDPRI_MASK;
+ kp->kp_lwp.kl_rtprio.prio = td->td_pri;
kp->kp_lwp.kl_uticks = td->td_uticks;
kp->kp_lwp.kl_sticks = td->td_sticks;
size_t cnt;
int error = 0;
int save = 0;
- int baseticks = ticks;
KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
("uiomove: mode"));
switch (uio->uio_segflg) {
case UIO_USERSPACE:
- if (ticks - baseticks >= hogticks) {
- uio_yield();
- baseticks = ticks;
- }
+ lwkt_user_yield();
if (uio->uio_rw == UIO_READ)
error = copyout(cp, iov->iov_base, cnt);
else
* in case this is the idle process and already asleep.
*/
splz();
- oldpri = td->td_pri & TDPRI_MASK;
+ oldpri = td->td_pri;
lwkt_setpri_self(safepri);
lwkt_switch();
lwkt_setpri_self(oldpri);
}
/*
- * Yield / synchronous reschedule. This is a bit tricky because the trap
- * code might have set a lazy release on the switch function. Setting
- * P_PASSIVE_ACQ will ensure that the lazy release executes when we call
- * switch, and that we are given a greater chance of affinity with our
- * current cpu.
- *
- * We call lwkt_setpri_self() to rotate our thread to the end of the lwkt
- * run queue. lwkt_switch() will also execute any assigned passive release
- * (which usually calls release_curproc()), allowing a same/higher priority
- * process to be designated as the current process.
- *
- * While it is possible for a lower priority process to be designated,
- * it's call to lwkt_maybe_switch() in acquire_curproc() will likely
- * round-robin back to us and we will be able to re-acquire the current
- * process designation.
- *
- * MPSAFE
- */
-void
-uio_yield(void)
-{
- struct thread *td = curthread;
- struct proc *p = td->td_proc;
-
- lwkt_setpri_self(td->td_pri & TDPRI_MASK);
- if (p) {
- p->p_flag |= P_PASSIVE_ACQ;
- lwkt_switch();
- p->p_flag &= ~P_PASSIVE_ACQ;
- } else {
- lwkt_switch();
- }
-}
-
-/*
* Compute a tenex style load average of a quantity on
* 1, 5 and 15 minute intervals.
*/
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sysproto.h>
-#include <sys/uio.h> /* uio_yield() fixme */
#if 0
sys_yield(struct yield_args *uap)
{
uap->sysmsg_result = 0;
- uio_yield();
+ lwkt_user_yield();
return(0);
}
struct timespec ts, ts2, ts3;
struct timeval tv;
int error;
- int tried_yield;
if (rqt->tv_nsec < 0 || rqt->tv_nsec >= 1000000000)
return (EINVAL);
nanouptime(&ts);
timespecadd(&ts, rqt); /* ts = target timestamp compare */
TIMESPEC_TO_TIMEVAL(&tv, rqt); /* tv = sleep interval */
- tried_yield = 0;
for (;;) {
int ticks;
if (tv.tv_sec == 0 && ticks == 0) {
thread_t td = curthread;
- if (tried_yield || tv.tv_usec < sleep_hard_us) {
- tried_yield = 0;
- uio_yield();
+ if (tv.tv_usec < sleep_hard_us) {
+ lwkt_user_yield();
} else {
crit_enter_quick(td);
systimer_init_oneshot(&info, ns1_systimer,
if (gd->gd_intr_nesting_level > 20)
panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
- KKASSERT(curthread->td_pri >= TDPRI_CRIT);
+ KKASSERT(curthread->td_critcount);
++ipiq_count;
ip = &gd->gd_ipiq[target->gd_cpuid];
if (gd->gd_intr_nesting_level > 20)
panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
- KKASSERT(curthread->td_pri >= TDPRI_CRIT);
+ KKASSERT(curthread->td_critcount);
++ipiq_count;
++ipiq_passive;
ip = &gd->gd_ipiq[target->gd_cpuid];
struct globaldata *gd = mycpu;
logipiq(send_nbio, func, arg1, arg2, gd, target);
- KKASSERT(curthread->td_pri >= TDPRI_CRIT);
+ KKASSERT(curthread->td_critcount);
if (target == gd) {
func(arg1, arg2, NULL);
logipiq(send_end, func, arg1, arg2, gd, target);
* Issue a load fence to prevent speculative reads of e.g. data written
* by the other cpu prior to it updating the index.
*/
- KKASSERT(curthread->td_pri >= TDPRI_CRIT);
+ KKASSERT(curthread->td_critcount);
wi = ip->ip_windex;
cpu_lfence();
#ifdef SMP
static void lwkt_schedule_remote(void *arg, int arg2, struct intrframe *frame);
#endif
+static void lwkt_fairq_accumulate(globaldata_t gd, thread_t td);
extern void cpu_heavy_restore(void);
extern void cpu_lwkt_restore(void);
SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count, CTLFLAG_RW,
&token_contention_count, 0, "spinning due to token contention");
#endif
+static int fairq_enable = 1;
+SYSCTL_INT(_lwkt, OID_AUTO, fairq_enable, CTLFLAG_RW, &fairq_enable, 0, "");
/*
* These helper procedures handle the runq, they can only be called from
_lwkt_dequeue(thread_t td)
{
if (td->td_flags & TDF_RUNQ) {
- int nq = td->td_pri & TDPRI_MASK;
struct globaldata *gd = td->td_gd;
td->td_flags &= ~TDF_RUNQ;
- TAILQ_REMOVE(&gd->gd_tdrunq[nq], td, td_threadq);
- /* runqmask is passively cleaned up by the switcher */
+ TAILQ_REMOVE(&gd->gd_tdrunq, td, td_threadq);
+ /* subtract td's priority from the per-cpu priority sum */
+ gd->gd_fairq_total_pri -= td->td_pri;
+ /* the run queue is now empty: clear RQF_RUNNING in gd_reqflags */
+ if (TAILQ_FIRST(&gd->gd_tdrunq) == NULL)
+ atomic_clear_int_nonlocked(&gd->gd_reqflags, RQF_RUNNING);
}
}
+/*
+ * Priority enqueue.
+ *
+ * Inserts td on the run queue of its own cpu (td->td_gd->gd_tdrunq)
+ * unless one of TDF_RUNQ, TDF_MIGRATING or TDF_BLOCKQ is already set.
+ * The queue is walked from the head past every thread whose td_pri is
+ * strictly greater than td's; td is inserted in front of the first
+ * thread that is not, or at the tail if there is none. RQF_RUNNING is
+ * set in gd_reqflags when the queue was empty beforehand, and td_pri
+ * is added to gd_fairq_total_pri.
+ *
+ * NOTE: There are a limited number of lwkt threads runnable since user
+ * processes only schedule one at a time per cpu.
+ */
static __inline
void
_lwkt_enqueue(thread_t td)
{
+ thread_t xtd;
+
if ((td->td_flags & (TDF_RUNQ|TDF_MIGRATING|TDF_BLOCKQ)) == 0) {
- int nq = td->td_pri & TDPRI_MASK;
struct globaldata *gd = td->td_gd;
td->td_flags |= TDF_RUNQ;
- TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], td, td_threadq);
- gd->gd_runqmask |= 1 << nq;
+ xtd = TAILQ_FIRST(&gd->gd_tdrunq);
+ if (xtd == NULL) {
+ TAILQ_INSERT_TAIL(&gd->gd_tdrunq, td, td_threadq);
+ atomic_set_int_nonlocked(&gd->gd_reqflags, RQF_RUNNING);
+ } else {
+ while (xtd && xtd->td_pri > td->td_pri)
+ xtd = TAILQ_NEXT(xtd, td_threadq);
+ if (xtd)
+ TAILQ_INSERT_BEFORE(xtd, td, td_threadq);
+ else
+ TAILQ_INSERT_TAIL(&gd->gd_tdrunq, td, td_threadq);
+ }
+ gd->gd_fairq_total_pri += td->td_pri;
}
}
lwkt_schedule_self(thread_t td)
{
crit_enter_quick(td);
- KASSERT(td != &td->td_gd->gd_idlethread, ("lwkt_schedule_self(): scheduling gd_idlethread is illegal!"));
+ KASSERT(td != &td->td_gd->gd_idlethread,
+ ("lwkt_schedule_self(): scheduling gd_idlethread is illegal!"));
KKASSERT(td->td_lwp == NULL || (td->td_lwp->lwp_flag & LWP_ONRUNQ) == 0);
_lwkt_enqueue(td);
crit_exit_quick(td);
void
lwkt_gdinit(struct globaldata *gd)
{
- int i;
-
- for (i = 0; i < sizeof(gd->gd_tdrunq)/sizeof(gd->gd_tdrunq[0]); ++i)
- TAILQ_INIT(&gd->gd_tdrunq[i]);
- gd->gd_runqmask = 0;
+ /* the run queue is a single list head, initialized empty */
+ TAILQ_INIT(&gd->gd_tdrunq);
TAILQ_INIT(&gd->gd_tdallq);
}
td->td_kstack_size = stksize;
td->td_flags = flags;
td->td_gd = gd;
- td->td_pri = TDPRI_KERN_DAEMON + TDPRI_CRIT;
+ td->td_pri = TDPRI_KERN_DAEMON;
+ td->td_critcount = 1;
td->td_toks_stop = &td->td_toks_base;
#ifdef SMP
if ((flags & TDF_MPSAFE) == 0)
globaldata_t gd = mycpu;
thread_t td = gd->gd_curthread;
thread_t ntd;
+ thread_t xtd;
+ thread_t nlast;
#ifdef SMP
+ int nquserok;
int mpheld;
#endif
+ int didaccumulate;
/*
* Switching from within a 'fast' (non thread switched) interrupt or IPI
}
#endif
#endif
+
+ /*
+ * If we had preempted another thread on this cpu, resume the preempted
+ * thread. This occurs transparently, whether the preempted thread
+ * was scheduled or not (it may have been preempted after descheduling
+ * itself).
+ *
+ * We have to setup the MP lock for the original thread after backing
+ * out the adjustment that was made to curthread when the original
+ * was preempted.
+ */
if ((ntd = td->td_preempted) != NULL) {
- /*
- * We had preempted another thread on this cpu, resume the preempted
- * thread. This occurs transparently, whether the preempted thread
- * was scheduled or not (it may have been preempted after descheduling
- * itself).
- *
- * We have to setup the MP lock for the original thread after backing
- * out the adjustment that was made to curthread when the original
- * was preempted.
- */
KKASSERT(ntd->td_flags & TDF_PREEMPT_LOCK);
#ifdef SMP
if (ntd->td_mpcount && mpheld == 0) {
* set the reschedule flag if the originally interrupted thread is
* at a lower priority.
*/
- if (gd->gd_runqmask > (2 << (ntd->td_pri & TDPRI_MASK)) - 1)
+ if (TAILQ_FIRST(&gd->gd_tdrunq) &&
+ TAILQ_FIRST(&gd->gd_tdrunq)->td_pri > ntd->td_pri) {
need_lwkt_resched();
+ }
/* YYY release mp lock on switchback if original doesn't need it */
- } else {
+ goto havethread_preempted;
+ }
+
+ /*
+ * Implement round-robin fairq with priority insertion. The priority
+ * insertion is handled by _lwkt_enqueue()
+ *
+ * We have to adjust the MP lock for the target thread. If we
+ * need the MP lock and cannot obtain it we try to locate a
+ * thread that does not need the MP lock. If we cannot, we spin
+ * instead of HLT.
+ *
+ * A similar issue exists for the tokens held by the target thread.
+ * If we cannot obtain ownership of the tokens we cannot immediately
+ * schedule the thread.
+ */
+ for (;;) {
+ clear_lwkt_resched();
+ didaccumulate = 0;
+ ntd = TAILQ_FIRST(&gd->gd_tdrunq);
+
/*
- * Priority queue / round-robin at each priority. Note that user
- * processes run at a fixed, low priority and the user process
- * scheduler deals with interactions between user processes
- * by scheduling and descheduling them from the LWKT queue as
- * necessary.
- *
- * We have to adjust the MP lock for the target thread. If we
- * need the MP lock and cannot obtain it we try to locate a
- * thread that does not need the MP lock. If we cannot, we spin
- * instead of HLT.
+ * Hotpath if we can get all necessary resources.
*
- * A similar issue exists for the tokens held by the target thread.
- * If we cannot obtain ownership of the tokens we cannot immediately
- * schedule the thread.
+ * If nothing is runnable switch to the idle thread
*/
+ if (ntd == NULL) {
+ ntd = &gd->gd_idlethread;
+ if (gd->gd_reqflags & RQF_IDLECHECK_MASK)
+ ntd->td_flags |= TDF_IDLE_NOHLT;
+ if (ntd->td_mpcount) {
+ if (gd->gd_trap_nesting_level == 0 && panicstr == NULL)
+ panic("Idle thread %p was holding the BGL!", ntd);
+ if (mpheld == 0) {
+ cpu_pause();
+ continue;
+ }
+ }
+ goto haveidle;
+ }
/*
- * If an LWKT reschedule was requested, well that is what we are
- * doing now so clear it.
+ * Hotpath schedule
+ */
+ if (ntd->td_fairq_accum >= 0 &&
+#ifdef SMP
+ (ntd->td_mpcount == 0 || mpheld || cpu_try_mplock()) &&
+#endif
+ (!TD_TOKS_HELD(ntd) || lwkt_getalltokens(ntd))
+ ) {
+#ifdef SMP
+ clr_mplock_contention_mask(gd);
+#endif
+ goto havethread;
+ }
+
+#ifdef SMP
+ /* Reload mpheld (it becomes stale after mplock/token ops) */
+ mpheld = MP_LOCK_HELD();
+#endif
+
+ /*
+ * Coldpath - unable to schedule ntd, continue looking for threads
+ * to schedule. This is only allowed if the (presumably) kernel
+ * thread exhausted its fair share. A kernel thread stuck on
+ * resources does not currently allow a user thread to get in
+ * front of it.
*/
- clear_lwkt_resched();
-again:
- if (gd->gd_runqmask) {
- int nq = bsrl(gd->gd_runqmask);
- if ((ntd = TAILQ_FIRST(&gd->gd_tdrunq[nq])) == NULL) {
- gd->gd_runqmask &= ~(1 << nq);
- goto again;
- }
#ifdef SMP
+ nquserok = ((ntd->td_pri < TDPRI_KERN_LPSCHED) ||
+ (ntd->td_fairq_accum < 0));
+#endif
+ nlast = NULL;
+
+ for (;;) {
/*
- * THREAD SELECTION FOR AN SMP MACHINE BUILD
+ * If the fair-share scheduler ran out ntd gets moved to the
+ * end and its accumulator will be bumped, if it didn't we
+ * maintain the same queue position.
*
- * If the target needs the MP lock and we couldn't get it,
- * or if the target is holding tokens and we could not
- * gain ownership of the tokens, continue looking for a
- * thread to schedule and spin instead of HLT if we can't.
- *
- * NOTE: the mpheld variable invalid after this conditional, it
- * can change due to both cpu_try_mplock() returning success
- * AND interactions in lwkt_getalltokens() due to the fact that
- * we are trying to check the mpcount of a thread other then
- * the current thread. Because of this, if the current thread
- * is not holding td_mpcount, an IPI indirectly run via
- * lwkt_getalltokens() can obtain and release the MP lock and
- * cause the core MP lock to be released.
+ * nlast keeps track of the last element prior to any moves.
*/
- if ((ntd->td_mpcount && mpheld == 0 && !cpu_try_mplock()) ||
- (TD_TOKS_HELD(ntd) && lwkt_getalltokens(ntd) == 0)
- ) {
- u_int32_t rqmask = gd->gd_runqmask;
+ if (ntd->td_fairq_accum < 0) {
+ xtd = TAILQ_NEXT(ntd, td_threadq);
+ lwkt_fairq_accumulate(gd, ntd);
+ didaccumulate = 1;
+ TAILQ_REMOVE(&gd->gd_tdrunq, ntd, td_threadq);
+ TAILQ_INSERT_TAIL(&gd->gd_tdrunq, ntd, td_threadq);
+ if (nlast == NULL) {
+ nlast = ntd;
+ if (xtd == NULL)
+ xtd = ntd;
+ }
+ ntd = xtd;
+ } else {
+ ntd = TAILQ_NEXT(ntd, td_threadq);
+ }
+ /*
+ * If we exhausted the run list switch to the idle thread.
+ * Since one or more threads had resource acquisition issues
+ * we do not allow the idle thread to halt.
+ *
+ * NOTE: nlast can be NULL.
+ */
+ if (ntd == nlast) {
cpu_pause();
-
- mpheld = MP_LOCK_HELD();
- ntd = NULL;
- while (rqmask) {
- TAILQ_FOREACH(ntd, &gd->gd_tdrunq[nq], td_threadq) {
- if (ntd->td_mpcount && !mpheld && !cpu_try_mplock()) {
- /* spinning due to MP lock being held */
- continue;
- }
-
- /*
- * mpheld state invalid after getalltokens call returns
- * failure, but the variable is only needed for
- * the loop.
- */
- if (TD_TOKS_HELD(ntd) && !lwkt_getalltokens(ntd)) {
- /* spinning due to token contention */
-#ifdef INVARIANTS
- ++token_contention_count;
-#endif
- mpheld = MP_LOCK_HELD();
- continue;
- }
- break;
- }
- if (ntd)
- break;
- rqmask &= ~(1 << nq);
- nq = bsrl(rqmask);
-
- /*
- * We have two choices. We can either refuse to run a
- * user thread when a kernel thread needs the MP lock
- * but could not get it, or we can allow it to run but
- * then expect an IPI (hopefully) later on to force a
- * reschedule when the MP lock might become available.
- */
- if (nq < TDPRI_KERN_LPSCHED) {
- break; /* for now refuse to run */
-#if 0
- if (chain_mplock == 0)
- break;
- /* continue loop, allow user threads to be scheduled */
-#endif
+ ntd = &gd->gd_idlethread;
+ ntd->td_flags |= TDF_IDLE_NOHLT;
+ set_mplock_contention_mask(gd);
+ cpu_mplock_contested();
+ if (ntd->td_mpcount) {
+ mpheld = MP_LOCK_HELD();
+ if (gd->gd_trap_nesting_level == 0 && panicstr == NULL)
+ panic("Idle thread %p was holding the BGL!", ntd);
+ if (mpheld == 0) {
+ cpu_pause();
+ break; /* try again from the top, almost */
}
}
/*
- * Case where a (kernel) thread needed the MP lock and could
- * not get one, and we may or may not have found another
- * thread which does not need the MP lock to run while
- * we wait (ntd).
+ * If fairq accumulations occurred we do not schedule the
+ * idle thread. This will cause us to try again from
+ * the (almost) top.
*/
- if (ntd == NULL) {
- ntd = &gd->gd_idlethread;
- ntd->td_flags |= TDF_IDLE_NOHLT;
- set_mplock_contention_mask(gd);
- cpu_mplock_contested();
- goto using_idle_thread;
- } else {
- clr_mplock_contention_mask(gd);
- ++gd->gd_cnt.v_swtch;
- TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
- TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
- }
- } else {
- clr_mplock_contention_mask(gd);
- ++gd->gd_cnt.v_swtch;
- TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
- TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
+ if (didaccumulate)
+ break;
+ goto haveidle;
}
-#else
+
/*
- * THREAD SELECTION FOR A UP MACHINE BUILD. We don't have to
- * worry about tokens or the BGL. However, we still have
- * to call lwkt_getalltokens() in order to properly detect
- * stale tokens. This call cannot fail for a UP build!
+ * Try to switch to this thread.
*/
- lwkt_getalltokens(ntd);
- ++gd->gd_cnt.v_swtch;
- TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
- TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
+ if ((ntd->td_pri >= TDPRI_KERN_LPSCHED || nquserok) &&
+ ntd->td_fairq_accum >= 0 &&
+#ifdef SMP
+ (ntd->td_mpcount == 0 || mpheld || cpu_try_mplock()) &&
#endif
- } else {
- /*
- * We have nothing to run but only let the idle loop halt
- * the cpu if there are no pending interrupts.
- */
- ntd = &gd->gd_idlethread;
- if (gd->gd_reqflags & RQF_IDLECHECK_MASK)
- ntd->td_flags |= TDF_IDLE_NOHLT;
+ (!TD_TOKS_HELD(ntd) || lwkt_getalltokens(ntd))
+ ) {
#ifdef SMP
-using_idle_thread:
- /*
- * The idle thread should not be holding the MP lock unless we
- * are trapping in the kernel or in a panic. Since we select the
- * idle thread unconditionally when no other thread is available,
- * if the MP lock is desired during a panic or kernel trap, we
- * have to loop in the scheduler until we get it.
- */
- if (ntd->td_mpcount) {
- mpheld = MP_LOCK_HELD();
- if (gd->gd_trap_nesting_level == 0 && panicstr == NULL)
- panic("Idle thread %p was holding the BGL!", ntd);
- if (mpheld == 0)
- goto again;
+ clr_mplock_contention_mask(gd);
+#endif
+ goto havethread;
}
+#ifdef SMP
+ /* Reload mpheld (it becomes stale after mplock/token ops) */
+ mpheld = MP_LOCK_HELD();
+ if (ntd->td_pri >= TDPRI_KERN_LPSCHED && ntd->td_fairq_accum >= 0)
+ nquserok = 0;
#endif
}
}
- KASSERT(ntd->td_pri >= TDPRI_CRIT,
- ("priority problem in lwkt_switch %d %d", td->td_pri, ntd->td_pri));
/*
- * Do the actual switch. If the new target does not need the MP lock
- * and we are holding it, release the MP lock. If the new target requires
- * the MP lock we have already acquired it for the target.
+ * Do the actual switch. WARNING: mpheld is stale here.
+ *
+ * We must always decrement td_fairq_accum on non-idle threads just
+ * in case a thread never gets a tick due to being in a continuous
+ * critical section. The page-zeroing code does that.
+ *
+ * If the thread we came up with is a higher or equal priority versus
+ * the thread at the head of the queue we move our thread to the
+ * front. This way we can always check the front of the queue.
*/
+havethread:
+ ++gd->gd_cnt.v_swtch;
+ --ntd->td_fairq_accum;
+ xtd = TAILQ_FIRST(&gd->gd_tdrunq);
+ if (ntd != xtd && ntd->td_pri >= xtd->td_pri) {
+ TAILQ_REMOVE(&gd->gd_tdrunq, ntd, td_threadq);
+ TAILQ_INSERT_HEAD(&gd->gd_tdrunq, ntd, td_threadq);
+ }
+havethread_preempted:
+ ;
+ /*
+ * If the new target does not need the MP lock and we are holding it,
+ * release the MP lock. If the new target requires the MP lock we have
+ * already acquired it for the target.
+ *
+ * WARNING: mpheld is stale here.
+ */
+haveidle:
+ KASSERT(ntd->td_critcount,
+ ("priority problem in lwkt_switch %d %d", td->td_pri, ntd->td_pri));
#ifdef SMP
if (ntd->td_mpcount == 0 ) {
if (MP_LOCK_HELD())
if (td != ntd) {
++switch_count;
#ifdef __x86_64__
- {
- int tos_ok __debugvar = jg_tos_ok(ntd);
- KKASSERT(tos_ok);
- }
+ {
+ int tos_ok __debugvar = jg_tos_ok(ntd);
+ KKASSERT(tos_ok);
+ }
#endif
KTR_LOG(ctxsw_sw, gd->gd_cpuid, ntd);
td->td_switch(ntd);
*
* THE CALLER OF LWKT_PREEMPT() MUST BE IN A CRITICAL SECTION. Typically
* this is called via lwkt_schedule() through the td_preemptable callback.
- * critpri is the managed critical priority that we should ignore in order
+ * critcount is the managed critical priority that we should ignore in order
* to determine whether preemption is possible (aka usually just the crit
* priority of lwkt_schedule() itself).
*
* can leave it synchronized on return).
*/
void
-lwkt_preempt(thread_t ntd, int critpri)
+lwkt_preempt(thread_t ntd, int critcount)
{
struct globaldata *gd = mycpu;
thread_t td;
/*
* The caller has put us in a critical section. We can only preempt
* if the caller of the caller was not in a critical section (basically
- * a local interrupt), as determined by the 'critpri' parameter. We
+ * a local interrupt), as determined by the 'critcount' parameter. We
* also can't preempt if the caller is holding any spinlocks (even if
* he isn't in a critical section). This also handles the tokens test.
*
*
* Set need_lwkt_resched() unconditionally for now YYY.
*/
- KASSERT(ntd->td_pri >= TDPRI_CRIT, ("BADCRIT0 %d", ntd->td_pri));
+ KASSERT(ntd->td_critcount, ("BADCRIT0 %d", ntd->td_pri));
td = gd->gd_curthread;
- if ((ntd->td_pri & TDPRI_MASK) <= (td->td_pri & TDPRI_MASK)) {
+ if (ntd->td_pri <= td->td_pri) {
++preempt_miss;
return;
}
- if ((td->td_pri & ~TDPRI_MASK) > critpri) {
+ if (td->td_critcount > critcount) {
++preempt_miss;
need_lwkt_resched();
return;
globaldata_t gd = mycpu;
thread_t td = gd->gd_curthread;
- if (gd->gd_reqflags && td->td_nest_count < 2)
+ if ((gd->gd_reqflags & RQF_IDLECHECK_MASK) && td->td_nest_count < 2)
splz();
}
/*
- * This implements a normal yield which will yield to equal priority
- * threads as well as higher priority threads. Note that gd_reqflags
- * tests will be handled by the crit_exit() call in lwkt_switch().
- *
- * (self contained on a per cpu basis)
+ * This function is used to negotiate a passive release of the current
+ * process/lwp designation with the user scheduler, allowing the user
+ * scheduler to schedule another user thread. The related kernel thread
+ * (curthread) continues running in the released state.
+ *
+ * The function clears td->td_release, sets the calling thread's priority
+ * to TDPRI_KERN_USER with lwkt_setpri_self(), and then calls the
+ * release_curproc() method of the user scheduler attached to td->td_lwp.
+ * td->td_lwp is dereferenced without a NULL check.
*/
void
-lwkt_yield(void)
+lwkt_passive_release(struct thread *td)
{
- lwkt_schedule_self(curthread);
- lwkt_switch();
+ struct lwp *lp = td->td_lwp;
+
+ td->td_release = NULL;
+ lwkt_setpri_self(TDPRI_KERN_USER);
+ lp->lwp_proc->p_usched->release_curproc(lp);
}
+
/*
- * This function is used along with the lwkt_passive_recover() inline
- * by the trap code to negotiate a passive release of the current
- * process/lwp designation with the user scheduler.
+ * This implements a normal yield. This routine is virtually a nop if
+ * there is nothing to yield to but it will always run any pending interrupts
+ * if called from a critical section.
+ *
+ * This yield is designed for kernel threads without a user context.
+ *
+ * splz() is called first when any RQF_IDLECHECK_MASK bit is set in
+ * gd_reqflags and td_nest_count is below 2. The thread then reschedules
+ * itself and switches in exactly two cases: its td_fairq_accum is
+ * negative, or the thread at the head of the run queue has a strictly
+ * higher td_pri than the caller. In every other case it simply returns.
+ *
+ * (self contained on a per cpu basis)
*/
void
-lwkt_passive_release(struct thread *td)
+lwkt_yield(void)
{
- struct lwp *lp = td->td_lwp;
+ globaldata_t gd = mycpu;
+ thread_t td = gd->gd_curthread;
+ thread_t xtd;
- td->td_release = NULL;
- lwkt_setpri_self(TDPRI_KERN_USER);
- lp->lwp_proc->p_usched->release_curproc(lp);
+ if ((gd->gd_reqflags & RQF_IDLECHECK_MASK) && td->td_nest_count < 2)
+ splz();
+ if (td->td_fairq_accum < 0) {
+ lwkt_schedule_self(curthread);
+ lwkt_switch();
+ } else {
+ xtd = TAILQ_FIRST(&gd->gd_tdrunq);
+ if (xtd && xtd->td_pri > td->td_pri) {
+ lwkt_schedule_self(curthread);
+ lwkt_switch();
+ }
+ }
}
/*
- * Make a kernel thread act as if it were in user mode with regards
- * to scheduling, to avoid becoming cpu-bound in the kernel. Kernel
- * loops which may be potentially cpu-bound can call lwkt_user_yield().
+ * This yield is designed for kernel threads with a user context.
+ *
+ * The kernel acting on behalf of the user is potentially cpu-bound,
+ * this function will efficiently allow other threads to run and also
+ * switch to other processes by releasing.
*
* The lwkt_user_yield() function is designed to have very low overhead
* if no yield is determined to be needed.
void
lwkt_user_yield(void)
{
- thread_t td = curthread;
- struct lwp *lp = td->td_lwp;
+ globaldata_t gd = mycpu;
+ thread_t td = gd->gd_curthread;
+
+ /*
+ * Always run any pending interrupts in case we are in a critical
+ * section.
+ */
+ if ((gd->gd_reqflags & RQF_IDLECHECK_MASK) && td->td_nest_count < 2)
+ splz();
#ifdef SMP
/*
#endif
/*
- * Another kernel thread wants the cpu
+ * Switch (which forces a release) if another kernel thread needs
+ * the cpu, if userland wants us to resched, or if our kernel
+ * quantum has run out.
*/
- if (lwkt_resched_wanted())
+ if (lwkt_resched_wanted() ||
+ user_resched_wanted() ||
+ td->td_fairq_accum < 0)
+ {
lwkt_switch();
-
- /*
- * If the user scheduler has asynchronously determined that the current
- * process (when running in user mode) needs to lose the cpu then make
- * sure we are released.
- */
- if (user_resched_wanted()) {
- if (td->td_release)
- td->td_release(td);
}
+#if 0
/*
- * If we are released reduce our priority
+ * Reacquire the current process if we are released.
+ *
+ * XXX not implemented atm. The kernel may be holding locks and such,
+ * so we want the thread to continue to receive cpu.
*/
- if (td->td_release == NULL) {
- if (lwkt_check_resched(td) > 0)
- lwkt_switch();
- if (lp) {
- lp->lwp_proc->p_usched->acquire_curproc(lp);
- td->td_release = lwkt_passive_release;
- lwkt_setpri_self(TDPRI_USER_NORM);
- }
+ if (td->td_release == NULL && lp) {
+ lp->lwp_proc->p_usched->acquire_curproc(lp);
+ td->td_release = lwkt_passive_release;
+ lwkt_setpri_self(TDPRI_USER_NORM);
}
-}
-
-/*
- * Return 0 if no runnable threads are pending at the same or higher
- * priority as the passed thread.
- *
- * Return 1 if runnable threads are pending at the same priority.
- *
- * Return 2 if runnable threads are pending at a higher priority.
- */
-int
-lwkt_check_resched(thread_t td)
-{
- int pri = td->td_pri & TDPRI_MASK;
-
- if (td->td_gd->gd_runqmask > (2 << pri) - 1)
- return(2);
- if (TAILQ_NEXT(td, td_threadq))
- return(1);
- return(0);
+#endif
}
/*
*/
static __inline
void
-_lwkt_schedule_post(globaldata_t gd, thread_t ntd, int cpri, int reschedok)
+_lwkt_schedule_post(globaldata_t gd, thread_t ntd, int ccount, int reschedok)
{
thread_t otd;
if (ntd->td_flags & TDF_RUNQ) {
if (ntd->td_preemptable && reschedok) {
- ntd->td_preemptable(ntd, cpri); /* YYY +token */
+ ntd->td_preemptable(ntd, ccount); /* YYY +token */
} else if (reschedok) {
otd = curthread;
- if ((ntd->td_pri & TDPRI_MASK) > (otd->td_pri & TDPRI_MASK))
+ if (ntd->td_pri > otd->td_pri)
need_lwkt_resched();
}
+
+ /*
+ * Give the thread a little fair share scheduler bump if it
+ * has been asleep for a while. This is primarily to avoid
+ * a degenerate case for interrupt threads where accumulator
+ * crosses into negative territory unnecessarily.
+ *
+ * The bump (gd_fairq_total_pri) is skipped when td_fairq_lticks
+ * already equals the current ticks value, and the accumulator
+ * is clamped to TDFAIRQ_MAX(gd).
+ */
+ if (ntd->td_fairq_lticks != ticks) {
+ ntd->td_fairq_lticks = ticks;
+ ntd->td_fairq_accum += gd->gd_fairq_total_pri;
+ if (ntd->td_fairq_accum > TDFAIRQ_MAX(gd))
+ ntd->td_fairq_accum = TDFAIRQ_MAX(gd);
+ }
}
}
#ifdef SMP
if (td->td_gd == mygd) {
_lwkt_enqueue(td);
- _lwkt_schedule_post(mygd, td, TDPRI_CRIT, reschedok);
+ _lwkt_schedule_post(mygd, td, 1, reschedok);
} else {
lwkt_send_ipiq3(td->td_gd, lwkt_schedule_remote, td, 0);
}
#else
_lwkt_enqueue(td);
- _lwkt_schedule_post(mygd, td, TDPRI_CRIT, reschedok);
+ _lwkt_schedule_post(mygd, td, 1, reschedok);
#endif
}
crit_exit_gd(mygd);
* Set the target thread's priority. This routine does not automatically
* switch to a higher priority thread, LWKT threads are not designed for
* continuous priority changes. Yield if you want to switch.
- *
- * We have to retain the critical section count which uses the high bits
- * of the td_pri field. The specified priority may also indicate zero or
- * more critical sections by adding TDPRI_CRIT*N.
- *
- * Note that we requeue the thread whether it winds up on a different runq
- * or not. uio_yield() depends on this and the routine is not normally
- * called with the same priority otherwise.
+ *
+ * The thread must belong to the current cpu (td->td_gd == mycpu).
+ * Nothing is done when td already has the requested priority, so the
+ * thread is not requeued in that case. Otherwise the change is made
+ * inside a critical section, and a thread that is on the run queue is
+ * dequeued and re-enqueued around it.
*/
void
lwkt_setpri(thread_t td, int pri)
{
- KKASSERT(pri >= 0);
KKASSERT(td->td_gd == mycpu);
- crit_enter();
- if (td->td_flags & TDF_RUNQ) {
- _lwkt_dequeue(td);
- td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
- _lwkt_enqueue(td);
- } else {
- td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
+ if (td->td_pri != pri) {
+ KKASSERT(pri >= 0);
+ crit_enter();
+ if (td->td_flags & TDF_RUNQ) {
+ _lwkt_dequeue(td);
+ td->td_pri = pri;
+ _lwkt_enqueue(td);
+ } else {
+ td->td_pri = pri;
+ }
+ crit_exit();
}
- crit_exit();
}
/*
{
KKASSERT(pri >= 0);
KKASSERT((td->td_flags & TDF_RUNQ) == 0);
- td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
+ td->td_pri = pri;
}
void
crit_enter();
if (td->td_flags & TDF_RUNQ) {
_lwkt_dequeue(td);
- td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
+ td->td_pri = pri;
_lwkt_enqueue(td);
} else {
- td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
+ td->td_pri = pri;
}
crit_exit();
}
/*
+ * Fair-share accounting for td and for every thread reachable through
+ * its td_preempted chain. A thread that is the idle thread of its own
+ * cpu is skipped. Each remaining thread has its cpu's gd_fairq_total_pri
+ * subtracted from td_fairq_accum, clamped at -TDFAIRQ_MAX(); a negative
+ * accumulator requests an lwkt reschedule, and td_fairq_lticks is set to
+ * the current ticks value. Nothing is done when fairq_enable is zero.
+ *
+ * 1/hz tick (typically 10ms) x TDFAIRQ_SCALE (typ 8) = 80ms full cycle.
+ *
+ * Example: two competing threads, same priority N. decrement by (2*N)
+ * increment by N*8, each thread will get 4 ticks.
+ */
+void
+lwkt_fairq_schedulerclock(thread_t td)
+{
+ if (fairq_enable) {
+ while (td) {
+ if (td != &td->td_gd->gd_idlethread) {
+ td->td_fairq_accum -= td->td_gd->gd_fairq_total_pri;
+ if (td->td_fairq_accum < -TDFAIRQ_MAX(td->td_gd))
+ td->td_fairq_accum = -TDFAIRQ_MAX(td->td_gd);
+ if (td->td_fairq_accum < 0)
+ need_lwkt_resched();
+ td->td_fairq_lticks = ticks;
+ }
+ td = td->td_preempted;
+ }
+ }
+}
+
+/*
+ * Credit td's fair-share accumulator with td_pri * TDFAIRQ_SCALE and
+ * clamp the result to TDFAIRQ_MAX(). The gd argument is not referenced
+ * in the body; the clamp is computed from td->td_gd.
+ */
+static void
+lwkt_fairq_accumulate(globaldata_t gd, thread_t td)
+{
+ td->td_fairq_accum += td->td_pri * TDFAIRQ_SCALE;
+ if (td->td_fairq_accum > TDFAIRQ_MAX(td->td_gd))
+ td->td_fairq_accum = TDFAIRQ_MAX(td->td_gd);
+}
+
+/*
* Migrate the current thread to the specified cpu.
*
* This is accomplished by descheduling ourselves from the current cpu,
*
* Since the tokref is already active the scheduler now
* takes care of acquisition, so we need only call
- * lwkt_yield().
+ * lwkt_switch().
*
* Since we failed this was not a recursive token so upon
* return tr_tok->t_ref should be assigned to this specific
*/
atomic_add_long(&ref->tr_tok->t_collisions, 1);
logtoken(fail, ref);
- lwkt_yield();
+ lwkt_switch();
logtoken(succ, ref);
KKASSERT(ref->tr_tok->t_ref == ref);
}
* the run queue. When we are reactivated we will have
* another chance.
*/
- if (lwkt_check_resched(lp->lwp_thread) > 1) {
- lwkt_switch();
- continue;
- }
+ lwkt_yield();
} while (dd->uschedcp != lp);
crit_exit();
* Package up an I/O request on a vnode into a uio and do it. The I/O
* request is split up into smaller chunks and we try to avoid saturating
* the buffer cache while potentially holding a vnode locked, so we
- * check bwillwrite() before calling vn_rdwr(). We also call uio_yield()
+ * check bwillwrite() before calling vn_rdwr(). We also call lwkt_user_yield()
* to give other processes a chance to lock the vnode (either other processes
* core'ing the same binary, or unrelated processes scanning the directory).
*
break;
offset += chunk;
base += chunk;
- uio_yield();
+ lwkt_user_yield();
} while (len);
if (aresid)
*aresid += len;
KASSERT(cap != NULL, ("%s: Driver disappeared.", __func__));
if (!cap->cc_qblocked) {
result = crypto_invoke(cap, crp, 0);
+ lwkt_yield();
if (result != ERESTART)
return (result);
/*
CRYPTO_Q_UNLOCK(tdinfo);
result = crypto_invoke(cap, submit, hint);
+ lwkt_yield();
CRYPTO_Q_LOCK(tdinfo);
if (result == ERESTART) {
pushl %eax ; \
testl $-1,TD_NEST_COUNT(%ebx) ; \
jne 1f ; \
- cmpl $TDPRI_CRIT,TD_PRI(%ebx) ; \
- jl 2f ; \
+ testl $-1,TD_CRITCOUNT(%ebx) ; \
+ je 2f ; \
1: ; \
/* in critical section, make interrupt pending */ \
/* set the pending bit and return, leave interrupt masked */ \
andl $~IRQ_LBIT(irq_num),PCPU(fpending) ; \
pushl $irq_num ; \
pushl %esp ; /* pass frame by reference */ \
- addl $TDPRI_CRIT,TD_PRI(%ebx) ; \
+ incl TD_CRITCOUNT(%ebx) ; \
call ithread_fast_handler ; /* returns 0 to unmask */ \
- subl $TDPRI_CRIT,TD_PRI(%ebx) ; \
+ decl TD_CRITCOUNT(%ebx) ; \
addl $8, %esp ; \
UNMASK_IRQ(irq_num) ; \
5: ; \
incl PCPU(cnt) + V_IPI
movl PCPU(curthread),%ebx
- cmpl $TDPRI_CRIT,TD_PRI(%ebx)
- jge 1f
+ testl $-1,TD_CRITCOUNT(%ebx)
+ jne 1f
subl $8,%esp /* make same as interrupt frame */
pushl %esp /* pass frame by reference */
incl PCPU(intr_nesting_level)
- addl $TDPRI_CRIT,TD_PRI(%ebx)
+ incl TD_CRITCOUNT(%ebx)
call lwkt_process_ipiq_frame
- subl $TDPRI_CRIT,TD_PRI(%ebx)
+ decl TD_CRITCOUNT(%ebx)
decl PCPU(intr_nesting_level)
addl $12,%esp
pushl $0 /* CPL for frame (REMOVED) */
incl PCPU(cnt) + V_TIMER
movl PCPU(curthread),%ebx
- cmpl $TDPRI_CRIT,TD_PRI(%ebx)
- jge 1f
+ testl $-1,TD_CRITCOUNT(%ebx)
+ jne 1f
testl $-1,TD_NEST_COUNT(%ebx)
jne 1f
subl $8,%esp /* make same as interrupt frame */
pushl %esp /* pass frame by reference */
incl PCPU(intr_nesting_level)
- addl $TDPRI_CRIT,TD_PRI(%ebx)
+ incl TD_CRITCOUNT(%ebx)
call lapic_timer_process_frame
- subl $TDPRI_CRIT,TD_PRI(%ebx)
+ decl TD_CRITCOUNT(%ebx)
decl PCPU(intr_nesting_level)
addl $12,%esp
pushl $0 /* CPL for frame (REMOVED) */
pushl %ecx ; \
movl GD_CURTHREAD(%eax),%edx ; /* EDX = CURTHREAD */ \
movl TD_SAVEFPU(%edx),%ebx ; /* save app save area */\
- addl $TDPRI_CRIT,TD_PRI(%edx) ; \
+ incl TD_CRITCOUNT(%edx) ; \
cmpl $0,GD_NPXTHREAD(%eax) ; \
je 100f ; \
fxsave 0(%ebx) ; /* race(1) */ \
orl $TDF_KERNELFP,TD_FLAGS(%edx) ; \
clts ; \
movl %edx,GD_NPXTHREAD(%eax) ; /* race(3) */ \
- subl $TDPRI_CRIT,TD_PRI(%edx) ; /* crit_exit() */ \
+ decl TD_CRITCOUNT(%edx) ; /* crit_exit() */ \
cmpl $0,GD_REQFLAGS(%eax) ; \
je 101f ; \
- cmpl $TDPRI_CRIT,TD_PRI(%edx) ; \
- jge 101f ; \
+ testl $-1,TD_CRITCOUNT(%edx) ; \
+ jne 101f ; \
call splz_check ; \
/* note: eax,ecx,edx destroyed */ \
101: ; \
*/
ENTRY(fork_trampoline)
movl PCPU(curthread),%eax
- subl $TDPRI_CRIT,TD_PRI(%eax)
+ decl TD_CRITCOUNT(%eax)
/*
* cpu_set_fork_handler intercepts this function call to
#ifdef SMP
ASSYM(TD_MPCOUNT, offsetof(struct thread, td_mpcount));
#endif
+ASSYM(TD_CRITCOUNT, offsetof(struct thread, td_critcount));
ASSYM(TD_FLAGS, offsetof(struct thread, td_flags));
ASSYM(TDF_RUNNING, TDF_RUNNING);
ASSYM(TDF_USINGFP, TDF_USINGFP);
ASSYM(TD_SAVEFPU, offsetof(struct thread, td_mach) + offsetof(struct md_thread, mtd_savefpu));
-ASSYM(TDPRI_CRIT, TDPRI_CRIT);
ASSYM(TDPRI_INT_SUPPORT, TDPRI_INT_SUPPORT);
#ifdef SMP
ASSYM(CPUMASK_LOCK, CPUMASK_LOCK);
*/
vu->vu_pending = 0;
upcall.upc_pending = morepending;
- crit_count += TDPRI_CRIT;
+ ++crit_count;
copyout(&upcall.upc_pending, &lp->lwp_upcall->upc_pending,
sizeof(upcall.upc_pending));
copyout(&crit_count, (char *)upcall.upc_uthread + upcall.upc_critoff,
crit_count = 0;
if (error == 0)
error = copyin((char *)upcall.upc_uthread + upcall.upc_critoff, &crit_count, sizeof(int));
- crit_count += TDPRI_CRIT;
+ ++crit_count;
if (error == 0)
error = copyout(&crit_count, (char *)upcall.upc_uthread + upcall.upc_critoff, sizeof(int));
regs->tf_eax = (register_t)vu->vu_func;
struct thread *td = curthread;
crit_exit();
- KKASSERT(td->td_pri < TDPRI_CRIT);
+ KKASSERT(td->td_critcount == 0);
for (;;) {
/*
* See if there are any LWKTs ready to go.
movl %ecx,%cr3
andl $~TDF_RUNNING,TD_FLAGS(%ebx)
orl $TDF_RUNNING,TD_FLAGS(%eax)
- subl $TDPRI_CRIT,TD_PRI(%eax)
+ decl TD_CRITCOUNT(%eax)
popl %eax /* kthread exit function */
pushl PCB_EBX(%edx) /* argument to ESI function */
pushl %eax /* set exit func as return address */
int have_mplock = 0;
#endif
#ifdef INVARIANTS
- int crit_count = td->td_pri & ~TDPRI_MASK;
+ int crit_count = td->td_critcount;
#endif
vm_offset_t eva;
if (p != NULL && lp != NULL)
KTR_LOG(kernentry_trap_ret, p->p_pid, lp->lwp_tid);
#ifdef INVARIANTS
- KASSERT(crit_count == (td->td_pri & ~TDPRI_MASK),
+ KASSERT(crit_count == td->td_critcount,
("syscall: critical section count mismatch! %d/%d",
- crit_count / TDPRI_CRIT, td->td_pri / TDPRI_CRIT));
+ crit_count, td->td_critcount));
#endif
}
kprintf("Idle\n");
}
kprintf("current thread = pri %d ", curthread->td_pri);
- if (curthread->td_pri >= TDPRI_CRIT)
+ if (curthread->td_critcount)
kprintf("(CRIT)");
kprintf("\n");
#ifdef SMP
int error;
int narg;
#ifdef INVARIANTS
- int crit_count = td->td_pri & ~TDPRI_MASK;
+ int crit_count = td->td_critcount;
#endif
#ifdef SMP
int have_mplock = 0;
#endif
KTR_LOG(kernentry_syscall_ret, p->p_pid, lp->lwp_tid, error);
#ifdef INVARIANTS
- KASSERT(crit_count == (td->td_pri & ~TDPRI_MASK),
+ KASSERT(crit_count == td->td_critcount,
("syscall: critical section count mismatch! %d/%d",
- crit_count / TDPRI_CRIT, td->td_pri / TDPRI_CRIT));
+ crit_count, td->td_critcount));
#endif
}
pushl $0 ; /* DUMMY CPL FOR DORETI */ \
testl $-1,TD_NEST_COUNT(%ebx) ; \
jne 1f ; \
- cmpl $TDPRI_CRIT,TD_PRI(%ebx) ; \
- jl 2f ; \
+ testl $-1,TD_CRITCOUNT(%ebx) ; \
+ je 2f ; \
1: ; \
/* set pending bit and return, leave interrupt masked */ \
orl $IRQ_LBIT(irq_num),PCPU(fpending) ; \
andl $~IRQ_LBIT(irq_num),PCPU(fpending) ; \
pushl $irq_num ; \
pushl %esp ; /* pass frame by reference */ \
- addl $TDPRI_CRIT,TD_PRI(%ebx) ; \
+ incl TD_CRITCOUNT(%ebx) ; \
call ithread_fast_handler ; /* returns 0 to unmask int */ \
- subl $TDPRI_CRIT,TD_PRI(%ebx) ; \
+ decl TD_CRITCOUNT(%ebx) ; \
addl $8,%esp ; \
UNMASK_IRQ(icu, irq_num) ; \
5: ; \
popl %eax /* cpl to restore XXX */
movl $0,%eax /* irq mask unavailable due to BGL */
movl PCPU(curthread),%ebx
- cli /* interlock with TDPRI_CRIT */
+ cli /* interlock with td_critcount */
cmpl $0,PCPU(reqflags) /* short cut if nothing to do */
je 5f
- cmpl $TDPRI_CRIT,TD_PRI(%ebx) /* can't unpend if in critical sec */
- jge 5f
- addl $TDPRI_CRIT,TD_PRI(%ebx) /* force all ints to pending */
+ testl $-1,TD_CRITCOUNT(%ebx) /* can't unpend if in critical sec */
+ jne 5f
+ incl TD_CRITCOUNT(%ebx) /* force all ints to pending */
doreti_next:
sti /* allow new interrupts */
movl %eax,%ecx /* irq mask unavailable due to BGL */
* BGL requirements. We can only clear RQF_INTPEND if *ALL* pending
* interrupts have been processed.
*/
- subl $TDPRI_CRIT,TD_PRI(%ebx) /* interlocked with cli */
+ decl TD_CRITCOUNT(%ebx) /* interlocked with cli */
testl %eax,%eax
jnz 5f
andl $~RQF_INTPEND,PCPU(reqflags)
pushl %eax
pushl %ecx
incl TD_NEST_COUNT(%ebx) /* prevent doreti/splz nesting */
- subl $TDPRI_CRIT,TD_PRI(%ebx) /* so we can preempt */
+ decl TD_CRITCOUNT(%ebx) /* so we can preempt */
call sched_ithd /* YYY must pull in imasks */
- addl $TDPRI_CRIT,TD_PRI(%ebx)
+ incl TD_CRITCOUNT(%ebx)
decl TD_NEST_COUNT(%ebx)
addl $4,%esp
popl %eax
movl %eax,%esi /* save cpl (can't use stack) */
movl $T_ASTFLT,TF_TRAPNO(%esp)
pushl %esp /* pass frame by reference */
- subl $TDPRI_CRIT,TD_PRI(%ebx)
+ decl TD_CRITCOUNT(%ebx)
call trap
- addl $TDPRI_CRIT,TD_PRI(%ebx)
+ incl TD_CRITCOUNT(%ebx)
addl $4,%esp
movl %esi,%eax /* restore cpl for loop */
jmp doreti_next
pushfl
pushl %ebx
movl PCPU(curthread),%ebx
- addl $TDPRI_CRIT,TD_PRI(%ebx)
+ incl TD_CRITCOUNT(%ebx)
movl $0,%eax
splz_next:
cmpl $0,%ecx
jnz splz_soft
- subl $TDPRI_CRIT,TD_PRI(%ebx)
+ decl TD_CRITCOUNT(%ebx)
/*
* Nothing left to do, finish up. Interrupts are still disabled.
sti
pushl %eax
pushl %ecx
- subl $TDPRI_CRIT,TD_PRI(%ebx)
+ decl TD_CRITCOUNT(%ebx)
incl TD_NEST_COUNT(%ebx) /* prevent doreti/splz nesting */
call sched_ithd /* YYY must pull in imasks */
- addl $TDPRI_CRIT,TD_PRI(%ebx)
+ incl TD_CRITCOUNT(%ebx)
decl TD_NEST_COUNT(%ebx) /* prevent doreti/splz nesting */
addl $4,%esp
popl %eax
movq PCPU(curthread),%rbx ; \
testl $-1,TD_NEST_COUNT(%rbx) ; \
jne 1f ; \
- cmpl $TDPRI_CRIT,TD_PRI(%rbx) ; \
- jl 2f ; \
+ testl $-1,TD_CRITCOUNT(%rbx) ; \
+ je 2f ; \
1: ; \
/* in critical section, make interrupt pending */ \
/* set the pending bit and return, leave interrupt masked */ \
andl $~IRQ_LBIT(irq_num),PCPU(fpending) ; \
pushq $irq_num ; /* trapframe -> intrframe */ \
movq %rsp, %rdi ; /* pass frame by reference */ \
- addl $TDPRI_CRIT,TD_PRI(%rbx) ; \
+ incl TD_CRITCOUNT(%rbx) ; \
call ithread_fast_handler ; /* returns 0 to unmask */ \
- subl $TDPRI_CRIT,TD_PRI(%rbx) ; \
+ decl TD_CRITCOUNT(%rbx) ; \
addq $8, %rsp ; /* intrframe -> trapframe */ \
UNMASK_IRQ(irq_num) ; \
5: ; \
incl PCPU(cnt) + V_IPI
movq PCPU(curthread),%rbx
- cmpl $TDPRI_CRIT,TD_PRI(%rbx)
- jge 1f
+ testl $-1,TD_CRITCOUNT(%rbx)
+ jne 1f
subq $8,%rsp /* make same as interrupt frame */
movq %rsp,%rdi /* pass frame by reference */
incl PCPU(intr_nesting_level)
- addl $TDPRI_CRIT,TD_PRI(%rbx)
+ incl TD_CRITCOUNT(%rbx)
call lwkt_process_ipiq_frame
- subl $TDPRI_CRIT,TD_PRI(%rbx)
+ decl TD_CRITCOUNT(%rbx)
decl PCPU(intr_nesting_level)
addq $8,%rsp /* turn into trapframe */
MEXITCOUNT
incl PCPU(cnt) + V_TIMER
movq PCPU(curthread),%rbx
- cmpl $TDPRI_CRIT,TD_PRI(%rbx)
- jge 1f
+ testl $-1,TD_CRITCOUNT(%rbx)
+ jne 1f
testl $-1,TD_NEST_COUNT(%rbx)
jne 1f
subq $8,%rsp /* make same as interrupt frame */
movq %rsp,%rdi /* pass frame by reference */
incl PCPU(intr_nesting_level)
- addl $TDPRI_CRIT,TD_PRI(%rbx)
+ incl TD_CRITCOUNT(%rbx)
call lapic_timer_process_frame
- subl $TDPRI_CRIT,TD_PRI(%rbx)
+ decl TD_CRITCOUNT(%rbx)
decl PCPU(intr_nesting_level)
addq $8,%rsp /* turn into trapframe */
MEXITCOUNT
movq PCPU(curthread),%rbx ; \
testl $-1,TD_NEST_COUNT(%rbx) ; \
jne 1f ; \
- cmpl $TDPRI_CRIT,TD_PRI(%rbx) ; \
- jl 2f ; \
+ testl $-1,TD_CRITCOUNT(%rbx) ; \
+ je 2f ; \
1: ; \
/* set pending bit and return, leave interrupt masked */ \
orl $IRQ_LBIT(irq_num),PCPU(fpending) ; \
andl $~IRQ_LBIT(irq_num),PCPU(fpending) ; \
pushq $irq_num ; \
movq %rsp,%rdi ; /* rdi = call argument */ \
- addl $TDPRI_CRIT,TD_PRI(%rbx) ; \
+ incl TD_CRITCOUNT(%rbx) ; \
call ithread_fast_handler ; /* returns 0 to unmask int */ \
- subl $TDPRI_CRIT,TD_PRI(%rbx) ; \
+ decl TD_CRITCOUNT(%rbx) ; \
addq $8,%rsp ; /* intr frame -> trap frame */ \
UNMASK_IRQ(icu, irq_num) ; \
5: ; \
*/
ENTRY(fork_trampoline)
movq PCPU(curthread),%rax
- subl $TDPRI_CRIT,TD_PRI(%rax)
+ decl TD_CRITCOUNT(%rax)
/*
* cpu_set_fork_handler intercepts this function call to
ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
ASSYM(TD_SP, offsetof(struct thread, td_sp));
ASSYM(TD_PRI, offsetof(struct thread, td_pri));
+ASSYM(TD_CRITCOUNT, offsetof(struct thread, td_critcount));
ASSYM(TD_MACH, offsetof(struct thread, td_mach));
ASSYM(TD_WCHAN, offsetof(struct thread, td_wchan));
ASSYM(TD_NEST_COUNT, offsetof(struct thread, td_nest_count));
ASSYM(MACHINTR_INTREN, offsetof(struct machintr_abi, intren));
-ASSYM(TDPRI_CRIT, TDPRI_CRIT);
ASSYM(TDPRI_INT_SUPPORT, TDPRI_INT_SUPPORT);
#ifdef SMP
ASSYM(CPUMASK_LOCK, CPUMASK_LOCK);
FAKE_MCOUNT(bintr) /* init "from" bintr -> doreti */
movq $0,%rax /* irq mask unavailable due to BGL */
movq PCPU(curthread),%rbx
- cli /* interlock with TDPRI_CRIT */
+ cli /* interlock with critical section */
cmpl $0,PCPU(reqflags) /* short cut if nothing to do */
je 5f
- cmpl $TDPRI_CRIT,TD_PRI(%rbx) /* can't unpend if in critical sec */
- jge 5f
- addl $TDPRI_CRIT,TD_PRI(%rbx) /* force all ints to pending */
+ testl $-1,TD_CRITCOUNT(%rbx) /* can't unpend if in critical sec */
+ jne 5f
+ incl TD_CRITCOUNT(%rbx) /* force all ints to pending */
doreti_next:
sti /* allow new interrupts */
movl %eax,%ecx /* irq mask unavailable due to BGL */
* BGL requirements. We can only clear RQF_INTPEND if *ALL* pending
* interrupts have been processed.
*/
- subl $TDPRI_CRIT,TD_PRI(%rbx) /* interlocked with cli */
+ decl TD_CRITCOUNT(%rbx) /* interlocked with cli */
testl %eax,%eax
jnz 5f
andl $~RQF_INTPEND,PCPU(reqflags)
pushq %rax
movl %ecx,%edi /* argument to C call */
incl TD_NEST_COUNT(%rbx) /* prevent doreti/splz nesting */
- subl $TDPRI_CRIT,TD_PRI(%rbx) /* so we can preempt */
+ decl TD_CRITCOUNT(%rbx) /* so we can preempt */
call sched_ithd /* YYY must pull in imasks */
- addl $TDPRI_CRIT,TD_PRI(%rbx)
+ incl TD_CRITCOUNT(%rbx)
decl TD_NEST_COUNT(%rbx)
popq %rax
jmp doreti_next
movl %eax,%r12d /* save cpl (can't use stack) */
movl $T_ASTFLT,TF_TRAPNO(%rsp)
movq %rsp,%rdi /* pass frame by ref (%edi = C arg) */
- subl $TDPRI_CRIT,TD_PRI(%rbx)
+ decl TD_CRITCOUNT(%rbx)
call trap
- addl $TDPRI_CRIT,TD_PRI(%rbx)
+ incl TD_CRITCOUNT(%rbx)
movl %r12d,%eax /* restore cpl for loop */
jmp doreti_next
pushfq
pushq %rbx
movq PCPU(curthread),%rbx
- addl $TDPRI_CRIT,TD_PRI(%rbx)
+ incl TD_CRITCOUNT(%rbx)
movl $0,%eax
splz_next:
cmpl $0,%ecx
jnz splz_soft
- subl $TDPRI_CRIT,TD_PRI(%rbx)
+ decl TD_CRITCOUNT(%rbx)
/*
* Nothing left to do, finish up. Interrupts are still disabled.
sti
pushq %rax
movl %ecx,%edi /* C argument */
- subl $TDPRI_CRIT,TD_PRI(%rbx)
+ decl TD_CRITCOUNT(%rbx)
incl TD_NEST_COUNT(%rbx) /* prevent doreti/splz nesting */
call sched_ithd /* YYY must pull in imasks */
- addl $TDPRI_CRIT,TD_PRI(%rbx)
+ incl TD_CRITCOUNT(%rbx)
decl TD_NEST_COUNT(%rbx) /* prevent doreti/splz nesting */
popq %rax
jmp splz_next
*/
vu->vu_pending = 0;
upcall.upc_pending = morepending;
- crit_count += TDPRI_CRIT;
+ ++crit_count;
copyout(&upcall.upc_pending, &lp->lwp_upcall->upc_pending,
sizeof(upcall.upc_pending));
copyout(&crit_count, (char *)upcall.upc_uthread + upcall.upc_critoff,
crit_count = 0;
if (error == 0)
error = copyin((char *)upcall.upc_uthread + upcall.upc_critoff, &crit_count, sizeof(int));
- crit_count += TDPRI_CRIT;
+ ++crit_count;
if (error == 0)
error = copyout(&crit_count, (char *)upcall.upc_uthread + upcall.upc_critoff, sizeof(int));
regs->tf_rax = (register_t)vu->vu_func;
struct thread *td = curthread;
crit_exit();
- KKASSERT(td->td_pri < TDPRI_CRIT);
+ KKASSERT(td->td_critcount == 0);
for (;;) {
/*
* See if there are any LWKTs ready to go.
/* rax and rbx come from the switchout code */
andl $~TDF_RUNNING,TD_FLAGS(%rbx)
orl $TDF_RUNNING,TD_FLAGS(%rax)
- subl $TDPRI_CRIT,TD_PRI(%rax)
+ decl TD_CRITCOUNT(%rax)
movq PCB_R12(%rdx),%rdi /* argument to RBX function */
movq PCB_RBX(%rdx),%rax /* thread function */
/* note: top of stack return address inherited by function */
int have_mplock = 0;
#endif
#ifdef INVARIANTS
- int crit_count = td->td_pri & ~TDPRI_MASK;
+ int crit_count = td->td_critcount;
#endif
vm_offset_t eva;
if (p != NULL && lp != NULL)
KTR_LOG(kernentry_trap_ret, p->p_pid, lp->lwp_tid);
#ifdef INVARIANTS
- KASSERT(crit_count == (td->td_pri & ~TDPRI_MASK),
+ KASSERT(crit_count == td->td_critcount,
("syscall: critical section count mismatch! %d/%d",
- crit_count / TDPRI_CRIT, td->td_pri / TDPRI_CRIT));
+ crit_count, td->td_critcount));
#endif
}
kprintf("Idle\n");
}
kprintf("current thread = pri %d ", curthread->td_pri);
- if (curthread->td_pri >= TDPRI_CRIT)
+ if (curthread->td_critcount)
kprintf("(CRIT)");
kprintf("\n");
int error;
int narg;
#ifdef INVARIANTS
- int crit_count = td->td_pri & ~TDPRI_MASK;
+ int crit_count = td->td_critcount;
#endif
#ifdef SMP
int have_mplock = 0;
#endif
KTR_LOG(kernentry_syscall_ret, p->p_pid, lp->lwp_tid, error);
#ifdef INVARIANTS
- KASSERT(crit_count == (td->td_pri & ~TDPRI_MASK),
+ KASSERT(crit_count == td->td_critcount,
("syscall: critical section count mismatch! %d/%d",
- crit_count / TDPRI_CRIT, td->td_pri / TDPRI_CRIT));
+ crit_count, td->td_critcount));
#endif
}
*/
vu->vu_pending = 0;
upcall.upc_pending = morepending;
- crit_count += TDPRI_CRIT;
+ ++crit_count;
copyout(&upcall.upc_pending, &lp->lwp_upcall->upc_pending,
sizeof(upcall.upc_pending));
copyout(&crit_count, (char *)upcall.upc_uthread + upcall.upc_critoff,
crit_count = 0;
if (error == 0)
error = copyin((char *)upcall.upc_uthread + upcall.upc_critoff, &crit_count, sizeof(int));
- crit_count += TDPRI_CRIT;
+ ++crit_count;
if (error == 0)
error = copyout(&crit_count, (char *)upcall.upc_uthread + upcall.upc_critoff, sizeof(int));
regs->tf_eax = (register_t)vu->vu_func;
struct mdglobaldata *gd = mdcpu;
crit_exit();
- KKASSERT(td->td_pri < TDPRI_CRIT);
+ KKASSERT(td->td_critcount == 0);
cpu_enable_intr();
for (;;) {
/*
struct timeval tv1, tv2;
gettimeofday(&tv1, NULL);
#endif
- umtx_sleep(&gd->mi.gd_runqmask, 0, 1000000);
+ umtx_sleep(&gd->mi.gd_reqflags, 0, 1000000);
#ifdef DEBUGIDLE
gettimeofday(&tv2, NULL);
if (tv2.tv_usec - tv1.tv_usec +
> 500000) {
kprintf("cpu %d idlelock %08x %08x\n",
gd->mi.gd_cpuid,
- gd->mi.gd_runqmask,
+ gd->mi.gd_reqflags,
gd->gd_fpending);
}
#endif
void
ipisig(int nada, siginfo_t *info, void *ctxp)
{
- if (curthread->td_pri < TDPRI_CRIT) {
- curthread->td_pri += TDPRI_CRIT;
+ if (curthread->td_critcount == 0) {
+ ++curthread->td_critcount;
++mycpu->gd_intr_nesting_level;
lwkt_process_ipiq();
--mycpu->gd_intr_nesting_level;
- curthread->td_pri -= TDPRI_CRIT;
+ --curthread->td_critcount;
} else {
need_ipiq();
}
sigaddset(&ss, SIGTERM);
sigaddset(&ss, SIGWINCH);
- curthread->td_pri += TDPRI_CRIT;
+ ++curthread->td_critcount;
++mycpu->gd_intr_nesting_level;
while (stopped_cpus & mycpu->gd_cpumask) {
sigsuspend(&ss);
}
--mycpu->gd_intr_nesting_level;
- curthread->td_pri -= TDPRI_CRIT;
+ --curthread->td_critcount;
}
#endif
*/
ENTRY(fork_trampoline)
movl PCPU(curthread),%eax
- subl $TDPRI_CRIT,TD_PRI(%eax)
+ decl TD_CRITCOUNT(%eax)
/*
* cpu_set_fork_handler intercepts this function call to
ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
ASSYM(TD_SP, offsetof(struct thread, td_sp));
ASSYM(TD_PRI, offsetof(struct thread, td_pri));
+ASSYM(TD_CRITCOUNT, offsetof(struct thread, td_critcount));
ASSYM(TD_MACH, offsetof(struct thread, td_mach));
ASSYM(TD_WCHAN, offsetof(struct thread, td_wchan));
ASSYM(TD_NEST_COUNT, offsetof(struct thread, td_nest_count));
ASSYM(TD_SAVEFPU, offsetof(struct thread, td_mach) + offsetof(struct md_thread, mtd_savefpu));
-ASSYM(TDPRI_CRIT, TDPRI_CRIT);
ASSYM(TDPRI_INT_SUPPORT, TDPRI_INT_SUPPORT);
#ifdef SMP
ASSYM(CPUMASK_LOCK, CPUMASK_LOCK);
movl $0,%ebp
andl $~TDF_RUNNING,TD_FLAGS(%ebx)
orl $TDF_RUNNING,TD_FLAGS(%eax)
- subl $TDPRI_CRIT,TD_PRI(%eax)
+ decl TD_CRITCOUNT(%eax)
popl %eax /* kthread exit function */
pushl PCB_EBX(%edx) /* argument to ESI function */
pushl %eax /* set exit func as return address */
int have_mplock = 0;
#endif
#ifdef INVARIANTS
- int crit_count = td->td_pri & ~TDPRI_MASK;
+ int crit_count = td->td_critcount;
#endif
vm_offset_t eva;
#endif
KTR_LOG(kernentry_trap_ret, lp->lwp_proc->p_pid, lp->lwp_tid);
#ifdef INVARIANTS
- KASSERT(crit_count == (td->td_pri & ~TDPRI_MASK),
+ KASSERT(crit_count == td->td_critcount,
("syscall: critical section count mismatch! %d/%d",
- crit_count / TDPRI_CRIT, td->td_pri / TDPRI_CRIT));
+ crit_count, td->td_critcount));
#endif
}
int have_mplock = 0;
#endif
#ifdef INVARIANTS
- int crit_count = td->td_pri & ~TDPRI_MASK;
+ int crit_count = td->td_critcount;
#endif
vm_offset_t eva;
rel_mplock();
#endif
#ifdef INVARIANTS
- KASSERT(crit_count == (td->td_pri & ~TDPRI_MASK),
+ KASSERT(crit_count == td->td_critcount,
("syscall: critical section count mismatch! %d/%d",
- crit_count / TDPRI_CRIT, td->td_pri / TDPRI_CRIT));
+ crit_count, td->td_critcount));
#endif
}
kprintf("Idle\n");
}
kprintf("current thread = pri %d ", curthread->td_pri);
- if (curthread->td_pri >= TDPRI_CRIT)
+ if (curthread->td_critcount)
kprintf("(CRIT)");
kprintf("\n");
#ifdef SMP
int error;
int narg;
#ifdef INVARIANTS
- int crit_count = td->td_pri & ~TDPRI_MASK;
+ int crit_count = td->td_critcount;
#endif
#ifdef SMP
int have_mplock = 0;
#endif
KTR_LOG(kernentry_syscall_ret, lp->lwp_proc->p_pid, lp->lwp_tid, error);
#ifdef INVARIANTS
- KASSERT(crit_count == (td->td_pri & ~TDPRI_MASK),
+ KASSERT(crit_count == td->td_critcount,
("syscall: critical section count mismatch! %d/%d",
- crit_count / TDPRI_CRIT, td->td_pri / TDPRI_CRIT));
+ crit_count, td->td_critcount));
#endif
}
struct mdglobaldata *gd = mdcpu;
thread_t td = gd->mi.gd_curthread;
- if (td->td_pri >= TDPRI_CRIT || td->td_nest_count) {
+ if (td->td_critcount || td->td_nest_count) {
atomic_set_int_nonlocked(&gd->gd_fpending, 1 << intr);
atomic_set_int_nonlocked(&gd->mi.gd_reqflags, RQF_INTPEND);
} else {
struct mdglobaldata *gd = mdcpu;
thread_t td = gd->mi.gd_curthread;
- if (td->td_pri >= TDPRI_CRIT || td->td_nest_count) {
+ if (td->td_critcount || td->td_nest_count) {
atomic_set_int_nonlocked(&gd->gd_fpending, 1 << intr);
atomic_set_int_nonlocked(&gd->mi.gd_reqflags, RQF_INTPEND);
} else {
*/
vu->vu_pending = 0;
upcall.upc_pending = morepending;
- crit_count += TDPRI_CRIT;
+ ++crit_count;
copyout(&upcall.upc_pending, &lp->lwp_upcall->upc_pending,
sizeof(upcall.upc_pending));
copyout(&crit_count, (char *)upcall.upc_uthread + upcall.upc_critoff,
crit_count = 0;
if (error == 0)
error = copyin((char *)upcall.upc_uthread + upcall.upc_critoff, &crit_count, sizeof(int));
- crit_count += TDPRI_CRIT;
+ ++crit_count;
if (error == 0)
error = copyout(&crit_count, (char *)upcall.upc_uthread + upcall.upc_critoff, sizeof(int));
regs->tf_rax = (register_t)vu->vu_func;
struct mdglobaldata *gd = mdcpu;
crit_exit();
- KKASSERT(td->td_pri < TDPRI_CRIT);
+ KKASSERT(td->td_critcount == 0);
cpu_enable_intr();
for (;;) {
/*
struct timeval tv1, tv2;
gettimeofday(&tv1, NULL);
#endif
- umtx_sleep(&gd->mi.gd_runqmask, 0, 1000000);
+ umtx_sleep(&gd->mi.gd_reqflags, 0, 1000000);
#ifdef DEBUGIDLE
gettimeofday(&tv2, NULL);
if (tv2.tv_usec - tv1.tv_usec +
> 500000) {
kprintf("cpu %d idlelock %08x %08x\n",
gd->mi.gd_cpuid,
- gd->mi.gd_runqmask,
+ gd->mi.gd_reqflags,
gd->gd_fpending);
}
#endif
void
ipisig(int nada, siginfo_t *info, void *ctxp)
{
- if (curthread->td_pri < TDPRI_CRIT) {
- curthread->td_pri += TDPRI_CRIT;
- ++mycpu->gd_intr_nesting_level;
+ globaldata_t gd = mycpu;
+ thread_t td = gd->gd_curthread;
+
+ if (td->td_critcount == 0) {
+ ++td->td_critcount;
+ ++gd->gd_intr_nesting_level;
lwkt_process_ipiq();
- --mycpu->gd_intr_nesting_level;
- curthread->td_pri -= TDPRI_CRIT;
+ --gd->gd_intr_nesting_level;
+ --td->td_critcount;
} else {
need_ipiq();
}
void
stopsig(int nada, siginfo_t *info, void *ctxp)
{
+ globaldata_t gd = mycpu;
+ thread_t td = gd->gd_curthread;
sigset_t ss;
sigemptyset(&ss);
sigaddset(&ss, SIGTERM);
sigaddset(&ss, SIGWINCH);
- curthread->td_pri += TDPRI_CRIT;
- ++mycpu->gd_intr_nesting_level;
- while (stopped_cpus & mycpu->gd_cpumask) {
+ ++td->td_critcount;
+ ++gd->gd_intr_nesting_level;
+ while (stopped_cpus & gd->gd_cpumask) {
sigsuspend(&ss);
}
- --mycpu->gd_intr_nesting_level;
- curthread->td_pri -= TDPRI_CRIT;
+ --gd->gd_intr_nesting_level;
+ --td->td_critcount;
}
#endif
*/
ENTRY(fork_trampoline)
movq PCPU(curthread),%rax
- subl $TDPRI_CRIT,TD_PRI(%rax)
+ decl TD_CRITCOUNT(%rax)
/*
* cpu_set_fork_handler intercepts this function call to
ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
ASSYM(TD_SP, offsetof(struct thread, td_sp));
ASSYM(TD_PRI, offsetof(struct thread, td_pri));
+ASSYM(TD_CRITCOUNT, offsetof(struct thread, td_critcount));
#ifdef SMP
ASSYM(TD_MPCOUNT, offsetof(struct thread, td_mpcount));
#endif
ASSYM(TD_SAVEFPU, offsetof(struct thread, td_savefpu));
ASSYM(TDF_RUNNING, TDF_RUNNING);
ASSYM(GD_NPXTHREAD, offsetof(struct mdglobaldata, gd_npxthread));
-
-ASSYM(TDPRI_CRIT, TDPRI_CRIT);
/* rax and rbx come from the switchout code */
andl $~TDF_RUNNING,TD_FLAGS(%rbx)
orl $TDF_RUNNING,TD_FLAGS(%rax)
- subl $TDPRI_CRIT,TD_PRI(%rax)
+ decl TD_CRITCOUNT(%rax)
movq PCB_R12(%rdx),%rdi /* argument to RBX function */
movq PCB_RBX(%rdx),%rax /* thread function */
/* note: top of stack return address inherited by function */
int have_mplock = 0;
#endif
#ifdef INVARIANTS
- int crit_count = td->td_pri & ~TDPRI_MASK;
+ int crit_count = td->td_critcount;
#endif
vm_offset_t eva;
#endif
KTR_LOG(kernentry_trap_ret, lp->lwp_proc->p_pid, lp->lwp_tid);
#ifdef INVARIANTS
- KASSERT(crit_count == (td->td_pri & ~TDPRI_MASK),
+ KASSERT(crit_count == td->td_critcount,
("syscall: critical section count mismatch! %d/%d",
- crit_count / TDPRI_CRIT, td->td_pri / TDPRI_CRIT));
+ crit_count, td->td_critcount));
#endif
}
int have_mplock = 0;
#endif
#ifdef INVARIANTS
- int crit_count = td->td_pri & ~TDPRI_MASK;
+ int crit_count = td->td_critcount;
#endif
vm_offset_t eva;
rel_mplock();
#endif
#ifdef INVARIANTS
- KASSERT(crit_count == (td->td_pri & ~TDPRI_MASK),
+ KASSERT(crit_count == td->td_critcount,
("syscall: critical section count mismatch! %d/%d",
- crit_count / TDPRI_CRIT, td->td_pri / TDPRI_CRIT));
+ crit_count, td->td_critcount));
#endif
}
kprintf("Idle\n");
}
kprintf("current thread = pri %d ", curthread->td_pri);
- if (curthread->td_pri >= TDPRI_CRIT)
+ if (curthread->td_critcount)
kprintf("(CRIT)");
kprintf("\n");
#ifdef SMP
int error;
int narg;
#ifdef INVARIANTS
- int crit_count = td->td_pri & ~TDPRI_MASK;
+ int crit_count = td->td_critcount;
#endif
#ifdef SMP
int have_mplock = 0;
#endif
KTR_LOG(kernentry_syscall_ret, lp->lwp_proc->p_pid, lp->lwp_tid, error);
#ifdef INVARIANTS
- KASSERT(crit_count == (td->td_pri & ~TDPRI_MASK),
+ KASSERT(crit_count == td->td_critcount,
("syscall: critical section count mismatch! %d/%d",
- crit_count / TDPRI_CRIT, td->td_pri / TDPRI_CRIT));
+ crit_count, td->td_critcount));
#endif
}
__uint32_t gd_reqflags; /* (see note above) */
void *gd_unused00B;
lwkt_queue gd_tdallq; /* all threads */
- lwkt_queue gd_unused00C;
- lwkt_queue gd_tdrunq[32]; /* runnable threads */
- __uint32_t gd_runqmask; /* which queues? */
+ lwkt_queue gd_tdrunq; /* runnable threads */
__uint32_t gd_cpuid;
cpumask_t gd_cpumask; /* mask = 1<<cpuid */
cpumask_t gd_other_cpus; /* mask of 'other' cpus */
struct vmmeter gd_cnt;
struct lwkt_ipiq *gd_ipiq; /* array[ncpu] of ipiq's */
struct lwkt_ipiq gd_cpusyncq; /* ipiq for cpu synchro */
- short gd_unused01;
- short gd_unused02;
+ int gd_fairq_total_pri;
struct thread gd_unused02B;
struct thread gd_idlethread;
SLGlobalData gd_slab; /* slab allocator */
struct vm_map_entry *gd_vme_base; /* vm_map_entry reservation */
struct systimerq gd_systimerq; /* per-cpu system timers */
int gd_syst_nest;
- sysclock_t gd_unused03;
struct systimer gd_hardclock; /* scheduler periodic */
struct systimer gd_statclock; /* statistics periodic */
struct systimer gd_schedclock; /* scheduler periodic */
#define RQB_AST_LWKT_RESCHED 5
#define RQB_AST_UPCALL 6
#define RQB_TIMER 7
+#define RQB_RUNNING 8
#define RQF_IPIQ (1 << RQB_IPIQ)
#define RQF_INTPEND (1 << RQB_INTPEND)
#define RQF_AST_USER_RESCHED (1 << RQB_AST_USER_RESCHED)
#define RQF_AST_LWKT_RESCHED (1 << RQB_AST_LWKT_RESCHED)
#define RQF_AST_UPCALL (1 << RQB_AST_UPCALL)
+#define RQF_RUNNING (1 << RQB_RUNNING)
#define RQF_AST_MASK (RQF_AST_OWEUPC|RQF_AST_SIGNAL|\
RQF_AST_USER_RESCHED|RQF_AST_LWKT_RESCHED|\
RQF_AST_UPCALL)
* must be done through cpu_*msg() functions. e.g. you could request
* ownership of a thread that way, or hand a thread off to another cpu.
*
- * NOTE: td_pri is bumped by TDPRI_CRIT when entering a critical section,
- * but this does not effect how the thread is scheduled by LWKT.
- *
* NOTE: td_ucred is synchronized from the p_ucred on user->kernel syscall,
* trap, and AST/signal transitions to provide a stable ucred for
* (primarily) system calls. This field will be NULL for pure kernel
const char *td_wmesg; /* string name for blockage */
const volatile void *td_wchan; /* waiting on channel */
int td_pri; /* 0-31, 31=highest priority (note 1) */
+ int td_critcount; /* critical section nesting count */
int td_flags; /* TDF flags */
int td_wdomain; /* domain for wchan address (typ 0) */
- void (*td_preemptable)(struct thread *td, int critpri);
+ void (*td_preemptable)(struct thread *td, int critcount);
void (*td_release)(struct thread *td);
char *td_kstack; /* kernel stack */
int td_kstack_size; /* size of kernel stack */
__uint64_t td_sticks; /* Statclock hits in system mode (uS) */
__uint64_t td_iticks; /* Statclock hits processing intr (uS) */
int td_locks; /* lockmgr lock debugging */
- int td_unused01;
+ int td_fairq_lticks; /* fairq wakeup accumulator reset */
void *td_dsched_priv1; /* priv data for I/O schedulers */
int td_refs; /* hold position in gd_tdallq / hold free */
int td_nest_count; /* prevent splz nesting */
int td_crit_debug_index;
int td_in_crit_report;
#endif
+ int td_fairq_accum; /* fairq priority accumulator */
struct md_thread td_mach;
};
#define TDF_KERNELFP 0x01000000 /* kernel using fp coproc */
#define TDF_NETWORK 0x02000000 /* network proto thread */
#define TDF_CRYPTO 0x04000000 /* crypto thread */
+#define TDF_MARKER 0x80000000 /* fairq marker thread */
/*
* Thread priorities. Typically only one thread from any given
#define TDPRI_INT_HIGH 29 /* high priority interrupt */
#define TDPRI_MAX 31
-#define TDPRI_MASK 31
-#define TDPRI_CRIT 32 /* high bits of td_pri used for crit */
+/*
+ * Scale is the approximate number of ticks for which we desire the
+ * entire gd_tdrunq to get service. With hz = 100 a scale of 8 is 80ms.
+ *
+ * Setting this value too small will result in inefficient switching
+ * rates.
+ */
+#define TDFAIRQ_SCALE 8
+#define TDFAIRQ_MAX(gd) ((gd)->gd_fairq_total_pri * TDFAIRQ_SCALE)
#define LWKT_THREAD_STACK (UPAGES * PAGE_SIZE)
#define CACHE_NTHREADS 6
-#define IN_CRITICAL_SECT(td) ((td)->td_pri >= TDPRI_CRIT)
+#define IN_CRITICAL_SECT(td) ((td)->td_critcount)
#ifdef _KERNEL
extern void lwkt_setpri(thread_t, int);
extern void lwkt_setpri_initial(thread_t, int);
extern void lwkt_setpri_self(int);
-extern int lwkt_check_resched(thread_t);
+extern void lwkt_fairq_schedulerclock(thread_t td);
+extern void lwkt_fairq_setpri_self(int pri);
+extern int lwkt_fairq_push(int pri);
+extern void lwkt_fairq_pop(int pri);
+extern void lwkt_fairq_yield(void);
extern void lwkt_setcpu_self(struct globaldata *);
extern void lwkt_migratecpu(int);
struct thread *td = curthread;
#ifdef INVARIANTS
- if (td->td_pri < 0)
+ if (td->td_critcount < 0)
crit_panic();
#endif
- td->td_pri += TDPRI_CRIT;
+ ++td->td_critcount;
__DEBUG_CRIT_ENTER(td);
cpu_ccfence();
}
static __inline void
_crit_enter_quick(struct thread *curtd __DEBUG_CRIT_ADD_ARG__)
{
- curtd->td_pri += TDPRI_CRIT;
+ ++curtd->td_critcount;
__DEBUG_CRIT_ENTER(curtd);
cpu_ccfence();
}
_crit_exit_noyield(struct thread *curtd __DEBUG_CRIT_ADD_ARG__)
{
__DEBUG_CRIT_EXIT(curtd);
- curtd->td_pri -= TDPRI_CRIT;
+ --curtd->td_critcount;
#ifdef INVARIANTS
- if (curtd->td_pri < 0)
+ if (curtd->td_critcount < 0)
crit_panic();
thread_t td = curthread;
__DEBUG_CRIT_EXIT(td);
- td->td_pri -= TDPRI_CRIT;
+ --td->td_critcount;
#ifdef INVARIANTS
- if (td->td_pri < 0)
+ if (td->td_critcount < 0)
crit_panic();
#endif
cpu_ccfence(); /* prevent compiler reordering */
- if (td->td_gd->gd_reqflags && td->td_pri < TDPRI_CRIT)
+ if (td->td_gd->gd_reqflags && td->td_critcount == 0)
splz_check();
}
globaldata_t gd = curtd->td_gd;
__DEBUG_CRIT_EXIT(curtd);
- curtd->td_pri -= TDPRI_CRIT;
+ --curtd->td_critcount;
cpu_ccfence(); /* prevent compiler reordering */
- if (gd->gd_reqflags && curtd->td_pri < TDPRI_CRIT)
+ if (gd->gd_reqflags && curtd->td_critcount == 0)
splz_check();
}
static __inline int
crit_test(thread_t td)
{
- return(td->td_pri >= TDPRI_CRIT);
+ return(td->td_critcount);
}
/*
static __inline int
lwkt_runnable(void)
{
- return (mycpu->gd_runqmask != 0);
+ return (TAILQ_FIRST(&mycpu->gd_tdrunq) != NULL);
}
static __inline int
lwkt_getpri(thread_t td)
{
- return(td->td_pri & TDPRI_MASK);
+ return(td->td_pri);
}
static __inline int
struct vm_object;
struct vm_page;
-void uio_yield (void);
int uiomove (caddr_t, size_t, struct uio *);
int uiomovez (size_t, struct uio *);
int uiomove_frombuf (void *buf, size_t buflen, struct uio *uio);
#define UPC_CONTROL_POLLANDCLEAR 5
#define UPC_CONTROL_WAIT 6
-#define UPC_CRITADD 32 /* NOTE! same as TDPRI_CRIT */
#define UPC_RESERVED 32 /* # of reserved id's */
#if defined(_KERNEL)
if (++hmp->check_yield > hammer_yield_check) {
hmp->check_yield = 0;
- lwkt_user_yield();
+ lwkt_yield();
}
/*
#include <sys/sysctl.h>
static int ffs_rawread_readahead(struct vnode *vp, caddr_t udata, off_t offset,
- size_t len, struct buf *bp, int *baseticks);
+ size_t len, struct buf *bp);
static int ffs_rawread_main(struct vnode *vp,
struct uio *uio);
static int
ffs_rawread_readahead(struct vnode *vp, caddr_t udata, off_t loffset,
- size_t len, struct buf *bp, int *baseticks)
+ size_t len, struct buf *bp)
{
int error;
int iolen;
if (vmapbuf(bp, udata, len) < 0)
return EFAULT;
- if (ticks - *baseticks >= hogticks) {
- *baseticks = ticks;
- uio_yield();
- }
+ lwkt_user_yield();
bzero(bp->b_data, bp->b_bcount);
/* Mark operation completed (similar to bufdone()) */
int error, nerror;
struct buf *bp, *nbp, *tbp;
int iolen;
- int baseticks = ticks;
caddr_t udata;
int resid;
off_t offset;
if (bp == NULL) { /* Setup first read */
/* XXX: Leave some bufs for swap */
bp = getpbuf_kva(&ffsrawbufcnt);
- error = ffs_rawread_readahead(vp, udata, offset, resid,
- bp, &baseticks);
+ error = ffs_rawread_readahead(vp, udata, offset,
+ resid, bp);
if (error != 0)
break;
udata + bp->b_bufsize,
offset + bp->b_bufsize,
resid - bp->b_bufsize,
- nbp, &baseticks);
+ nbp);
if (nerror) {
relpbuf(nbp, &ffsrawbufcnt);
nbp = NULL;
/* Incomplete read. Try to read remaining part */
error = ffs_rawread_readahead(
vp, udata, offset,
- bp->b_bufsize - iolen, bp, &baseticks);
+ bp->b_bufsize - iolen, bp);
if (error != 0)
break;
} else if (nbp != NULL) { /* Complete read with readahead */
vp, udata + bp->b_bufsize,
offset + bp->b_bufsize,
resid - bp->b_bufsize,
- nbp, &baseticks);
+ nbp);
if (nerror != 0) {
relpbuf(nbp, &ffsrawbufcnt);
nbp = NULL;
break;
} else if (resid > 0) { /* More to read, no readahead */
error = ffs_rawread_readahead(vp, udata, offset,
- resid, bp,
- &baseticks);
+ resid, bp);
if (error != 0)
break;
}
* resched has been requested.
*/
while (i < PAGE_SIZE) {
- if (lwkt_check_resched(curthread))
- break;
+ lwkt_yield();
if (idlezero_nocache == 1)
bzeront(&pg[i], IDLEZERO_RUN);
else
++idlezero_count;
break;
}
- if (lwkt_check_resched(curthread))
- lwkt_switch();
+ lwkt_yield();
}
}