From 8cee56f46a3ba3b169e5c0b4716c457bc9d03164 Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Fri, 13 Sep 2013 19:02:55 -0700
Subject: [PATCH] kernel - Misc adjustments used by the vkernel and VMM, misc
 optimizations

* This section committed separately because it is basically independent
  of VMM.

* Improve pfind().  Don't get proc_token if the process being looked up
  is the current process.

* Improve kern_kill().  Do not obtain proc_token any more.  p->p_token
  is sufficient and the process group has its own lock now.

* Call pthread_yield() when spinning on various things:

  x Spinlocks
  x Tokens (spinning in lwkt_switch)
  x cpusync (ipiq)

* Rewrite sched_yield() -> dfly_yield().  dfly_yield() will
  unconditionally round-robin the LWP, ignoring estcpu.  It isn't
  perfect but it works fairly well.

  The dfly scheduler will also no longer attempt to migrate threads
  across cpus when handling yields.  They migrate normally in all
  other circumstances.

  This fixes situations where the vkernel is spinning waiting for
  multiple events from other cpus, in particular when it is doing
  a global IPI for pmap synchronization of the kernel_pmap.
---
 sys/kern/kern_proc.c     | 14 +++++++-
 sys/kern/kern_sig.c      | 18 +++------
 sys/kern/kern_spinlock.c | 10 ++++++
 sys/kern/lwkt_ipiq.c     | 13 +++++++
 sys/kern/lwkt_thread.c   | 13 +++++++
 sys/kern/usched_dfly.c   | 78 +++++++++++++++++++++++++++++-----------
 sys/sys/thread.h         |  1 +
 7 files changed, 111 insertions(+), 36 deletions(-)

diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 49059e1b07..3e8b6ab0e0 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -348,8 +348,19 @@ inferior(struct proc *p)
 struct proc *
 pfind(pid_t pid)
 {
-	struct proc *p;
+	struct proc *p = curproc;
 
+	/*
+	 * Shortcut the current process
+	 */
+	if (p && p->p_pid == pid) {
+		PHOLD(p);
+		return (p);
+	}
+
+	/*
+	 * Otherwise find it in the hash table.
+	 */
 	lwkt_gettoken(&proc_token);
 	LIST_FOREACH(p, PIDHASH(pid), p_hash) {
 		if (p->p_pid == pid) {
@@ -359,6 +370,7 @@ pfind(pid_t pid)
 		}
 	}
 	lwkt_reltoken(&proc_token);
+
 	return (NULL);
 }
 
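For illustration, the pfind() change is the usual "fast-path the current
context" pattern: check whether the caller is asking about itself before
touching the globally locked table.  A minimal user-space sketch of that
shape; the entry/registry layout and all names are invented, and only the
pattern (shortcut the caller's own entry, otherwise fall back to a locked
scan, taking a hold either way) comes from the change above:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    struct entry {
        pthread_t id;                   /* lookup key */
        atomic_int hold;                /* plays the role of PHOLD/PRELE */
        struct entry *next;
    };

    static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct entry *table_head;
    static _Thread_local struct entry *cur_entry;   /* role of curproc */

    static struct entry *
    entry_find(pthread_t id)
    {
        struct entry *e = cur_entry;

        /* Fast path: a self-lookup needs no global lock. */
        if (e != NULL && pthread_equal(e->id, id)) {
            atomic_fetch_add(&e->hold, 1);
            return (e);
        }

        /* Slow path: scan the shared table under the global lock. */
        pthread_mutex_lock(&table_lock);
        for (e = table_head; e != NULL; e = e->next) {
            if (pthread_equal(e->id, id)) {
                atomic_fetch_add(&e->hold, 1);
                break;
            }
        }
        pthread_mutex_unlock(&table_lock);
        return (e);
    }

    int
    main(void)
    {
        struct entry self = { .id = pthread_self() };

        cur_entry = &self;
        table_head = &self;
        if (entry_find(pthread_self()) != NULL)
            printf("hold after self-lookup: %d\n", atomic_load(&self.hold));
        return (0);
    }

The shortcut is sound only because taking a hold does not itself require
the table lock, which is the property the kernel change relies on: PHOLD()
works without proc_token, and the current process cannot be reaped out
from under its own syscall.
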
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 3538c1b724..d4529cfb20 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -739,8 +739,6 @@ kern_kill(int sig, pid_t pid, lwpid_t tid)
 	if ((u_int)sig > _SIG_MAXSIG)
 		return (EINVAL);
 
-	lwkt_gettoken(&proc_token);
-
 	if (pid > 0) {
 		struct proc *p;
 		struct lwp *lp = NULL;
@@ -752,11 +750,8 @@ kern_kill(int sig, pid_t pid, lwpid_t tid)
 		 * don't actually try to deliver the signal.
 		 */
 		if ((p = pfind(pid)) == NULL) {
-			if ((p = zpfind(pid)) == NULL) {
-				lwkt_reltoken(&proc_token);
+			if ((p = zpfind(pid)) == NULL)
 				return (ESRCH);
-			}
-			lwkt_reltoken(&proc_token);
 			PRELE(p);
 			return (0);
 		}
@@ -764,7 +759,6 @@ kern_kill(int sig, pid_t pid, lwpid_t tid)
 		if (!CANSIGNAL(p, sig)) {
 			lwkt_reltoken(&p->p_token);
 			PRELE(p);
-			lwkt_reltoken(&proc_token);
 			return (EPERM);
 		}
 
@@ -776,7 +770,6 @@ kern_kill(int sig, pid_t pid, lwpid_t tid)
 		if (p->p_flags & P_WEXIT) {
 			lwkt_reltoken(&p->p_token);
 			PRELE(p);
-			lwkt_reltoken(&proc_token);
 			return (0);
 		}
 		if (tid != -1) {
@@ -784,7 +777,6 @@ kern_kill(int sig, pid_t pid, lwpid_t tid)
 			if (lp == NULL) {
 				lwkt_reltoken(&p->p_token);
 				PRELE(p);
-				lwkt_reltoken(&proc_token);
 				return (ESRCH);
 			}
 		}
@@ -792,7 +784,7 @@ kern_kill(int sig, pid_t pid, lwpid_t tid)
 		lwpsignal(p, lp, sig);
 		lwkt_reltoken(&p->p_token);
 		PRELE(p);
-		lwkt_reltoken(&proc_token);
+
 		return (0);
 	}
 
@@ -800,10 +792,9 @@ kern_kill(int sig, pid_t pid, lwpid_t tid)
 	 * If we come here, pid is a special broadcast pid.
 	 * This doesn't mix with a tid.
 	 */
-	if (tid != -1) {
-		lwkt_reltoken(&proc_token);
+	if (tid != -1)
 		return (EINVAL);
-	}
+
 	switch (pid) {
 	case -1:		/* broadcast signal */
 		t = (dokillpg(sig, 0, 1));
@@ -815,7 +806,6 @@ kern_kill(int sig, pid_t pid, lwpid_t tid)
 		t = (dokillpg(sig, -pid, 0));
 		break;
 	}
-	lwkt_reltoken(&proc_token);
 	return t;
 }
 
diff --git a/sys/kern/kern_spinlock.c b/sys/kern/kern_spinlock.c
index 5c523d1d57..d739119344 100644
--- a/sys/kern/kern_spinlock.c
+++ b/sys/kern/kern_spinlock.c
@@ -66,6 +66,10 @@
 #include
 #include
 
+#ifdef _KERNEL_VIRTUAL
+#include <pthread.h>
+#endif
+
 struct spinlock pmap_spin = SPINLOCK_INITIALIZER(pmap_spin);
 
 struct indefinite_info {
@@ -247,6 +251,9 @@ spin_lock_contested(struct spinlock *spin)
 			if (spin_indefinite_check(spin, &info))
 				break;
 		}
+#ifdef _KERNEL_VIRTUAL
+		pthread_yield();
+#endif
 	}
 	/*logspin(end, spin, 'w');*/
 }
@@ -309,6 +316,9 @@ spin_lock_shared_contested(struct spinlock *spin)
 			if (spin_indefinite_check(spin, &info))
 				break;
 		}
+#ifdef _KERNEL_VIRTUAL
+		pthread_yield();
+#endif
 	}
 	/*logspin(end, spin, 'w');*/
 }
diff --git a/sys/kern/lwkt_ipiq.c b/sys/kern/lwkt_ipiq.c
index 58d4522806..94f9dacbeb 100644
--- a/sys/kern/lwkt_ipiq.c
+++ b/sys/kern/lwkt_ipiq.c
@@ -66,6 +66,10 @@
 #include
 #include
 
+#ifdef _KERNEL_VIRTUAL
+#include <pthread.h>
+#endif
+
 struct ipiq_stats {
     __int64_t ipiq_count;	/* total calls to lwkt_send_ipiq*() */
     __int64_t ipiq_fifofull;	/* number of fifo full conditions detected */
@@ -821,6 +825,9 @@ lwkt_cpusync_interlock(lwkt_cpusync_t cs)
 	while (cs->cs_mack != mask) {
 	    lwkt_process_ipiq();
 	    cpu_pause();
+#ifdef _KERNEL_VIRTUAL
+	    pthread_yield();
+#endif
 	}
 #if 0
 	if (gd->gd_curthread->td_wmesg == smsg)
@@ -869,6 +876,9 @@ lwkt_cpusync_deinterlock(lwkt_cpusync_t cs)
 	while (cs->cs_mack != mask) {
 	    lwkt_process_ipiq();
 	    cpu_pause();
+#ifdef _KERNEL_VIRTUAL
+	    pthread_yield();
+#endif
 	}
 #if 0
 	if (gd->gd_curthread->td_wmesg == smsg)
@@ -923,6 +933,9 @@ lwkt_cpusync_remote2(lwkt_cpusync_t cs)
 	lwkt_ipiq_t ip;
 	int wi;
 
+#ifdef _KERNEL_VIRTUAL
+	pthread_yield();
+#endif
 	ip = &gd->gd_cpusyncq;
 	wi = ip->ip_windex & MAXCPUFIFO_MASK;
 	ip->ip_info[wi].func = (ipifunc3_t)(ipifunc1_t)lwkt_cpusync_remote2;
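
The _KERNEL_VIRTUAL hunks above all share one recipe: a vkernel "cpu" is
really a host thread, so a failed spin iteration should hand the timeslice
back to the host scheduler instead of burning it.  A minimal sketch of
that shape, using the portable sched_yield(); the patch itself calls the
older pthread_yield(), which glibc implements as sched_yield(), and the
_KERNEL_VIRTUAL macro is simply the one the patch tests:

    #include <sched.h>
    #include <stdatomic.h>

    static atomic_flag slock = ATOMIC_FLAG_INIT;

    static void
    toy_spin_lock(void)
    {
        while (atomic_flag_test_and_set_explicit(&slock,
                                                 memory_order_acquire)) {
    #ifdef _KERNEL_VIRTUAL
            /*
             * Virtual cpu: the lock holder may not even be running;
             * yield so the host can schedule it.
             */
            sched_yield();
    #else
            /*
             * Real cpu: the holder is executing elsewhere; just relax
             * the pipeline (cpu_pause() in the kernel) and retry.
             */
    #if defined(__i386__) || defined(__x86_64__)
            __builtin_ia32_pause();
    #endif
    #endif
        }
    }

    static void
    toy_spin_unlock(void)
    {
        atomic_flag_clear_explicit(&slock, memory_order_release);
    }

    int
    main(void)
    {
        toy_spin_lock();
        toy_spin_unlock();
        return (0);
    }

On real hardware a pause-and-retry loop is reasonable because the lock
holder is guaranteed to be running on another cpu.  Under a vkernel that
guarantee disappears, the holder may be a descheduled host thread, so
spinning without yielding can wait out an entire host timeslice.
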
diff --git a/sys/kern/lwkt_thread.c b/sys/kern/lwkt_thread.c
index d98813faed..7ae40a134e 100644
--- a/sys/kern/lwkt_thread.c
+++ b/sys/kern/lwkt_thread.c
@@ -71,6 +71,10 @@
 #include
 #include
 
+#ifdef _KERNEL_VIRTUAL
+#include <pthread.h>
+#endif
+
 #if !defined(KTR_CTXSW)
 #define KTR_CTXSW KTR_ALL
 #endif
@@ -150,8 +154,10 @@ static int lwkt_cache_threads = 0;
 SYSCTL_INT(_lwkt, OID_AUTO, cache_threads, CTLFLAG_RD,
 	   &lwkt_cache_threads, 0, "thread+kstack cache");
 
+#ifndef _KERNEL_VIRTUAL
 static __cachealign int lwkt_cseq_rindex;
 static __cachealign int lwkt_cseq_windex;
+#endif
 
 /*
  * These helper procedures handle the runq, they can only be called from
@@ -749,6 +755,9 @@ skip:
 	 * are contested.
 	 */
 	cpu_pause();
+#ifdef _KERNEL_VIRTUAL
+	pthread_yield();
+#endif
 	ntd = &gd->gd_idlethread;
 	if (gd->gd_trap_nesting_level == 0 && panicstr == NULL)
 	    ASSERT_NO_TOKENS_HELD(ntd);
@@ -782,6 +791,7 @@ skip:
 		    if (spinning < 0x7FFFFFFF)
 			++spinning;
 
+#ifndef _KERNEL_VIRTUAL
 		    /*
 		     * lwkt_getalltokens() failed in sorted token mode, we can use
 		     * monitor/mwait in this case.
@@ -795,6 +805,7 @@ skip:
 				~RQF_IDLECHECK_WK_MASK,
 				cpu_mwait_spin);
 		    }
+#endif
 
 		    /*
 		     * We already checked that td is still scheduled so this should be
@@ -802,6 +813,7 @@ skip:
 		     */
 		    splz_check();
 
+#ifndef _KERNEL_VIRTUAL
 		    /*
 		     * This experimental resequencer is used as a fall-back to reduce
 		     * hw cache line contention by placing each core's scheduler into a
@@ -837,6 +849,7 @@ skip:
 			    DELAY(1);
 			    atomic_add_int(&lwkt_cseq_rindex, 1);
 		    }
+#endif
 	    /* highest level for(;;) loop */
 	}
 
diff --git a/sys/kern/usched_dfly.c b/sys/kern/usched_dfly.c
index 32a7511445..e917fcf513 100644
--- a/sys/kern/usched_dfly.c
+++ b/sys/kern/usched_dfly.c
@@ -336,17 +336,18 @@ dfly_acquire_curproc(struct lwp *lp)
 
 		spin_lock(&dd->spin);
 
-		/*
-		 * We are not or are no longer the current lwp and a forced
-		 * reschedule was requested.  Figure out the best cpu to
-		 * run on (our current cpu will be given significant weight).
-		 *
-		 * (if a reschedule was not requested we want to move this
-		 * step after the uschedcp tests).
-		 */
 		if (force_resched &&
-		    (usched_dfly_features & 0x08) &&
-		    (rdd = dfly_choose_best_queue(lp)) != dd) {
+		   (usched_dfly_features & 0x08) &&
+		   (rdd = dfly_choose_best_queue(lp)) != dd) {
+			/*
+			 * We are not or are no longer the current lwp and a
+			 * forced reschedule was requested.  Figure out the
+			 * best cpu to run on (our current cpu will be given
+			 * significant weight).
+			 *
+			 * (if a reschedule was not requested we want to
+			 * move this step after the uschedcp tests).
+			 */
 			dfly_changeqcpu_locked(lp, dd, rdd);
 			spin_unlock(&dd->spin);
 			lwkt_deschedule(lp->lwp_thread);
@@ -363,6 +364,8 @@ dfly_acquire_curproc(struct lwp *lp)
 		 * trivially become the current lwp on the current cpu.
 		 */
 		if (dd->uschedcp == NULL) {
+			atomic_clear_int(&lp->lwp_thread->td_mpflags,
+					 TDF_MP_DIDYIELD);
 			atomic_set_cpumask(&dfly_curprocmask, gd->gd_cpumask);
 			dd->uschedcp = lp;
 			dd->upri = lp->lwp_priority;
@@ -371,6 +374,40 @@ dfly_acquire_curproc(struct lwp *lp)
 			break;
 		}
 
+		/*
+		 * Put us back on the same run queue unconditionally.
+		 *
+		 * Set rrinterval to force placement at end of queue.
+		 * Select the worst queue to ensure we round-robin,
+		 * but do not change estcpu.
+		 */
+		if (lp->lwp_thread->td_mpflags & TDF_MP_DIDYIELD) {
+			u_int32_t tsqbits;
+
+			atomic_clear_int(&lp->lwp_thread->td_mpflags,
+					 TDF_MP_DIDYIELD);
+
+			switch(lp->lwp_rqtype) {
+			case RTP_PRIO_NORMAL:
+				tsqbits = dd->queuebits;
+				spin_unlock(&dd->spin);
+
+				lp->lwp_rrcount = usched_dfly_rrinterval;
+				if (tsqbits)
+					lp->lwp_rqindex = bsrl(tsqbits);
+				break;
+			default:
+				spin_unlock(&dd->spin);
+				break;
+			}
+			lwkt_deschedule(lp->lwp_thread);
+			dfly_setrunqueue_dd(dd, lp);
+			lwkt_switch();
+			gd = mycpu;
+			dd = &dfly_pcpu[gd->gd_cpuid];
+			continue;
+		}
+
 		/*
 		 * Can we steal the current designated user thread?
 		 *
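
The TDF_MP_DIDYIELD block above is the consumer half of the new
dfly_yield() in the next hunk: a yielding lwp is parked on the worst
(highest-index) populated queue with a full round-robin interval, while
its estcpu is deliberately left alone.  A toy model of that placement
decision; the task/queue layout is invented, and bsrl() mirrors the
kernel helper of the same name (bit scan reverse, highest set bit):

    #include <stdint.h>
    #include <stdio.h>

    struct toy_task {
        int rqindex;                    /* which run queue the task is on */
        int rrcount;                    /* round-robin ticks consumed */
        int estcpu;                     /* cpu estimate: left untouched */
        int did_yield;                  /* role of TDF_MP_DIDYIELD */
    };

    /* Highest set bit, as the kernel's bsrl() does via the BSR insn. */
    static int
    bsrl(uint32_t mask)
    {
        return (31 - __builtin_clz(mask));
    }

    /*
     * queuebits has bit N set when queue N is populated.  On a yield,
     * pick the worst populated queue and a full round-robin count so
     * everything runnable gets a turn before the yielder runs again,
     * but leave estcpu alone.
     */
    static void
    requeue_after_yield(struct toy_task *t, uint32_t queuebits,
                        int rrinterval)
    {
        if (!t->did_yield)
            return;
        t->did_yield = 0;
        t->rrcount = rrinterval;        /* forces placement at queue end */
        if (queuebits)
            t->rqindex = bsrl(queuebits);
    }

    int
    main(void)
    {
        struct toy_task t = { .rqindex = 2, .estcpu = 7, .did_yield = 1 };

        requeue_after_yield(&t, 0x13, 10);  /* queues 0, 1, 4 populated */
        printf("rqindex=%d rrcount=%d estcpu=%d\n",
               t.rqindex, t.rrcount, t.estcpu);
        /* prints: rqindex=4 rrcount=10 estcpu=7 */
        return (0);
    }

In the hunk above the same decision is made under dd->spin: the queuebits
snapshot (tsqbits) is taken before the spinlock is dropped, so the queue
choice is made against a consistent picture of the run queues.
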
@@ -1124,17 +1161,16 @@
 static void
 dfly_yield(struct lwp *lp)
 {
-#if 0
-	/* FUTURE (or something similar) */
-	switch(lp->lwp_rqtype) {
-	case RTP_PRIO_NORMAL:
-		lp->lwp_estcpu = ESTCPULIM(lp->lwp_estcpu + ESTCPUINCR);
-		break;
-	default:
-		break;
-	}
-#endif
-	need_user_resched();
+	if (lp->lwp_qcpu != mycpu->gd_cpuid)
+		return;
+	KKASSERT(lp == curthread->td_lwp);
+
+	/*
+	 * Don't set need_user_resched() or mess with rrcount or anything;
+	 * the TDF flag will override everything as long as we release.
+	 */
+	atomic_set_int(&lp->lwp_thread->td_mpflags, TDF_MP_DIDYIELD);
+	dfly_release_curproc(lp);
 }
 
 /*
diff --git a/sys/sys/thread.h b/sys/sys/thread.h
index a73c9bda17..a69d393048 100644
--- a/sys/sys/thread.h
+++ b/sys/sys/thread.h
@@ -376,6 +376,7 @@ struct thread {
 #define TDF_MP_EXITWAIT		0x00000004	/* reaper, see lwp_wait() */
 #define TDF_MP_EXITSIG		0x00000008	/* reaper, see lwp_wait() */
 #define TDF_MP_BATCH_DEMARC	0x00000010	/* batch mode handling */
+#define TDF_MP_DIDYIELD		0x00000020	/* affects scheduling */
 
 #define TD_TYPE_GENERIC		0		/* generic thread */
 #define TD_TYPE_CRYPTO		1		/* crypto thread */
-- 
2.41.0
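
A footnote on the new flag: td_mpflags, unlike td_flags, is the thread
flag word intended to be manipulated atomically from any cpu, which is
why the patch only touches it with atomic_set_int()/atomic_clear_int().
The same publish/consume protocol expressed with portable C11 atomics;
the flag value is copied from the hunk above, everything else is
invented:

    #include <stdatomic.h>
    #include <stdio.h>

    #define TDF_MP_DIDYIELD 0x00000020u /* value from the hunk above */

    static _Atomic unsigned int td_mpflags;

    /* dfly_yield() side: publish the voluntary yield. */
    static void
    mark_did_yield(void)
    {
        atomic_fetch_or(&td_mpflags, TDF_MP_DIDYIELD);
    }

    /* dfly_acquire_curproc() side: consume it at most once. */
    static int
    take_did_yield(void)
    {
        unsigned int prev;

        prev = atomic_fetch_and(&td_mpflags, ~TDF_MP_DIDYIELD);
        return ((prev & TDF_MP_DIDYIELD) != 0);
    }

    int
    main(void)
    {
        mark_did_yield();
        printf("%d %d\n", take_did_yield(), take_did_yield());  /* 1 0 */
        return (0);
    }

In the patch the test and the clear are separate operations serialized by
the per-cpu dd->spin; the combined fetch-and here is the lock-free
equivalent for the sketch.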