kernel - Misc adjustments used by the vkernel and VMM, misc optimizations
author     Matthew Dillon <dillon@apollo.backplane.com>
           Sat, 14 Sep 2013 02:02:55 +0000 (19:02 -0700)
committer  Matthew Dillon <dillon@apollo.backplane.com>
           Sat, 14 Sep 2013 02:02:55 +0000 (19:02 -0700)
* This section is committed separately because it is basically
  independent of VMM.

* Improve pfind().  Don't acquire proc_token when the process being
  looked up is the current process.
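
  As a caller-side illustration (a sketch, not part of this commit):
  pfind() returns the process held via PHOLD(), so the caller must
  PRELE() it when done.  With this change, looking up our own pid no
  longer touches proc_token at all:

        struct proc *p;

        if ((p = pfind(pid)) != NULL) {
                /* ... use p; a self-lookup took no proc_token ... */
                PRELE(p);
        }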

* Improve kern_kill().  Do not obtain proc_token anymore; p->p_token
  is sufficient, and the process group now has its own lock.
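
  A minimal sketch of the resulting single-pid path (simplified from
  the kern_sig.c hunks below, error paths omitted); only the target's
  own token is held around delivery:

        if ((p = pfind(pid)) != NULL) {
                lwkt_gettoken(&p->p_token);
                if (CANSIGNAL(p, sig))
                        lwpsignal(p, NULL, sig);
                lwkt_reltoken(&p->p_token);
                PRELE(p);
        }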

* Call pthread_yield() when spinning on various things (a condensed
  sketch follows this list):

    x Spinlocks
    x Tokens (spinning in lwkt_switch)
    x cpusync (ipiq)
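
  The pattern is the same in each case; a condensed sketch, where the
  hypothetical try_acquire() stands in for the lock-specific attempt:

        for (;;) {
                if (try_acquire(resource))
                        break;
                cpu_pause();
        #ifdef _KERNEL_VIRTUAL
                /* vkernel: give the host cpu back instead of burning it */
                pthread_yield();
        #endif
        }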

* Rewrite sched_yield() -> dfly_yield().  dfly_yield() unconditionally
  round-robins the LWP, ignoring estcpu.  It isn't perfect, but it
  works fairly well.

  The dfly scheduler will also no longer attempt to migrate threads
  across cpus when handling yields.  They migrate normally in all
  other circumstances.

  This fixes situations where the vkernel spins waiting for multiple
  events from other cpus, in particular when it is doing a global IPI
  for pmap synchronization of the kernel_pmap.
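
  For reference, the vkernel side of such a wait loop looks roughly
  like this (a sketch reusing cs_mack from the ipiq hunks below); each
  pthread_yield() ends up in the host's sched_yield() and thus in the
  new dfly_yield():

        while (cs->cs_mack != mask)
                pthread_yield();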

sys/kern/kern_proc.c
sys/kern/kern_sig.c
sys/kern/kern_spinlock.c
sys/kern/lwkt_ipiq.c
sys/kern/lwkt_thread.c
sys/kern/usched_dfly.c
sys/sys/thread.h

diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 49059e1..3e8b6ab 100644
@@ -348,8 +348,19 @@ inferior(struct proc *p)
 struct proc *
 pfind(pid_t pid)
 {
-       struct proc *p;
+       struct proc *p = curproc;
 
+       /*
+        * Shortcut the current process
+        */
+       if (p && p->p_pid == pid) {
+               PHOLD(p);
+               return (p);
+       }
+
+       /*
+        * Otherwise find it in the hash table.
+        */
        lwkt_gettoken(&proc_token);
        LIST_FOREACH(p, PIDHASH(pid), p_hash) {
                if (p->p_pid == pid) {
@@ -359,6 +370,7 @@ pfind(pid_t pid)
                }
        }
        lwkt_reltoken(&proc_token);
+
        return (NULL);
 }
 
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 3538c1b..d4529cf 100644
@@ -739,8 +739,6 @@ kern_kill(int sig, pid_t pid, lwpid_t tid)
        if ((u_int)sig > _SIG_MAXSIG)
                return (EINVAL);
 
-       lwkt_gettoken(&proc_token);
-
        if (pid > 0) {
                struct proc *p;
                struct lwp *lp = NULL;
@@ -752,11 +750,8 @@ kern_kill(int sig, pid_t pid, lwpid_t tid)
                 * don't actually try to deliver the signal.
                 */
                if ((p = pfind(pid)) == NULL) {
-                       if ((p = zpfind(pid)) == NULL) {
-                               lwkt_reltoken(&proc_token);
+                       if ((p = zpfind(pid)) == NULL)
                                return (ESRCH);
-                       }
-                       lwkt_reltoken(&proc_token);
                        PRELE(p);
                        return (0);
                }
@@ -764,7 +759,6 @@ kern_kill(int sig, pid_t pid, lwpid_t tid)
                if (!CANSIGNAL(p, sig)) {
                        lwkt_reltoken(&p->p_token);
                        PRELE(p);
-                       lwkt_reltoken(&proc_token);
                        return (EPERM);
                }
 
@@ -776,7 +770,6 @@ kern_kill(int sig, pid_t pid, lwpid_t tid)
                if (p->p_flags & P_WEXIT) {
                        lwkt_reltoken(&p->p_token);
                        PRELE(p);
-                       lwkt_reltoken(&proc_token);
                        return (0);
                }
                if (tid != -1) {
@@ -784,7 +777,6 @@ kern_kill(int sig, pid_t pid, lwpid_t tid)
                        if (lp == NULL) {
                                lwkt_reltoken(&p->p_token);
                                PRELE(p);
-                               lwkt_reltoken(&proc_token);
                                return (ESRCH);
                        }
                }
@@ -792,7 +784,7 @@ kern_kill(int sig, pid_t pid, lwpid_t tid)
                        lwpsignal(p, lp, sig);
                lwkt_reltoken(&p->p_token);
                PRELE(p);
-               lwkt_reltoken(&proc_token);
+
                return (0);
        }
 
@@ -800,10 +792,9 @@ kern_kill(int sig, pid_t pid, lwpid_t tid)
         * If we come here, pid is a special broadcast pid.
         * This doesn't mix with a tid.
         */
-       if (tid != -1) {
-               lwkt_reltoken(&proc_token);
+       if (tid != -1)
                return (EINVAL);
-       }
+
        switch (pid) {
        case -1:                /* broadcast signal */
                t = (dokillpg(sig, 0, 1));
@@ -815,7 +806,6 @@ kern_kill(int sig, pid_t pid, lwpid_t tid)
                t = (dokillpg(sig, -pid, 0));
                break;
        }
-       lwkt_reltoken(&proc_token);
        return t;
 }
 
diff --git a/sys/kern/kern_spinlock.c b/sys/kern/kern_spinlock.c
index 5c523d1..d739119 100644
 #include <sys/spinlock2.h>
 #include <sys/ktr.h>
 
+#ifdef _KERNEL_VIRTUAL
+#include <pthread.h>
+#endif
+
 struct spinlock pmap_spin = SPINLOCK_INITIALIZER(pmap_spin);
 
 struct indefinite_info {
@@ -247,6 +251,9 @@ spin_lock_contested(struct spinlock *spin)
                        if (spin_indefinite_check(spin, &info))
                                break;
                }
+#ifdef _KERNEL_VIRTUAL
+               pthread_yield();
+#endif
        }
        /*logspin(end, spin, 'w');*/
 }
@@ -309,6 +316,9 @@ spin_lock_shared_contested(struct spinlock *spin)
                        if (spin_indefinite_check(spin, &info))
                                break;
                }
+#ifdef _KERNEL_VIRTUAL
+               pthread_yield();
+#endif
        }
        /*logspin(end, spin, 'w');*/
 }
diff --git a/sys/kern/lwkt_ipiq.c b/sys/kern/lwkt_ipiq.c
index 58d4522..94f9dac 100644
 #include <machine/smp.h>
 #include <machine/atomic.h>
 
+#ifdef _KERNEL_VIRTUAL
+#include <pthread.h>
+#endif
+
 struct ipiq_stats {
     __int64_t ipiq_count;      /* total calls to lwkt_send_ipiq*() */
     __int64_t ipiq_fifofull;   /* number of fifo full conditions detected */
@@ -821,6 +825,9 @@ lwkt_cpusync_interlock(lwkt_cpusync_t cs)
        while (cs->cs_mack != mask) {
            lwkt_process_ipiq();
            cpu_pause();
+#ifdef _KERNEL_VIRTUAL
+           pthread_yield();
+#endif
        }
 #if 0
        if (gd->gd_curthread->td_wmesg == smsg)
@@ -869,6 +876,9 @@ lwkt_cpusync_deinterlock(lwkt_cpusync_t cs)
        while (cs->cs_mack != mask) {
            lwkt_process_ipiq();
            cpu_pause();
+#ifdef _KERNEL_VIRTUAL
+           pthread_yield();
+#endif
        }
 #if 0
        if (gd->gd_curthread->td_wmesg == smsg)
@@ -923,6 +933,9 @@ lwkt_cpusync_remote2(lwkt_cpusync_t cs)
        lwkt_ipiq_t ip;
        int wi;
 
+#ifdef _KERNEL_VIRTUAL
+       pthread_yield();
+#endif
        ip = &gd->gd_cpusyncq;
        wi = ip->ip_windex & MAXCPUFIFO_MASK;
        ip->ip_info[wi].func = (ipifunc3_t)(ipifunc1_t)lwkt_cpusync_remote2;
diff --git a/sys/kern/lwkt_thread.c b/sys/kern/lwkt_thread.c
index d98813f..7ae40a1 100644
 #include <machine/stdarg.h>
 #include <machine/smp.h>
 
+#ifdef _KERNEL_VIRTUAL
+#include <pthread.h>
+#endif
+
 #if !defined(KTR_CTXSW)
 #define KTR_CTXSW KTR_ALL
 #endif
@@ -150,8 +154,10 @@ static int lwkt_cache_threads = 0;
 SYSCTL_INT(_lwkt, OID_AUTO, cache_threads, CTLFLAG_RD,
        &lwkt_cache_threads, 0, "thread+kstack cache");
 
+#ifndef _KERNEL_VIRTUAL
 static __cachealign int lwkt_cseq_rindex;
 static __cachealign int lwkt_cseq_windex;
+#endif
 
 /*
  * These helper procedures handle the runq, they can only be called from
@@ -749,6 +755,9 @@ skip:
         * are contested.
         */
        cpu_pause();
+#ifdef _KERNEL_VIRTUAL
+       pthread_yield();
+#endif
        ntd = &gd->gd_idlethread;
        if (gd->gd_trap_nesting_level == 0 && panicstr == NULL)
            ASSERT_NO_TOKENS_HELD(ntd);
@@ -782,6 +791,7 @@ skip:
        if (spinning < 0x7FFFFFFF)
            ++spinning;
 
+#ifndef _KERNEL_VIRTUAL
        /*
         * lwkt_getalltokens() failed in sorted token mode, we can use
         * monitor/mwait in this case.
@@ -795,6 +805,7 @@ skip:
                              ~RQF_IDLECHECK_WK_MASK,
                              cpu_mwait_spin);
        }
+#endif
 
        /*
         * We already checked that td is still scheduled so this should be
@@ -802,6 +813,7 @@ skip:
         */
        splz_check();
 
+#ifndef _KERNEL_VIRTUAL
        /*
         * This experimental resequencer is used as a fall-back to reduce
         * hw cache line contention by placing each core's scheduler into a
@@ -837,6 +849,7 @@ skip:
            DELAY(1);
            atomic_add_int(&lwkt_cseq_rindex, 1);
        }
+#endif
        /* highest level for(;;) loop */
     }
 
diff --git a/sys/kern/usched_dfly.c b/sys/kern/usched_dfly.c
index 32a7511..e917fcf 100644
@@ -336,17 +336,18 @@ dfly_acquire_curproc(struct lwp *lp)
 
                spin_lock(&dd->spin);
 
-               /*
-                * We are not or are no longer the current lwp and a forced
-                * reschedule was requested.  Figure out the best cpu to
-                * run on (our current cpu will be given significant weight).
-                *
-                * (if a reschedule was not requested we want to move this
-                *  step after the uschedcp tests).
-                */
                if (force_resched &&
-                   (usched_dfly_features & 0x08) &&
-                   (rdd = dfly_choose_best_queue(lp)) != dd) {
+                  (usched_dfly_features & 0x08) &&
+                  (rdd = dfly_choose_best_queue(lp)) != dd) {
+                       /*
+                        * We are not or are no longer the current lwp and a
+                        * forced reschedule was requested.  Figure out the
+                        * best cpu to run on (our current cpu will be given
+                        * significant weight).
+                        *
+                        * (if a reschedule was not requested we want to
+                        *  move this step after the uschedcp tests).
+                        */
                        dfly_changeqcpu_locked(lp, dd, rdd);
                        spin_unlock(&dd->spin);
                        lwkt_deschedule(lp->lwp_thread);
@@ -363,6 +364,8 @@ dfly_acquire_curproc(struct lwp *lp)
                 * trivially become the current lwp on the current cpu.
                 */
                if (dd->uschedcp == NULL) {
+                       atomic_clear_int(&lp->lwp_thread->td_mpflags,
+                                        TDF_MP_DIDYIELD);
                        atomic_set_cpumask(&dfly_curprocmask, gd->gd_cpumask);
                        dd->uschedcp = lp;
                        dd->upri = lp->lwp_priority;
@@ -371,6 +374,40 @@ dfly_acquire_curproc(struct lwp *lp)
                        break;
                }
 
+               /*
+                * Put us back on the same run queue unconditionally.
+                *
+                * Set rrinterval to force placement at end of queue.
+                * Select the worst queue to ensure we round-robin,
+                * but do not change estcpu.
+                */
+               if (lp->lwp_thread->td_mpflags & TDF_MP_DIDYIELD) {
+                       u_int32_t tsqbits;
+
+                       atomic_clear_int(&lp->lwp_thread->td_mpflags,
+                                        TDF_MP_DIDYIELD);
+
+                       switch(lp->lwp_rqtype) {
+                       case RTP_PRIO_NORMAL:
+                               tsqbits = dd->queuebits;
+                               spin_unlock(&dd->spin);
+
+                               lp->lwp_rrcount = usched_dfly_rrinterval;
+                               if (tsqbits)
+                                       lp->lwp_rqindex = bsrl(tsqbits);
+                               break;
+                       default:
+                               spin_unlock(&dd->spin);
+                               break;
+                       }
+                       lwkt_deschedule(lp->lwp_thread);
+                       dfly_setrunqueue_dd(dd, lp);
+                       lwkt_switch();
+                       gd = mycpu;
+                       dd = &dfly_pcpu[gd->gd_cpuid];
+                       continue;
+               }
+
                /*
                 * Can we steal the current designated user thread?
                 *
@@ -1124,17 +1161,16 @@ static
 void
 dfly_yield(struct lwp *lp)
 {
-#if 0
-       /* FUTURE (or something similar) */
-       switch(lp->lwp_rqtype) {
-       case RTP_PRIO_NORMAL:
-               lp->lwp_estcpu = ESTCPULIM(lp->lwp_estcpu + ESTCPUINCR);
-               break;
-       default:
-               break;
-       }
-#endif
-        need_user_resched();
+       if (lp->lwp_qcpu != mycpu->gd_cpuid)
+               return;
+       KKASSERT(lp == curthread->td_lwp);
+
+       /*
+        * Don't set need_user_resched() or mess with rrcount or anything.
+        * The TDF flag will override everything as long as we release.
+        */
+       atomic_set_int(&lp->lwp_thread->td_mpflags, TDF_MP_DIDYIELD);
+       dfly_release_curproc(lp);
 }
 
 /*
diff --git a/sys/sys/thread.h b/sys/sys/thread.h
index a73c9bd..a69d393 100644
@@ -376,6 +376,7 @@ struct thread {
 #define TDF_MP_EXITWAIT                0x00000004      /* reaper, see lwp_wait() */
 #define TDF_MP_EXITSIG         0x00000008      /* reaper, see lwp_wait() */
 #define TDF_MP_BATCH_DEMARC    0x00000010      /* batch mode handling */
+#define TDF_MP_DIDYIELD                0x00000020      /* affects scheduling */
 
 #define TD_TYPE_GENERIC                0               /* generic thread */
 #define TD_TYPE_CRYPTO         1               /* crypto thread */