kernel - Fix MP system call performance regression
author Matthew Dillon <dillon@apollo.backplane.com>
Fri, 10 Dec 2010 07:53:44 +0000 (23:53 -0800)
committer Matthew Dillon <dillon@apollo.backplane.com>
Fri, 10 Dec 2010 07:53:44 +0000 (23:53 -0800)
* The userland scheduler was unconditionally calling lwkt_switch()
  via userexit() (i.e. on every system call), creating unnecessary
  overhead and possibly also triggering a bsd4 scheduler event
  requiring a common spinlock.

* Rearrange the code slightly to reduce instances where lwkt_switch()
  is called.  We still want to call it in the instances where a higher
  priority LWKT thread is potentially runnable or where the LWKT fairq
  accumulator for the current thread has been exhausted.

* This removes system call overhead multiplication on MP systems.  For
  example, on a 48-core box, system call overhead when all 48 cpus are
  busy doing getuid() loops went from 10uS back down to 270nS (which
  is near the single-cpu test results).

sys/kern/usched_bsd4.c

index 23581a9..6d22eaf 100644 (file)
@@ -252,7 +252,7 @@ bsd4_acquire_curproc(struct lwp *lp)
                 * Reload after a switch or setrunqueue/switch possibly
                 * moved us to another cpu.
                 */
-               clear_lwkt_resched();
+               /*clear_lwkt_resched();*/
                gd = mycpu;
                dd = &bsd4_pcpu[gd->gd_cpuid];
 
@@ -276,6 +276,7 @@ bsd4_acquire_curproc(struct lwp *lp)
                        dd->upri = lp->lwp_priority;
                        lwkt_deschedule(olp->lwp_thread);
                        bsd4_setrunqueue(olp);
+                       lwkt_switch();
                } else {
                        lwkt_deschedule(lp->lwp_thread);
                        bsd4_setrunqueue(lp);
@@ -293,7 +294,10 @@ bsd4_acquire_curproc(struct lwp *lp)
                 * the run queue.  When we are reactivated we will have
                 * another chance.
                 */
-               lwkt_switch();
+               if (lwkt_resched_wanted() ||
+                   lp->lwp_thread->td_fairq_accum < 0) {
+                       lwkt_switch();
+               }
        } while (dd->uschedcp != lp);
 
        crit_exit();