kernel - Properly account system time for contending tokens
author Matthew Dillon <dillon@apollo.backplane.com>
Wed, 20 Feb 2013 06:56:58 +0000 (22:56 -0800)
committer Matthew Dillon <dillon@apollo.backplane.com>
Wed, 20 Feb 2013 06:56:58 +0000 (22:56 -0800)
* When the LWKT scheduler gets stuck on a contending token it switches
  through the idle thread; the idle thread is told not to halt, and
  resolution of the contention is handled by lwkt_switch() from the
  idle thread's context.

* This was causing token contention to be improperly accounted for as
  idle time in the per-cpu stats.  Fix the case by testing the
  RQF_AST_LWKT_RESCHED flag which tells the idle thread not to halt,
  and account for the tick as system time if the flag is set.

* The improper time accounting was causing powerd to come to the wrong
  conclusion in massively parallel fsstress tests on monster.dragonflybsd.org
  (48 cpus).  With the fix, powerd no longer becomes confused.

Reported-by: vsrinivas
sys/kern/kern_clock.c

index 0027080..607b3c0 100644 (file)
@@ -675,6 +675,10 @@ statclock(systimer_t info, int in_ipi, struct intrframe *frame)
                        td->td_sticks += bump;
 
                if (IS_INTR_RUNNING) {
+                       /*
+                        * If we interrupted an interrupt thread, well,
+                        * count it as interrupt time.
+                        */
 #ifdef DEBUG_PCTRACK
                        if (frame)
                                do_pctrack(frame, PCTRACK_INT);
@@ -682,8 +686,20 @@ statclock(systimer_t info, int in_ipi, struct intrframe *frame)
                        cpu_time.cp_intr += bump;
                } else {
                        if (td == &mycpu->gd_idlethread) {
-                               cpu_time.cp_idle += bump;
+                               /*
+                                * Even if the current thread is the idle
+                                * thread it could be due to token contention
+                                * in the LWKT scheduler.  Count such as
+                                * system time.
+                                */
+                               if (mycpu->gd_reqflags & RQF_AST_LWKT_RESCHED)
+                                       cpu_time.cp_sys += bump;
+                               else
+                                       cpu_time.cp_idle += bump;
                        } else {
+                               /*
+                                * System thread was running.
+                                */
 #ifdef DEBUG_PCTRACK
                                if (frame)
                                        do_pctrack(frame, PCTRACK_SYS);