From be71787bd3c8d2d199af9bb30ffb6df730c6b858 Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Sun, 19 Dec 2010 11:17:36 -0800
Subject: [PATCH] kernel - Optimize idle thread halt

* Count the number of times the idle thread is entered on a cpu without
  switching to a non-idle thread.  Use the fast halt (non-ACPI) until the
  count reaches machdep.cpu_idle_repeat (default 4).

  This improves the default performance to levels closer to cpu_idle_hlt
  mode 1 while still giving us the power savings from mode 3.

  Performance is improved significantly because many threads on SMP boxes
  are event- or pipe-oriented and only sleep for short periods of time or
  ping-pong back and forth: for example, a cc -pipe, or typical kernel
  threads blocking on tokens or locks for short periods.

* Adjust the machdep.cpu_idle_hlt modes (an illustrative sketch of the
  selection logic is appended after the patch):

	0	Never halt, the idle thread just spins.
	1	Always use a fast HLT/MONITOR/MWAIT.
	2	Hybrid approach: use (1) up to a certain point, then
		use (3).  This is the default.
	3	Always use the ACPI halt.
---
 sys/kern/lwkt_thread.c             |  4 +++
 sys/platform/pc32/i386/machdep.c   | 55 +++++++++++++++++++++++++-----
 sys/platform/pc64/x86_64/machdep.c | 47 +++++++++++++++++++++----
 sys/sys/globaldata.h               |  5 ++-
 4 files changed, 96 insertions(+), 15 deletions(-)

diff --git a/sys/kern/lwkt_thread.c b/sys/kern/lwkt_thread.c
index e448da3373..55b5ab6b07 100644
--- a/sys/kern/lwkt_thread.c
+++ b/sys/kern/lwkt_thread.c
@@ -878,6 +878,9 @@ havethread:
	 * If the thread we came up with is a higher or equal priority verses
	 * the thread at the head of the queue we move our thread to the
	 * front.  This way we can always check the front of the queue.
+	 *
+	 * Clear gd_idle_repeat when doing a normal switch to a non-idle
+	 * thread.
	 */
	++gd->gd_cnt.v_swtch;
	--ntd->td_fairq_accum;
@@ -887,6 +890,7 @@ havethread:
		TAILQ_REMOVE(&gd->gd_tdrunq, ntd, td_threadq);
		TAILQ_INSERT_HEAD(&gd->gd_tdrunq, ntd, td_threadq);
	}
+	gd->gd_idle_repeat = 0;

 havethread_preempted:
	/*
diff --git a/sys/platform/pc32/i386/machdep.c b/sys/platform/pc32/i386/machdep.c
index a5c39f0e1c..3fbaca67cb 100644
--- a/sys/platform/pc32/i386/machdep.c
+++ b/sys/platform/pc32/i386/machdep.c
@@ -885,12 +885,15 @@ cpu_halt(void)
 static int	cpu_idle_hlt = 2;
 static int	cpu_idle_hltcnt;
 static int	cpu_idle_spincnt;
+static u_int	cpu_idle_repeat = 4;
 SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
     &cpu_idle_hlt, 0, "Idle loop HLT enable");
 SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hltcnt, CTLFLAG_RW,
     &cpu_idle_hltcnt, 0, "Idle loop entry halts");
 SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_spincnt, CTLFLAG_RW,
     &cpu_idle_spincnt, 0, "Idle loop entry spins");
+SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_repeat, CTLFLAG_RW,
+    &cpu_idle_repeat, 0, "Idle entries before acpi hlt");

 static void
 cpu_idle_default_hook(void)
@@ -908,7 +911,10 @@ void (*cpu_idle_hook)(void) = cpu_idle_default_hook;
 void
 cpu_idle(void)
 {
-	struct thread *td = curthread;
+	globaldata_t gd = mycpu;
+	struct thread *td = gd->gd_curthread;
+	int reqflags;
+	int quick;

	crit_exit();
	KKASSERT(td->td_critcount == 0);
@@ -919,16 +925,49 @@ cpu_idle(void)
		lwkt_switch();

		/*
-		 * If we are going to halt call splz unconditionally after
-		 * CLIing to catch any interrupt races.  Note that we are
-		 * at SPL0 and interrupts are enabled.
+		 * When halting inside a cli we must check for reqflags
+		 * races, particularly [re]schedule requests.  Running
+		 * splz() does the job.
+		 *
+		 * cpu_idle_hlt:
+		 *	0	Never halt, just spin
+		 *
+		 *	1	Always use HLT (or MONITOR/MWAIT if avail).
+		 *		This typically eats more power than the
+		 *		ACPI halt.
+		 *
+		 *	2	Use HLT/MONITOR/MWAIT up to a point and then
+		 *		use the ACPI halt (default).  This is a hybrid
+		 *		approach.  See machdep.cpu_idle_repeat.
+		 *
+		 *	3	Always use the ACPI halt.  This typically
+		 *		eats the least amount of power but the cpu
+		 *		will be slow waking up.  Slows down e.g.
+		 *		compiles and other pipe/event oriented stuff.
+		 *
+		 *
+		 * NOTE: Interrupts are enabled and we are not in a critical
+		 *	 section.
+		 *
+		 * NOTE: Preemptions do not reset gd_idle_repeat.  Also we
+		 *	 don't bother capping gd_idle_repeat, it is ok if
+		 *	 it overflows.
		 */
-		if (cpu_idle_hlt &&
-		    (td->td_gd->gd_reqflags & RQF_IDLECHECK_WK_MASK) == 0) {
+		++gd->gd_idle_repeat;
+		reqflags = gd->gd_reqflags;
+		quick = (cpu_idle_hlt == 1) ||
+			(cpu_idle_hlt < 3 &&
+			 gd->gd_idle_repeat < cpu_idle_repeat);
+
+		if (quick && (cpu_mi_feature & CPU_MI_MONITOR) &&
+		    (reqflags & RQF_IDLECHECK_WK_MASK) == 0) {
+			cpu_mmw_pause_int(&gd->gd_reqflags, reqflags);
+			++cpu_idle_hltcnt;
+		} else if (cpu_idle_hlt) {
			__asm __volatile("cli");
			splz();
-			if ((td->td_gd->gd_reqflags & RQF_IDLECHECK_WK_MASK) == 0) {
-				if (cpu_idle_hlt == 1)
+			if ((gd->gd_reqflags & RQF_IDLECHECK_WK_MASK) == 0) {
+				if (quick)
					cpu_idle_default_hook();
				else
					cpu_idle_hook();
diff --git a/sys/platform/pc64/x86_64/machdep.c b/sys/platform/pc64/x86_64/machdep.c
index 6de49280e8..c07dc8cb1f 100644
--- a/sys/platform/pc64/x86_64/machdep.c
+++ b/sys/platform/pc64/x86_64/machdep.c
@@ -902,16 +902,22 @@ cpu_halt(void)
 *
 * NOTE: cpu_idle_hlt again defaults to 2 (use ACPI sleep states).  Set to
 *	 1 to just use hlt and for debugging purposes.
+ *
+ * NOTE: cpu_idle_repeat determines how many entries into the idle thread
+ *	 must occur before it starts using ACPI halt.
 */
 static int	cpu_idle_hlt = 2;
 static int	cpu_idle_hltcnt;
 static int	cpu_idle_spincnt;
+static u_int	cpu_idle_repeat = 4;
 SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
     &cpu_idle_hlt, 0, "Idle loop HLT enable");
 SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hltcnt, CTLFLAG_RW,
     &cpu_idle_hltcnt, 0, "Idle loop entry halts");
 SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_spincnt, CTLFLAG_RW,
     &cpu_idle_spincnt, 0, "Idle loop entry spins");
+SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_repeat, CTLFLAG_RW,
+    &cpu_idle_repeat, 0, "Idle entries before acpi hlt");

 static void
 cpu_idle_default_hook(void)
@@ -932,6 +938,7 @@ cpu_idle(void)
	globaldata_t gd = mycpu;
	struct thread *td = gd->gd_curthread;
	int reqflags;
+	int quick;

	crit_exit();
	KKASSERT(td->td_critcount == 0);
@@ -942,20 +949,48 @@ cpu_idle(void)
		lwkt_switch();

		/*
-		 * If we are going to halt call splz unconditionally after
-		 * CLIing to catch any interrupt races.  Note that we are
-		 * at SPL0 and interrupts are enabled.
+		 * When halting inside a cli we must check for reqflags
+		 * races, particularly [re]schedule requests.  Running
+		 * splz() does the job.
+		 *
+		 * cpu_idle_hlt:
+		 *	0	Never halt, just spin
+		 *
+		 *	1	Always use HLT (or MONITOR/MWAIT if avail).
+		 *		This typically eats more power than the
+		 *		ACPI halt.
+		 *
+		 *	2	Use HLT/MONITOR/MWAIT up to a point and then
+		 *		use the ACPI halt (default).  This is a hybrid
+		 *		approach.  See machdep.cpu_idle_repeat.
+		 *
+		 *	3	Always use the ACPI halt.  This typically
+		 *		eats the least amount of power but the cpu
+		 *		will be slow waking up.  Slows down e.g.
+		 *		compiles and other pipe/event oriented stuff.
+		 *
+		 * NOTE: Interrupts are enabled and we are not in a critical
+		 *	 section.
+		 *
+		 * NOTE: Preemptions do not reset gd_idle_repeat.  Also we
+		 *	 don't bother capping gd_idle_repeat, it is ok if
+		 *	 it overflows.
		 */
+		++gd->gd_idle_repeat;
		reqflags = gd->gd_reqflags;
-		if (cpu_idle_hlt == 1 &&
-		    (cpu_mi_feature & CPU_MI_MONITOR) &&
+		quick = (cpu_idle_hlt == 1) ||
+			(cpu_idle_hlt < 3 &&
+			 gd->gd_idle_repeat < cpu_idle_repeat);
+
+		if (quick && (cpu_mi_feature & CPU_MI_MONITOR) &&
		    (reqflags & RQF_IDLECHECK_WK_MASK) == 0) {
			cpu_mmw_pause_int(&gd->gd_reqflags, reqflags);
+			++cpu_idle_hltcnt;
		} else if (cpu_idle_hlt) {
			__asm __volatile("cli");
			splz();
			if ((gd->gd_reqflags & RQF_IDLECHECK_WK_MASK) == 0) {
-				if (cpu_idle_hlt == 1)
+				if (quick)
					cpu_idle_default_hook();
				else
					cpu_idle_hook();
diff --git a/sys/sys/globaldata.h b/sys/sys/globaldata.h
index 507f3d5450..e1fe49ad39 100644
--- a/sys/sys/globaldata.h
+++ b/sys/sys/globaldata.h
@@ -163,7 +163,9 @@ struct globaldata {
	int		gd_spinlocks_wr;	/* Exclusive spinlocks held */
	struct systimer *gd_systimer_inprog;	/* in-progress systimer */
	int		gd_timer_running;
-	void		*gd_reserved[11];	/* future fields */
+	u_int		gd_idle_repeat;		/* repeated switches to idle */
+	int		gd_ireserved[7];
+	void		*gd_preserved[11];	/* future fields */
	/* extended by <machine/globaldata.h> */
 };

@@ -190,6 +192,7 @@ typedef struct globaldata *globaldata_t;
 #define RQF_AST_UPCALL		(1 << RQB_AST_UPCALL)
 #define RQF_RUNNING		(1 << RQB_RUNNING)
 #define RQF_WAKEUP		(1 << RQB_WAKEUP)
+
 #define RQF_AST_MASK		(RQF_AST_OWEUPC|RQF_AST_SIGNAL|\
				 RQF_AST_USER_RESCHED|RQF_AST_LWKT_RESCHED|\
				 RQF_AST_UPCALL)
-- 
2.41.0
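
What follows is a minimal, illustrative C sketch (userland, not kernel
code) of the hybrid halt-selection logic the patch adds to cpu_idle().
The names cpu_idle_hlt, cpu_idle_repeat and gd_idle_repeat mirror the
patch; the halt_kind enum, pick_halt() and the simulation in main() are
invented here purely for demonstration.

	#include <stdio.h>

	static int cpu_idle_hlt = 2;		/* 0=spin 1=fast 2=hybrid 3=acpi */
	static unsigned cpu_idle_repeat = 4;	/* fast halts before deep halt */
	static unsigned gd_idle_repeat;		/* consecutive idle entries */

	enum halt_kind { HALT_SPIN, HALT_FAST, HALT_ACPI };

	/*
	 * Mirrors the selection in the patched cpu_idle(): mode 1 is always
	 * "quick", mode 2 is quick only while gd_idle_repeat is below
	 * cpu_idle_repeat, mode 3 is never quick, mode 0 just spins.
	 */
	static enum halt_kind
	pick_halt(void)
	{
		int quick;

		if (cpu_idle_hlt == 0)
			return (HALT_SPIN);
		++gd_idle_repeat;	/* no cap; overflow is harmless */
		quick = (cpu_idle_hlt == 1) ||
			(cpu_idle_hlt < 3 &&
			 gd_idle_repeat < cpu_idle_repeat);
		return (quick ? HALT_FAST : HALT_ACPI);
	}

	int
	main(void)
	{
		static const char *name[] = { "spin", "fast-hlt", "acpi-hlt" };
		int i;

		/* Default hybrid mode: the first few idle entries use the
		 * fast halt, then fall back to the deeper ACPI halt. */
		for (i = 0; i < 6; ++i)
			printf("idle entry %d: %s\n", i + 1, name[pick_halt()]);

		/* A normal switch to a non-idle thread clears the counter
		 * (the patch does this in lwkt_switch()), so subsequent
		 * halts are fast again. */
		gd_idle_repeat = 0;
		printf("after switch: %s\n", name[pick_halt()]);
		return (0);
	}

With the defaults above this prints fast-hlt for the first three idle
entries, acpi-hlt from the fourth consecutive entry onward, and fast-hlt
again once the counter is cleared, which is the mode 2 behavior the
commit message describes.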