2 * Copyright (c) 2010 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * - Monitor the cpu load and adjust cpu and cpu power domain
38 * performance accordingly.
39 * - Monitor battery life. Raise alerts and shut down the machine
40 * if battery life goes low.
43 #define _KERNEL_STRUCTURES
44 #include <sys/types.h>
45 #include <sys/sysctl.h>
46 #include <sys/kinfo.h>
48 #include <sys/queue.h>
49 #include <sys/soundcard.h>
51 #include <machine/cpufunc.h>
61 #define MAXDOM MAXCPU /* worst case, 1 cpu per domain */
67 TAILQ_ENTRY(cpu_pwrdom) dom_link;
70 cpumask_t dom_cpumask;
75 double cpu_uavg; /* used for speeding up */
76 double cpu_davg; /* used for slowing down */
82 static void usage(void);
83 static void get_ncpus(void);
86 static void get_uschedcpus(void);
87 static void set_uschedcpus(void);
90 static int has_perfbias(void);
91 static void set_perfbias(int, int);
94 static void acpi_getcpufreq_str(int, int *, int *);
95 static int acpi_getcpufreq_bin(int, int *, int *);
96 static void acpi_get_cpufreq(int, int *, int *);
97 static void acpi_set_cpufreq(int, int);
98 static int acpi_get_cpupwrdom(void);
100 /* mwait C-state hint */
101 static int probe_cstate(void);
102 static void set_cstate(int, int);
104 /* Performance monitoring */
105 static void init_perf(void);
106 static void mon_perf(double);
107 static void adj_perf(cpumask_t, cpumask_t);
108 static void adj_cpu_pwrdom(int, int);
109 static void adj_cpu_perf(int, int);
110 static void get_cputime(double);
111 static int get_nstate(struct cpu_state *, double);
112 static void add_spare_cpus(const cpumask_t, int);
113 static void restore_perf(void);
115 /* Battery monitoring */
116 static int has_battery(void);
117 static int mon_battery(void);
118 static void low_battery_alert(int);
121 static void restore_backlight(void);
123 /* Runtime states for performance monitoring */
124 static int global_pcpu_limit;
125 static struct cpu_state pcpu_state[MAXCPU];
126 static struct cpu_state global_cpu_state;
127 static cpumask_t cpu_used; /* cpus w/ high perf */
128 static cpumask_t cpu_pwrdom_used; /* cpu power domains w/ high perf */
129 static cpumask_t usched_cpu_used; /* cpus for usched */
132 static cpumask_t cpu_pwrdom_mask; /* usable cpu power domains */
133 static int cpu2pwrdom[MAXCPU]; /* cpu to cpu power domain map */
134 static struct cpu_pwrdom *cpu_pwrdomain[MAXDOM];
135 static int NCpus; /* # of cpus */
136 static char orig_global_cx[CST_STRLEN];
137 static char cpu_perf_cx[CST_STRLEN];
138 static int cpu_perf_cxlen;
139 static char cpu_idle_cx[CST_STRLEN];
140 static int cpu_idle_cxlen;
143 static int TurboOpt = 1;
145 static int Hysteresis = 10; /* percentage */
146 static double TriggerUp = 0.25; /* single-cpu load to force max freq */
147 static double TriggerDown; /* load per cpu to force the min freq */
148 static int HasPerfbias = 0;
149 static int AdjustCpuFreq = 1;
150 static int AdjustCstate = 0;
151 static int HighestCpuFreq;
152 static int LowestCpuFreq;
154 static volatile int stopped;
156 /* Battery life monitoring */
157 static int BatLifeMin = 2; /* shutdown the box, if low on battery life */
158 static struct timespec BatLifePrevT;
159 static int BatLifePollIntvl = 5; /* unit: sec */
160 static struct timespec BatShutdownStartT;
161 static int BatShutdownLinger = -1;
162 static int BatShutdownLingerSet = 60; /* unit: sec */
163 static int BatShutdownLingerCnt;
164 static int BatShutdownAudioAlert = 1;
165 static int BackLightPct = 100;
166 static int OldBackLightLevel;
167 static int BackLightDown;
169 static void sigintr(int signo);
172 main(int ac, char **av)
180 srt = 8.0; /* time for samples - 8 seconds */
181 pollrate = 1.0; /* polling rate in seconds */
183 while ((ch = getopt(ac, av, "b:cdefh:l:p:r:tu:B:L:P:QT:")) != -1) {
186 BackLightPct = strtol(optarg, NULL, 10);
201 HighestCpuFreq = strtol(optarg, NULL, 10);
204 LowestCpuFreq = strtol(optarg, NULL, 10);
207 Hysteresis = (int)strtol(optarg, NULL, 10);
210 pollrate = strtod(optarg, NULL);
216 TriggerUp = (double)strtol(optarg, NULL, 10) / 100;
219 BatLifeMin = strtol(optarg, NULL, 10);
222 BatShutdownLingerSet = strtol(optarg, NULL, 10);
223 if (BatShutdownLingerSet < 0)
224 BatShutdownLingerSet = 0;
227 BatLifePollIntvl = strtol(optarg, NULL, 10);
230 BatShutdownAudioAlert = 0;
233 srt = strtod(optarg, NULL);
245 /* Get number of cpus */
248 if (0 > Hysteresis || Hysteresis > 99) {
249 fprintf(stderr, "Invalid hysteresis value\n");
253 if (0 > TriggerUp || TriggerUp > 1) {
254 fprintf(stderr, "Invalid load limit value\n");
258 if (BackLightPct > 100 || BackLightPct <= 0) {
259 fprintf(stderr, "Invalid backlight setting, ignore\n");
263 TriggerDown = TriggerUp - (TriggerUp * (double) Hysteresis / 100);
266 * Make sure powerd is not already running.
268 PowerFd = open("/var/run/powerd.pid", O_CREAT|O_RDWR, 0644);
271 "Cannot create /var/run/powerd.pid, "
272 "continuing anyway\n");
274 if (flock(PowerFd, LOCK_EX|LOCK_NB) < 0) {
275 fprintf(stderr, "powerd is already running\n");
281 * Daemonize and set pid
285 openlog("powerd", LOG_CONS | LOG_PID, LOG_DAEMON);
289 ftruncate(PowerFd, 0);
290 snprintf(buf, sizeof(buf), "%d\n", (int)getpid());
291 write(PowerFd, buf, strlen(buf));
294 /* Do we need to monitor battery life? */
295 if (BatLifePollIntvl <= 0)
298 monbat = has_battery();
300 /* Do we have perfbias(4)? */
302 HasPerfbias = has_perfbias();
304 /* Could we adjust C-state? */
306 AdjustCstate = probe_cstate();
309 * Wait for the hw.acpi.cpu.px_dom* sysctls to be created by the kernel.
311 * Since hw.acpi.cpu.px_dom* creation is queued into ACPI
312 * taskqueue and ACPI taskqueue is shared across various
313 * ACPI modules, any delay in other modules may cause
314 * hw.acpi.cpu.px_dom* to be created at quite a later time
315 * (e.g. cmbat module's task could take quite a lot of time).
318 /* Prime delta cputime calculation. */
319 get_cputime(pollrate);
321 /* Wait for all cpus to appear */
322 if (acpi_get_cpupwrdom())
324 usleep((int)(pollrate * 1000000.0));
328 * Catch some signals so that max performance could be restored.
330 signal(SIGINT, sigintr);
331 signal(SIGTERM, sigintr);
333 /* Initialize performance states */
336 srt = srt / pollrate; /* convert to sample count */
338 printf("samples for downgrading: %5.2f\n", srt);
345 * Monitor performance
347 get_cputime(pollrate);
354 monbat = mon_battery();
356 usleep((int)(pollrate * 1000000.0));
360 * Set to maximum performance if killed.
362 syslog(LOG_INFO, "killed, setting max and exiting");
370 sigintr(int signo __unused)
376 * Figure out the cpu power domains.
379 acpi_get_cpupwrdom(void)
381 struct cpu_pwrdom *dom;
382 cpumask_t pwrdom_mask;
387 int n, i, ncpu = 0, dom_id;
389 memset(cpu2pwrdom, 0, sizeof(cpu2pwrdom));
390 memset(cpu_pwrdomain, 0, sizeof(cpu_pwrdomain));
391 CPUMASK_ASSZERO(cpu_pwrdom_mask);
393 for (i = 0; i < MAXDOM; ++i) {
394 snprintf(buf, sizeof(buf),
395 "hw.acpi.cpu.px_dom%d.available", i);
396 if (sysctlbyname(buf, NULL, NULL, NULL, 0) < 0)
399 dom = calloc(1, sizeof(*dom));
402 if (cpu_pwrdomain[i] != NULL) {
403 fprintf(stderr, "cpu power domain %d exists\n", i);
406 cpu_pwrdomain[i] = dom;
407 CPUMASK_ORBIT(cpu_pwrdom_mask, i);
409 pwrdom_mask = cpu_pwrdom_mask;
411 while (CPUMASK_TESTNZERO(pwrdom_mask)) {
412 dom_id = BSFCPUMASK(pwrdom_mask);
413 CPUMASK_NANDBIT(pwrdom_mask, dom_id);
414 dom = cpu_pwrdomain[dom_id];
416 CPUMASK_ASSZERO(dom->dom_cpumask);
418 snprintf(buf, sizeof(buf),
419 "hw.acpi.cpu.px_dom%d.members", dom->dom_id);
420 msize = sizeof(members);
421 if (sysctlbyname(buf, members, &msize, NULL, 0) < 0) {
422 cpu_pwrdomain[dom_id] = NULL;
428 for (str = strtok(members, " "); str; str = strtok(NULL, " ")) {
430 sscanf(str, "cpu%d", &n);
434 CPUMASK_ORBIT(dom->dom_cpumask, n);
435 cpu2pwrdom[n] = dom->dom_id;
438 if (dom->dom_ncpus == 0) {
439 cpu_pwrdomain[dom_id] = NULL;
444 printf("dom%d cpumask: ", dom->dom_id);
445 for (i = 0; i < (int)NELEM(dom->dom_cpumask.ary); ++i) {
447 (uintmax_t)dom->dom_cpumask.ary[i]);
455 printf("Found %d cpus, expecting %d\n", ncpu, NCpus);
457 pwrdom_mask = cpu_pwrdom_mask;
458 while (CPUMASK_TESTNZERO(pwrdom_mask)) {
459 dom_id = BSFCPUMASK(pwrdom_mask);
460 CPUMASK_NANDBIT(pwrdom_mask, dom_id);
461 dom = cpu_pwrdomain[dom_id];
471 * Save per-cpu load and sum of per-cpu load.
474 get_cputime(double pollrate)
476 static struct kinfo_cputime ocpu_time[MAXCPU];
477 static struct kinfo_cputime ncpu_time[MAXCPU];
483 bcopy(ncpu_time, ocpu_time, sizeof(struct kinfo_cputime) * NCpus);
485 slen = sizeof(ncpu_time);
486 if (sysctlbyname("kern.cputime", &ncpu_time, &slen, NULL, 0) < 0) {
487 fprintf(stderr, "kern.cputime sysctl not available\n");
490 ncpu = slen / sizeof(ncpu_time[0]);
493 for (cpu = 0; cpu < ncpu; ++cpu) {
496 d = (ncpu_time[cpu].cp_user + ncpu_time[cpu].cp_sys +
497 ncpu_time[cpu].cp_nice + ncpu_time[cpu].cp_intr) -
498 (ocpu_time[cpu].cp_user + ocpu_time[cpu].cp_sys +
499 ocpu_time[cpu].cp_nice + ocpu_time[cpu].cp_intr);
500 pcpu_state[cpu].cpu_qavg = (double)d / (pollrate * 1000000.0);
504 global_cpu_state.cpu_qavg = (double)delta / (pollrate * 1000000.0);
508 acpi_getcpufreq_str(int dom_id, int *highest0, int *lowest0)
510 char buf[256], sysid[64];
513 int v, highest, lowest;
516 * Retrieve availability list
518 snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.available",
520 buflen = sizeof(buf) - 1;
521 if (sysctlbyname(sysid, buf, &buflen, NULL, 0) < 0)
526 * Parse out the highest and lowest cpu frequencies
529 highest = lowest = 0;
530 while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) {
531 if ((lowest == 0 || lowest > v) &&
532 (LowestCpuFreq <= 0 || v >= LowestCpuFreq))
534 if ((highest == 0 || highest < v) &&
535 (HighestCpuFreq <= 0 || v <= HighestCpuFreq))
540 if (!TurboOpt && highest - v == 1)
549 acpi_getcpufreq_bin(int dom_id, int *highest0, int *lowest0)
557 * Retrieve availability list
559 snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.avail", dom_id);
560 freqlen = sizeof(freq);
561 if (sysctlbyname(sysid, freq, &freqlen, NULL, 0) < 0)
564 freqcnt = freqlen / sizeof(freq[0]);
568 for (i = freqcnt - 1; i >= 0; --i) {
570 if (LowestCpuFreq <= 0 || *lowest0 >= LowestCpuFreq)
576 if (!TurboOpt && freqcnt > 1 && freq[0] - freq[1] == 1) {
580 for (; i < freqcnt; ++i) {
581 if (HighestCpuFreq <= 0 || *highest0 <= HighestCpuFreq)
589 acpi_get_cpufreq(int dom_id, int *highest, int *lowest)
594 if (acpi_getcpufreq_bin(dom_id, highest, lowest))
596 acpi_getcpufreq_str(dom_id, highest, lowest);
603 fprintf(stderr, "usage: powerd [-cdeftQ] [-p hysteresis] "
604 "[-h highest_freq] [-l lowest_freq] "
605 "[-r poll_interval] [-u trigger_up] "
606 "[-B min_battery_life] [-L low_battery_linger] "
607 "[-P battery_poll_interval] [-T sample_interval] "
613 #define timespecsub(vvp, uvp) \
615 (vvp)->tv_sec -= (uvp)->tv_sec; \
616 (vvp)->tv_nsec -= (uvp)->tv_nsec; \
617 if ((vvp)->tv_nsec < 0) { \
619 (vvp)->tv_nsec += 1000000000; \
624 #define BAT_SYSCTL_TIME_MAX 50000000 /* unit: nanosecond */
629 struct timespec s, e;
633 clock_gettime(CLOCK_MONOTONIC_FAST, &s);
637 if (sysctlbyname("hw.acpi.acline", &val, &len, NULL, 0) < 0) {
638 /* No AC line information */
641 clock_gettime(CLOCK_MONOTONIC_FAST, &e);
644 if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
645 /* hw.acpi.acline takes too long to be useful */
646 syslog(LOG_NOTICE, "hw.acpi.acline takes too long");
650 clock_gettime(CLOCK_MONOTONIC_FAST, &s);
652 if (sysctlbyname("hw.acpi.battery.life", &val, &len, NULL, 0) < 0) {
653 /* No battery life */
656 clock_gettime(CLOCK_MONOTONIC_FAST, &e);
659 if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
660 /* hw.acpi.battery.life takes too long to be useful */
661 syslog(LOG_NOTICE, "hw.acpi.battery.life takes too long");
668 low_battery_alert(int life)
670 int fmt, stereo, freq;
673 syslog(LOG_ALERT, "low battery life %d%%, please plugin AC line, #%d",
674 life, BatShutdownLingerCnt);
675 ++BatShutdownLingerCnt;
677 if (!BatShutdownAudioAlert)
680 fd = open("/dev/dsp", O_WRONLY);
685 if (ioctl(fd, SNDCTL_DSP_SETFMT, &fmt, sizeof(fmt)) < 0)
689 if (ioctl(fd, SNDCTL_DSP_STEREO, &stereo, sizeof(stereo)) < 0)
693 if (ioctl(fd, SNDCTL_DSP_SPEED, &freq, sizeof(freq)) < 0)
696 write(fd, alert1, sizeof(alert1));
697 write(fd, alert1, sizeof(alert1));
706 struct timespec cur, ts;
710 clock_gettime(CLOCK_MONOTONIC_FAST, &cur);
712 timespecsub(&ts, &BatLifePrevT);
713 if (ts.tv_sec < BatLifePollIntvl)
717 len = sizeof(acline);
718 if (sysctlbyname("hw.acpi.acline", &acline, &len, NULL, 0) < 0)
721 BatShutdownLinger = -1;
722 BatShutdownLingerCnt = 0;
727 if (!BackLightDown && BackLightPct != 100) {
728 int backlight_max, backlight;
730 len = sizeof(backlight_max);
731 if (sysctlbyname("hw.backlight_max", &backlight_max, &len,
733 /* No more backlight adjustment */
735 goto after_backlight;
738 len = sizeof(OldBackLightLevel);
739 if (sysctlbyname("hw.backlight_level", &OldBackLightLevel, &len,
741 /* No more backlight adjustment */
743 goto after_backlight;
746 backlight = (backlight_max * BackLightPct) / 100;
747 if (backlight >= OldBackLightLevel) {
748 /* No more backlight adjustment */
750 goto after_backlight;
753 if (sysctlbyname("hw.backlight_level", NULL, NULL,
754 &backlight, sizeof(backlight)) < 0) {
755 /* No more backlight adjustment */
757 goto after_backlight;
764 if (sysctlbyname("hw.acpi.battery.life", &life, &len, NULL, 0) < 0)
767 if (BatShutdownLinger > 0) {
769 timespecsub(&ts, &BatShutdownStartT);
770 if (ts.tv_sec > BatShutdownLinger)
771 BatShutdownLinger = 0;
774 if (life <= BatLifeMin) {
775 if (BatShutdownLinger == 0 || BatShutdownLingerSet == 0) {
776 syslog(LOG_ALERT, "low battery life %d%%, "
777 "shutting down", life);
779 execlp("poweroff", "poweroff", NULL);
781 } else if (BatShutdownLinger < 0) {
782 BatShutdownLinger = BatShutdownLingerSet;
783 BatShutdownStartT = cur;
785 low_battery_alert(life);
795 slen = sizeof(NCpus);
796 if (sysctlbyname("hw.ncpu", &NCpus, &slen, NULL, 0) < 0)
797 err(1, "sysctlbyname hw.ncpu failed");
799 printf("hw.ncpu %d\n", NCpus);
807 slen = sizeof(usched_cpu_used);
808 if (sysctlbyname("kern.usched_global_cpumask", &usched_cpu_used, &slen,
810 err(1, "sysctlbyname kern.usched_global_cpumask failed");
814 printf("usched cpumask was: ");
815 for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i)
816 printf("%jx ", (uintmax_t)usched_cpu_used.ary[i]);
827 printf("usched cpumask: ");
828 for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i) {
830 (uintmax_t)usched_cpu_used.ary[i]);
834 sysctlbyname("kern.usched_global_cpumask", NULL, 0,
835 &usched_cpu_used, sizeof(usched_cpu_used));
845 if (sysctlbyname("machdep.perfbias0.hint", &hint, &len, NULL, 0) < 0)
851 set_perfbias(int cpu, int inc)
853 int hint = inc ? 0 : 15;
857 printf("cpu%d set perfbias hint %d\n", cpu, hint);
858 snprintf(sysid, sizeof(sysid), "machdep.perfbias%d.hint", cpu);
859 sysctlbyname(sysid, NULL, NULL, &hint, sizeof(hint));
865 struct cpu_state *state;
868 /* Get usched cpumask */
872 * Assume everything is used and maxed out, before we
876 CPUMASK_ASSBMASK(cpu_used, NCpus);
877 cpu_pwrdom_used = cpu_pwrdom_mask;
878 global_pcpu_limit = NCpus;
880 for (cpu = 0; cpu < NCpus; ++cpu) {
881 state = &pcpu_state[cpu];
883 state->cpu_uavg = 0.0;
884 state->cpu_davg = 0.0;
885 state->cpu_limit = 1;
886 state->cpu_count = 1;
887 snprintf(state->cpu_name, sizeof(state->cpu_name), "cpu%d",
891 state = &global_cpu_state;
892 state->cpu_uavg = 0.0;
893 state->cpu_davg = 0.0;
894 state->cpu_limit = NCpus;
895 state->cpu_count = NCpus;
896 strlcpy(state->cpu_name, "global", sizeof(state->cpu_name));
900 get_nstate(struct cpu_state *state, double srt)
902 int ustate, dstate, nstate;
905 state->cpu_uavg = (state->cpu_uavg * 2.0 + state->cpu_qavg) / 3.0;
907 state->cpu_davg = (state->cpu_davg * srt + state->cpu_qavg) / (srt + 1);
908 if (state->cpu_davg < state->cpu_uavg)
909 state->cpu_davg = state->cpu_uavg;
911 ustate = state->cpu_uavg / TriggerUp;
912 if (ustate < state->cpu_limit)
913 ustate = state->cpu_uavg / TriggerDown;
914 dstate = state->cpu_davg / TriggerUp;
915 if (dstate < state->cpu_limit)
916 dstate = state->cpu_davg / TriggerDown;
918 nstate = (ustate > dstate) ? ustate : dstate;
919 if (nstate > state->cpu_count)
920 nstate = state->cpu_count;
923 printf("%s qavg=%5.2f uavg=%5.2f davg=%5.2f "
924 "%2d ncpus=%d\n", state->cpu_name,
925 state->cpu_qavg, state->cpu_uavg, state->cpu_davg,
926 state->cpu_limit, nstate);
934 cpumask_t ocpu_used, ocpu_pwrdom_used;
935 int pnstate = 0, nstate;
939 * Find cpus requiring performance and their corresponding power
940 * domains. Save the number of cpus requiring performance in
943 ocpu_used = cpu_used;
944 ocpu_pwrdom_used = cpu_pwrdom_used;
946 CPUMASK_ASSZERO(cpu_used);
947 CPUMASK_ASSZERO(cpu_pwrdom_used);
949 for (cpu = 0; cpu < NCpus; ++cpu) {
950 struct cpu_state *state = &pcpu_state[cpu];
953 s = get_nstate(state, srt);
955 CPUMASK_ORBIT(cpu_used, cpu);
956 CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
960 state->cpu_limit = s;
964 * Calculate nstate, the number of cpus we wish to run at max
967 nstate = get_nstate(&global_cpu_state, srt);
969 if (nstate == global_cpu_state.cpu_limit &&
970 (pnstate == global_pcpu_limit || nstate > pnstate)) {
971 /* Nothing changed; keep the sets */
972 cpu_used = ocpu_used;
973 cpu_pwrdom_used = ocpu_pwrdom_used;
975 global_pcpu_limit = pnstate;
978 global_pcpu_limit = pnstate;
980 if (nstate > pnstate) {
982 * Add spare cpus to meet global performance requirement.
984 add_spare_cpus(ocpu_used, nstate - pnstate);
987 global_cpu_state.cpu_limit = nstate;
990 * Adjust cpu and cpu power domain performance
992 adj_perf(ocpu_used, ocpu_pwrdom_used);
996 add_spare_cpus(const cpumask_t ocpu_used, int ncpu)
998 cpumask_t saved_pwrdom, xcpu_used;
1002 * Find more cpus in the previous cpu set.
1004 xcpu_used = cpu_used;
1005 CPUMASK_XORMASK(xcpu_used, ocpu_used);
1006 while (CPUMASK_TESTNZERO(xcpu_used)) {
1007 cpu = BSFCPUMASK(xcpu_used);
1008 CPUMASK_NANDBIT(xcpu_used, cpu);
1010 if (CPUMASK_TESTBIT(ocpu_used, cpu)) {
1011 CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
1012 CPUMASK_ORBIT(cpu_used, cpu);
1020 * Find more cpus in the used cpu power domains.
1022 saved_pwrdom = cpu_pwrdom_used;
1024 while (CPUMASK_TESTNZERO(saved_pwrdom)) {
1025 cpumask_t unused_cpumask;
1028 dom = BSFCPUMASK(saved_pwrdom);
1029 CPUMASK_NANDBIT(saved_pwrdom, dom);
1031 unused_cpumask = cpu_pwrdomain[dom]->dom_cpumask;
1032 CPUMASK_NANDMASK(unused_cpumask, cpu_used);
1034 while (CPUMASK_TESTNZERO(unused_cpumask)) {
1035 cpu = BSFCPUMASK(unused_cpumask);
1036 CPUMASK_NANDBIT(unused_cpumask, cpu);
1038 CPUMASK_ORBIT(cpu_pwrdom_used, dom);
1039 CPUMASK_ORBIT(cpu_used, cpu);
1048 * Find more cpus in unused cpu power domains
1050 saved_pwrdom = cpu_pwrdom_mask;
1051 CPUMASK_NANDMASK(saved_pwrdom, cpu_pwrdom_used);
1055 printf("%d cpus not found\n", ncpu);
1059 acpi_set_cpufreq(int dom, int inc)
1061 int lowest, highest, desired;
1064 acpi_get_cpufreq(dom, &highest, &lowest);
1065 if (highest == 0 || lowest == 0)
1067 desired = inc ? highest : lowest;
1070 printf("dom%d set frequency %d\n", dom, desired);
1071 snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.select", dom);
1072 sysctlbyname(sysid, NULL, NULL, &desired, sizeof(desired));
1076 adj_cpu_pwrdom(int dom, int inc)
1079 acpi_set_cpufreq(dom, inc);
1083 adj_cpu_perf(int cpu, int inc)
1087 printf("cpu%d increase perf\n", cpu);
1089 printf("cpu%d decrease perf\n", cpu);
1093 set_perfbias(cpu, inc);
1095 set_cstate(cpu, inc);
1099 adj_perf(cpumask_t xcpu_used, cpumask_t xcpu_pwrdom_used)
1101 cpumask_t old_usched_used;
1105 * Set cpus requiring performance to the userland process
1106 * scheduler. Leave the rest of cpus unmapped.
1108 old_usched_used = usched_cpu_used;
1109 usched_cpu_used = cpu_used;
1110 if (CPUMASK_TESTZERO(usched_cpu_used))
1111 CPUMASK_ORBIT(usched_cpu_used, 0);
1112 if (CPUMASK_CMPMASKNEQ(usched_cpu_used, old_usched_used))
1116 * Adjust per-cpu performance.
1118 CPUMASK_XORMASK(xcpu_used, cpu_used);
1119 while (CPUMASK_TESTNZERO(xcpu_used)) {
1120 cpu = BSFCPUMASK(xcpu_used);
1121 CPUMASK_NANDBIT(xcpu_used, cpu);
1123 if (CPUMASK_TESTBIT(cpu_used, cpu)) {
1124 /* Increase cpu performance */
1127 /* Decrease cpu performance */
1130 adj_cpu_perf(cpu, inc);
1134 * Adjust cpu power domain performance. This could affect
1137 CPUMASK_XORMASK(xcpu_pwrdom_used, cpu_pwrdom_used);
1138 while (CPUMASK_TESTNZERO(xcpu_pwrdom_used)) {
1141 dom = BSFCPUMASK(xcpu_pwrdom_used);
1142 CPUMASK_NANDBIT(xcpu_pwrdom_used, dom);
1144 if (CPUMASK_TESTBIT(cpu_pwrdom_used, dom)) {
1145 /* Increase cpu power domain performance */
1148 /* Decrease cpu power domain performance */
1151 adj_cpu_pwrdom(dom, inc);
1158 cpumask_t ocpu_used, ocpu_pwrdom_used;
1160 /* Remove highest cpu frequency limitation */
1163 ocpu_used = cpu_used;
1164 ocpu_pwrdom_used = cpu_pwrdom_used;
1166 /* Max out all cpus and cpu power domains performance */
1167 CPUMASK_ASSBMASK(cpu_used, NCpus);
1168 cpu_pwrdom_used = cpu_pwrdom_mask;
1170 adj_perf(ocpu_used, ocpu_pwrdom_used);
1174 * Restore the original mwait C-state
1177 printf("global set cstate %s\n", orig_global_cx);
1178 sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1179 orig_global_cx, strlen(orig_global_cx) + 1);
1186 char cx_supported[1024];
1189 int idle_hlt, deep = 1;
1192 len = sizeof(idle_hlt);
1193 if (sysctlbyname("machdep.cpu_idle_hlt", &idle_hlt, &len, NULL, 0) < 0)
1198 len = sizeof(cx_supported);
1199 if (sysctlbyname("machdep.mwait.CX.supported", cx_supported, &len,
1203 len = sizeof(orig_global_cx);
1204 if (sysctlbyname("machdep.mwait.CX.idle", orig_global_cx, &len,
1208 strlcpy(cpu_perf_cx, "AUTODEEP", sizeof(cpu_perf_cx));
1209 cpu_perf_cxlen = strlen(cpu_perf_cx) + 1;
1210 if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1211 cpu_perf_cx, cpu_perf_cxlen) < 0) {
1212 /* AUTODEEP is not supported; try AUTO */
1214 strlcpy(cpu_perf_cx, "AUTO", sizeof(cpu_perf_cx));
1215 cpu_perf_cxlen = strlen(cpu_perf_cx) + 1;
1216 if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1217 cpu_perf_cx, cpu_perf_cxlen) < 0)
1225 for (ptr = strtok(cx_supported, " "); ptr != NULL;
1226 ptr = strtok(NULL, " ")) {
1227 if (target == NULL ||
1228 (target != NULL && strcmp(ptr, target) == 0)) {
1229 strlcpy(cpu_idle_cx, ptr, sizeof(cpu_idle_cx));
1230 cpu_idle_cxlen = strlen(cpu_idle_cx) + 1;
1235 if (cpu_idle_cxlen == 0)
1239 printf("cstate orig %s, perf %s, idle %s\n",
1240 orig_global_cx, cpu_perf_cx, cpu_idle_cx);
1246 set_cstate(int cpu, int inc)
1254 len = cpu_perf_cxlen;
1257 len = cpu_idle_cxlen;
1261 printf("cpu%d set cstate %s\n", cpu, cst);
1262 snprintf(sysid, sizeof(sysid), "machdep.mwait.CX.idle%d", cpu);
1263 sysctlbyname(sysid, NULL, NULL, cst, len);
1267 restore_backlight(void)
1269 if (BackLightDown) {
1271 sysctlbyname("hw.backlight_level", NULL, NULL,
1272 &OldBackLightLevel, sizeof(OldBackLightLevel));