2 * Copyright (c) 2010,2016 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * - Monitor the cpu load and adjust cpu and cpu power domain
38 * performance accordingly.
39 * - Monitor battery life. Raise alerts and shut down the machine
40 * if battery life runs low.
43 #define _KERNEL_STRUCTURES
44 #include <sys/types.h>
45 #include <sys/sysctl.h>
46 #include <sys/kinfo.h>
48 #include <sys/queue.h>
49 #include <sys/soundcard.h>
50 #include <sys/sensors.h>
52 #include <machine/cpufunc.h>
53 #include <machine/cpumask.h>
64 #define MAXDOM MAXCPU /* worst case, 1 cpu per domain */
70 TAILQ_ENTRY(cpu_pwrdom) dom_link;
73 cpumask_t dom_cpumask;
78 double cpu_uavg; /* used for speeding up */
79 double cpu_davg; /* used for slowing down */
85 static void usage(void);
86 static void get_ncpus(void);
87 static void mon_cputemp(void);
90 static void get_uschedcpus(void);
91 static void set_uschedcpus(void);
94 static int has_perfbias(void);
95 static void set_perfbias(int, int);
98 static void acpi_getcpufreq_str(int, int *, int *);
99 static int acpi_getcpufreq_bin(int, int *, int *);
100 static void acpi_get_cpufreq(int, int *, int *);
101 static void acpi_set_cpufreq(int, int);
102 static int acpi_get_cpupwrdom(void);
104 /* mwait C-state hint */
105 static int probe_cstate(void);
106 static void set_cstate(int, int);
108 /* Performance monitoring */
109 static void init_perf(void);
110 static void mon_perf(double);
111 static void adj_perf(cpumask_t, cpumask_t);
112 static void adj_cpu_pwrdom(int, int);
113 static void adj_cpu_perf(int, int);
114 static void get_cputime(double);
115 static int get_nstate(struct cpu_state *, double);
116 static void add_spare_cpus(const cpumask_t, int);
117 static void restore_perf(void);
118 static void set_global_freq(int freq);
120 /* Battery monitoring */
121 static int has_battery(void);
122 static int mon_battery(void);
123 static void low_battery_alert(int);
126 static void restore_backlight(void);
128 /* Runtime states for performance monitoring */
129 static int global_pcpu_limit;
130 static struct cpu_state pcpu_state[MAXCPU];
131 static struct cpu_state global_cpu_state;
132 static cpumask_t cpu_used; /* cpus w/ high perf */
133 static cpumask_t cpu_pwrdom_used; /* cpu power domains w/ high perf */
134 static cpumask_t usched_cpu_used; /* cpus for usched */
137 static cpumask_t cpu_pwrdom_mask; /* usable cpu power domains */
138 static int cpu2pwrdom[MAXCPU]; /* cpu to cpu power domain map */
139 static struct cpu_pwrdom *cpu_pwrdomain[MAXDOM];
140 static int NCpus; /* # of cpus */
141 static char orig_global_cx[CST_STRLEN];
142 static char cpu_perf_cx[CST_STRLEN];
143 static int cpu_perf_cxlen;
144 static char cpu_idle_cx[CST_STRLEN];
145 static int cpu_idle_cxlen;
146 static int FreqAry[MAXFREQ];
148 static int SavedPXGlobal;
151 static int TurboOpt = 1;
153 static int Hysteresis = 10; /* percentage */
154 static double TriggerUp = 0.25; /* single-cpu load to force max freq */
155 static double TriggerDown; /* load per cpu to force the min freq */
156 static int HasPerfbias = 0;
157 static int AdjustCpuFreq = 1;
158 static int AdjustCstate = 0;
159 static int HighestCpuFreq;
160 static int LowestCpuFreq;
162 static int AdjustCpuFreqOverride;
164 static volatile int stopped;
166 /* Battery life monitoring */
167 static int BatLifeMin = 2; /* shutdown the box, if low on battery life */
168 static struct timespec BatLifePrevT;
169 static int BatLifePollIntvl = 5; /* unit: sec */
170 static struct timespec BatShutdownStartT;
171 static int BatShutdownLinger = -1;
172 static int BatShutdownLingerSet = 60; /* unit: sec */
173 static int BatShutdownLingerCnt;
174 static int BatShutdownAudioAlert = 1;
175 static int MinTemp = 75;
176 static int MaxTemp = 85;
177 static int BackLightPct = 100;
178 static int OldBackLightLevel;
179 static int BackLightDown;
181 static void sigintr(int signo);
184 main(int ac, char **av)
195 srt = 8.0; /* time for samples - 8 seconds */
196 pollrate = 1.0; /* polling rate in seconds */
198 while ((ch = getopt(ac, av, "b:cdefh:l:p:r:tu:B:H:L:P:QT:")) != -1) {
201 BackLightPct = strtol(optarg, NULL, 10);
216 HighestCpuFreq = strtol(optarg, NULL, 10);
219 LowestCpuFreq = strtol(optarg, NULL, 10);
222 Hysteresis = (int)strtol(optarg, NULL, 10);
225 pollrate = strtod(optarg, NULL);
231 TriggerUp = (double)strtol(optarg, NULL, 10) / 100;
234 BatLifeMin = strtol(optarg, NULL, 10);
237 MaxTemp = strtol(optarg, &p2, 0);
240 MaxTemp = strtol(p2 + 1, NULL, 0);
242 MinTemp = MaxTemp * 9 / 10;
246 BatShutdownLingerSet = strtol(optarg, NULL, 10);
247 if (BatShutdownLingerSet < 0)
248 BatShutdownLingerSet = 0;
251 BatLifePollIntvl = strtol(optarg, NULL, 10);
254 BatShutdownAudioAlert = 0;
257 srt = strtod(optarg, NULL);
269 /* Get number of cpus */
273 acpi_get_cpufreq(0, &lowest, &highest);
275 if (Hysteresis < 0 || Hysteresis > 99) {
276 fprintf(stderr, "Invalid hysteresis value\n");
280 if (TriggerUp < 0 || TriggerUp > 1) {
281 fprintf(stderr, "Invalid load limit value\n");
285 if (BackLightPct > 100 || BackLightPct <= 0) {
286 fprintf(stderr, "Invalid backlight setting, ignore\n");
290 TriggerDown = TriggerUp - (TriggerUp * (double) Hysteresis / 100);
293 * Make sure powerd is not already running.
295 PowerFd = open("/var/run/powerd.pid", O_CREAT|O_RDWR, 0644);
298 "Cannot create /var/run/powerd.pid, "
299 "continuing anyway\n");
304 r = read(PowerFd, buf, sizeof(buf) - 1);
307 pid = strtol(buf, NULL, 0);
309 if (flock(PowerFd, LOCK_EX|LOCK_NB) < 0) {
312 flock(PowerFd, LOCK_EX);
313 fprintf(stderr, "restarting powerd\n");
316 "powerd is already running, "
317 "unable to kill pid for restart\n");
321 lseek(PowerFd, 0L, 0);
325 * Daemonize and set pid
329 openlog("powerd", LOG_CONS | LOG_PID, LOG_DAEMON);
333 ftruncate(PowerFd, 0);
334 snprintf(buf, sizeof(buf), "%d\n", (int)getpid());
335 write(PowerFd, buf, strlen(buf));
338 /* Do we need to monitor battery life? */
339 if (BatLifePollIntvl <= 0)
342 monbat = has_battery();
344 /* Do we have perfbias(4)? */
346 HasPerfbias = has_perfbias();
348 /* Could we adjust C-state? */
350 AdjustCstate = probe_cstate();
353 * Wait for the hw.acpi.cpu.px_dom* sysctls to be created by the kernel.
355 * Since hw.acpi.cpu.px_dom* creation is queued into ACPI
356 * taskqueue and ACPI taskqueue is shared across various
357 * ACPI modules, any delay in other modules may cause
358 * hw.acpi.cpu.px_dom* to be created at a much later time
359 * (e.g. cmbat module's task could take quite a lot of time).
362 /* Prime delta cputime calculation. */
363 get_cputime(pollrate);
365 /* Wait for all cpus to appear */
366 if (acpi_get_cpupwrdom())
368 usleep((int)(pollrate * 1000000.0));
372 * Catch some signals so that max performance can be restored.
374 signal(SIGINT, sigintr);
375 signal(SIGTERM, sigintr);
377 /* Initialize performance states */
380 srt = srt / pollrate; /* convert to sample count */
382 printf("samples for downgrading: %5.2f\n", srt);
389 * Monitor performance
391 get_cputime(pollrate);
399 monbat = mon_battery();
401 usleep((int)(pollrate * 1000000.0));
405 * Set to maximum performance if killed.
407 syslog(LOG_INFO, "killed, setting max and exiting");
409 set_global_freq(SavedPXGlobal);
417 sigintr(int signo __unused)
423 * Figure out the cpu power domains.
426 acpi_get_cpupwrdom(void)
428 struct cpu_pwrdom *dom;
429 cpumask_t pwrdom_mask;
434 int n, i, ncpu = 0, dom_id;
436 memset(cpu2pwrdom, 0, sizeof(cpu2pwrdom));
437 memset(cpu_pwrdomain, 0, sizeof(cpu_pwrdomain));
438 CPUMASK_ASSZERO(cpu_pwrdom_mask);
440 for (i = 0; i < MAXDOM; ++i) {
441 snprintf(buf, sizeof(buf),
442 "hw.acpi.cpu.px_dom%d.available", i);
443 if (sysctlbyname(buf, NULL, NULL, NULL, 0) < 0)
446 dom = calloc(1, sizeof(*dom));
449 if (cpu_pwrdomain[i] != NULL) {
450 fprintf(stderr, "cpu power domain %d exists\n", i);
453 cpu_pwrdomain[i] = dom;
454 CPUMASK_ORBIT(cpu_pwrdom_mask, i);
456 pwrdom_mask = cpu_pwrdom_mask;
458 while (CPUMASK_TESTNZERO(pwrdom_mask)) {
459 dom_id = BSFCPUMASK(pwrdom_mask);
460 CPUMASK_NANDBIT(pwrdom_mask, dom_id);
461 dom = cpu_pwrdomain[dom_id];
463 CPUMASK_ASSZERO(dom->dom_cpumask);
465 snprintf(buf, sizeof(buf),
466 "hw.acpi.cpu.px_dom%d.members", dom->dom_id);
467 msize = sizeof(members);
468 if (sysctlbyname(buf, members, &msize, NULL, 0) < 0) {
469 cpu_pwrdomain[dom_id] = NULL;
475 for (str = strtok(members, " "); str; str = strtok(NULL, " ")) {
477 sscanf(str, "cpu%d", &n);
481 CPUMASK_ORBIT(dom->dom_cpumask, n);
482 cpu2pwrdom[n] = dom->dom_id;
485 if (dom->dom_ncpus == 0) {
486 cpu_pwrdomain[dom_id] = NULL;
491 printf("dom%d cpumask: ", dom->dom_id);
492 for (i = 0; i < (int)NELEM(dom->dom_cpumask.ary); ++i) {
494 (uintmax_t)dom->dom_cpumask.ary[i]);
502 printf("Found %d cpus, expecting %d\n", ncpu, NCpus);
504 pwrdom_mask = cpu_pwrdom_mask;
505 while (CPUMASK_TESTNZERO(pwrdom_mask)) {
506 dom_id = BSFCPUMASK(pwrdom_mask);
507 CPUMASK_NANDBIT(pwrdom_mask, dom_id);
508 dom = cpu_pwrdomain[dom_id];
518 * Save per-cpu load and sum of per-cpu load.
521 get_cputime(double pollrate)
523 static struct kinfo_cputime ocpu_time[MAXCPU];
524 static struct kinfo_cputime ncpu_time[MAXCPU];
530 bcopy(ncpu_time, ocpu_time, sizeof(struct kinfo_cputime) * NCpus);
532 slen = sizeof(ncpu_time);
533 if (sysctlbyname("kern.cputime", &ncpu_time, &slen, NULL, 0) < 0) {
534 fprintf(stderr, "kern.cputime sysctl not available\n");
537 ncpu = slen / sizeof(ncpu_time[0]);
540 for (cpu = 0; cpu < ncpu; ++cpu) {
543 d = (ncpu_time[cpu].cp_user + ncpu_time[cpu].cp_sys +
544 ncpu_time[cpu].cp_nice + ncpu_time[cpu].cp_intr) -
545 (ocpu_time[cpu].cp_user + ocpu_time[cpu].cp_sys +
546 ocpu_time[cpu].cp_nice + ocpu_time[cpu].cp_intr);
547 pcpu_state[cpu].cpu_qavg = (double)d / (pollrate * 1000000.0);
551 global_cpu_state.cpu_qavg = (double)delta / (pollrate * 1000000.0);
555 acpi_getcpufreq_str(int dom_id, int *highest0, int *lowest0)
557 char buf[256], sysid[64];
560 int v, highest, lowest;
564 * Retrieve availability list
566 snprintf(sysid, sizeof(sysid),
567 "hw.acpi.cpu.px_dom%d.available", dom_id);
568 buflen = sizeof(buf) - 1;
569 if (sysctlbyname(sysid, buf, &buflen, NULL, 0) < 0)
574 * Parse out the highest and lowest cpu frequencies
577 highest = lowest = 0;
579 while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) {
580 if ((lowest == 0 || lowest > v) &&
581 (LowestCpuFreq <= 0 || v >= LowestCpuFreq))
583 if ((highest == 0 || highest < v) &&
584 (HighestCpuFreq <= 0 || v <= HighestCpuFreq))
589 if (!TurboOpt && highest - v == 1)
602 while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) {
605 FreqAry[--freqidx] = v;
613 acpi_getcpufreq_bin(int dom_id, int *highest0, int *lowest0)
620 * Retrieve availability list
622 snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.avail", dom_id);
623 freqlen = sizeof(FreqAry);
624 if (sysctlbyname(sysid, FreqAry, &freqlen, NULL, 0) < 0)
627 NFreq = freqcnt = freqlen / sizeof(FreqAry[0]);
631 for (i = freqcnt - 1; i >= 0; --i) {
632 *lowest0 = FreqAry[i];
633 if (LowestCpuFreq <= 0 || *lowest0 >= LowestCpuFreq)
638 *highest0 = FreqAry[0];
639 if (!TurboOpt && freqcnt > 1 && FreqAry[0] - FreqAry[1] == 1) {
641 *highest0 = FreqAry[1];
643 for (; i < freqcnt; ++i) {
644 if (HighestCpuFreq <= 0 || *highest0 <= HighestCpuFreq)
646 *highest0 = FreqAry[i];
652 acpi_get_cpufreq(int dom_id, int *highest, int *lowest)
657 if (acpi_getcpufreq_bin(dom_id, highest, lowest))
659 acpi_getcpufreq_str(dom_id, highest, lowest);
666 fprintf(stderr, "usage: powerd [-cdeftQ] [-p hysteresis] "
667 "[-h highest_freq] [-l lowest_freq] "
668 "[-r poll_interval] [-u trigger_up] "
669 "[-B min_battery_life] [-L low_battery_linger] "
670 "[-P battery_poll_interval] [-T sample_interval] "
676 #define timespecsub(vvp, uvp) \
678 (vvp)->tv_sec -= (uvp)->tv_sec; \
679 (vvp)->tv_nsec -= (uvp)->tv_nsec; \
680 if ((vvp)->tv_nsec < 0) { \
682 (vvp)->tv_nsec += 1000000000; \
687 #define BAT_SYSCTL_TIME_MAX 50000000 /* unit: nanosecond */
692 struct timespec s, e;
696 clock_gettime(CLOCK_MONOTONIC_FAST, &s);
700 if (sysctlbyname("hw.acpi.acline", &val, &len, NULL, 0) < 0) {
701 /* No AC line information */
704 clock_gettime(CLOCK_MONOTONIC_FAST, &e);
707 if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
708 /* hw.acpi.acline takes too long to be useful */
709 syslog(LOG_NOTICE, "hw.acpi.acline takes too long");
713 clock_gettime(CLOCK_MONOTONIC_FAST, &s);
715 if (sysctlbyname("hw.acpi.battery.life", &val, &len, NULL, 0) < 0) {
716 /* No battery life */
719 clock_gettime(CLOCK_MONOTONIC_FAST, &e);
722 if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
723 /* hw.acpi.battery.life takes too long to be useful */
724 syslog(LOG_NOTICE, "hw.acpi.battery.life takes too long");
731 low_battery_alert(int life)
733 int fmt, stereo, freq;
736 syslog(LOG_ALERT, "low battery life %d%%, please plugin AC line, #%d",
737 life, BatShutdownLingerCnt);
738 ++BatShutdownLingerCnt;
740 if (!BatShutdownAudioAlert)
743 fd = open("/dev/dsp", O_WRONLY);
748 if (ioctl(fd, SNDCTL_DSP_SETFMT, &fmt, sizeof(fmt)) < 0)
752 if (ioctl(fd, SNDCTL_DSP_STEREO, &stereo, sizeof(stereo)) < 0)
756 if (ioctl(fd, SNDCTL_DSP_SPEED, &freq, sizeof(freq)) < 0)
759 write(fd, alert1, sizeof(alert1));
760 write(fd, alert1, sizeof(alert1));
769 struct timespec cur, ts;
773 clock_gettime(CLOCK_MONOTONIC_FAST, &cur);
775 timespecsub(&ts, &BatLifePrevT);
776 if (ts.tv_sec < BatLifePollIntvl)
780 len = sizeof(acline);
781 if (sysctlbyname("hw.acpi.acline", &acline, &len, NULL, 0) < 0)
784 BatShutdownLinger = -1;
785 BatShutdownLingerCnt = 0;
790 if (!BackLightDown && BackLightPct != 100) {
791 int backlight_max, backlight;
793 len = sizeof(backlight_max);
794 if (sysctlbyname("hw.backlight_max", &backlight_max, &len,
796 /* No more backlight adjustment */
798 goto after_backlight;
801 len = sizeof(OldBackLightLevel);
802 if (sysctlbyname("hw.backlight_level", &OldBackLightLevel, &len,
804 /* No more backlight adjustment */
806 goto after_backlight;
809 backlight = (backlight_max * BackLightPct) / 100;
810 if (backlight >= OldBackLightLevel) {
811 /* No more backlight adjustment */
813 goto after_backlight;
816 if (sysctlbyname("hw.backlight_level", NULL, NULL,
817 &backlight, sizeof(backlight)) < 0) {
818 /* No more backlight adjustment */
820 goto after_backlight;
827 if (sysctlbyname("hw.acpi.battery.life", &life, &len, NULL, 0) < 0)
830 if (BatShutdownLinger > 0) {
832 timespecsub(&ts, &BatShutdownStartT);
833 if (ts.tv_sec > BatShutdownLinger)
834 BatShutdownLinger = 0;
837 if (life <= BatLifeMin) {
838 if (BatShutdownLinger == 0 || BatShutdownLingerSet == 0) {
839 syslog(LOG_ALERT, "low battery life %d%%, "
840 "shutting down", life);
842 execlp("poweroff", "poweroff", NULL);
844 } else if (BatShutdownLinger < 0) {
845 BatShutdownLinger = BatShutdownLingerSet;
846 BatShutdownStartT = cur;
848 low_battery_alert(life);
858 slen = sizeof(NCpus);
859 if (sysctlbyname("hw.ncpu", &NCpus, &slen, NULL, 0) < 0)
860 err(1, "sysctlbyname hw.ncpu failed");
862 printf("hw.ncpu %d\n", NCpus);
870 slen = sizeof(usched_cpu_used);
871 if (sysctlbyname("kern.usched_global_cpumask", &usched_cpu_used, &slen,
873 err(1, "sysctlbyname kern.usched_global_cpumask failed");
877 printf("usched cpumask was: ");
878 for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i)
879 printf("%jx ", (uintmax_t)usched_cpu_used.ary[i]);
890 printf("usched cpumask: ");
891 for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i) {
893 (uintmax_t)usched_cpu_used.ary[i]);
897 sysctlbyname("kern.usched_global_cpumask", NULL, 0,
898 &usched_cpu_used, sizeof(usched_cpu_used));
908 if (sysctlbyname("machdep.perfbias0.hint", &hint, &len, NULL, 0) < 0)
914 set_perfbias(int cpu, int inc)
916 int hint = inc ? 0 : 15;
920 printf("cpu%d set perfbias hint %d\n", cpu, hint);
921 snprintf(sysid, sizeof(sysid), "machdep.perfbias%d.hint", cpu);
922 sysctlbyname(sysid, NULL, NULL, &hint, sizeof(hint));
928 struct cpu_state *state;
931 /* Get usched cpumask */
935 * Assume everything is used and maxed out, before we
939 CPUMASK_ASSBMASK(cpu_used, NCpus);
940 cpu_pwrdom_used = cpu_pwrdom_mask;
941 global_pcpu_limit = NCpus;
943 for (cpu = 0; cpu < NCpus; ++cpu) {
944 state = &pcpu_state[cpu];
946 state->cpu_uavg = 0.0;
947 state->cpu_davg = 0.0;
948 state->cpu_limit = 1;
949 state->cpu_count = 1;
950 snprintf(state->cpu_name, sizeof(state->cpu_name), "cpu%d",
954 state = &global_cpu_state;
955 state->cpu_uavg = 0.0;
956 state->cpu_davg = 0.0;
957 state->cpu_limit = NCpus;
958 state->cpu_count = NCpus;
959 strlcpy(state->cpu_name, "global", sizeof(state->cpu_name));
963 get_nstate(struct cpu_state *state, double srt)
965 int ustate, dstate, nstate;
968 state->cpu_uavg = (state->cpu_uavg * 2.0 + state->cpu_qavg) / 3.0;
970 state->cpu_davg = (state->cpu_davg * srt + state->cpu_qavg) / (srt + 1);
971 if (state->cpu_davg < state->cpu_uavg)
972 state->cpu_davg = state->cpu_uavg;
974 ustate = state->cpu_uavg / TriggerUp;
975 if (ustate < state->cpu_limit)
976 ustate = state->cpu_uavg / TriggerDown;
977 dstate = state->cpu_davg / TriggerUp;
978 if (dstate < state->cpu_limit)
979 dstate = state->cpu_davg / TriggerDown;
981 nstate = (ustate > dstate) ? ustate : dstate;
982 if (nstate > state->cpu_count)
983 nstate = state->cpu_count;
986 printf("%s qavg=%5.2f uavg=%5.2f davg=%5.2f "
987 "%2d ncpus=%d\n", state->cpu_name,
988 state->cpu_qavg, state->cpu_uavg, state->cpu_davg,
989 state->cpu_limit, nstate);
997 cpumask_t ocpu_used, ocpu_pwrdom_used;
998 int pnstate = 0, nstate;
1002 * Find cpus requiring performance and their corresponding power
1003 * domains. Save the number of cpus requiring performance in
1006 ocpu_used = cpu_used;
1007 ocpu_pwrdom_used = cpu_pwrdom_used;
1009 CPUMASK_ASSZERO(cpu_used);
1010 CPUMASK_ASSZERO(cpu_pwrdom_used);
1012 for (cpu = 0; cpu < NCpus; ++cpu) {
1013 struct cpu_state *state = &pcpu_state[cpu];
1016 s = get_nstate(state, srt);
1018 CPUMASK_ORBIT(cpu_used, cpu);
1019 CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
1023 state->cpu_limit = s;
1027 * Calculate nstate, the number of cpus we wish to run at max
1030 nstate = get_nstate(&global_cpu_state, srt);
1032 if (nstate == global_cpu_state.cpu_limit &&
1033 (pnstate == global_pcpu_limit || nstate > pnstate)) {
1034 /* Nothing changed; keep the sets */
1035 cpu_used = ocpu_used;
1036 cpu_pwrdom_used = ocpu_pwrdom_used;
1038 global_pcpu_limit = pnstate;
1041 global_pcpu_limit = pnstate;
1043 if (nstate > pnstate) {
1045 * Add spare cpus to meet global performance requirement.
1047 add_spare_cpus(ocpu_used, nstate - pnstate);
1050 global_cpu_state.cpu_limit = nstate;
1053 * Adjust cpu and cpu power domain performance
1055 adj_perf(ocpu_used, ocpu_pwrdom_used);
1059 add_spare_cpus(const cpumask_t ocpu_used, int ncpu)
1061 cpumask_t saved_pwrdom, xcpu_used;
1065 * Find more cpus in the previous cpu set.
1067 xcpu_used = cpu_used;
1068 CPUMASK_XORMASK(xcpu_used, ocpu_used);
1069 while (CPUMASK_TESTNZERO(xcpu_used)) {
1070 cpu = BSFCPUMASK(xcpu_used);
1071 CPUMASK_NANDBIT(xcpu_used, cpu);
1073 if (CPUMASK_TESTBIT(ocpu_used, cpu)) {
1074 CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
1075 CPUMASK_ORBIT(cpu_used, cpu);
1083 * Find more cpus in the used cpu power domains.
1085 saved_pwrdom = cpu_pwrdom_used;
1087 while (CPUMASK_TESTNZERO(saved_pwrdom)) {
1088 cpumask_t unused_cpumask;
1091 dom = BSFCPUMASK(saved_pwrdom);
1092 CPUMASK_NANDBIT(saved_pwrdom, dom);
1094 unused_cpumask = cpu_pwrdomain[dom]->dom_cpumask;
1095 CPUMASK_NANDMASK(unused_cpumask, cpu_used);
1097 while (CPUMASK_TESTNZERO(unused_cpumask)) {
1098 cpu = BSFCPUMASK(unused_cpumask);
1099 CPUMASK_NANDBIT(unused_cpumask, cpu);
1101 CPUMASK_ORBIT(cpu_pwrdom_used, dom);
1102 CPUMASK_ORBIT(cpu_used, cpu);
1111 * Find more cpus in unused cpu power domains
1113 saved_pwrdom = cpu_pwrdom_mask;
1114 CPUMASK_NANDMASK(saved_pwrdom, cpu_pwrdom_used);
1118 printf("%d cpus not found\n", ncpu);
1122 acpi_set_cpufreq(int dom, int inc)
1124 int lowest, highest, desired;
1127 acpi_get_cpufreq(dom, &highest, &lowest);
1128 if (highest == 0 || lowest == 0)
1130 desired = inc ? highest : lowest;
1133 printf("dom%d set frequency %d\n", dom, desired);
1134 snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.select", dom);
1135 sysctlbyname(sysid, NULL, NULL, &desired, sizeof(desired));
1139 adj_cpu_pwrdom(int dom, int inc)
1141 if (AdjustCpuFreq && (inc == 0 || AdjustCpuFreqOverride == 0))
1142 acpi_set_cpufreq(dom, inc);
1146 adj_cpu_perf(int cpu, int inc)
1150 printf("cpu%d increase perf\n", cpu);
1152 printf("cpu%d decrease perf\n", cpu);
1156 set_perfbias(cpu, inc);
1158 set_cstate(cpu, inc);
1162 adj_perf(cpumask_t xcpu_used, cpumask_t xcpu_pwrdom_used)
1164 cpumask_t old_usched_used;
1168 * Assign cpus requiring performance to the userland process
1169 * scheduler. Leave the rest of the cpus unmapped.
1171 old_usched_used = usched_cpu_used;
1172 usched_cpu_used = cpu_used;
1173 if (CPUMASK_TESTZERO(usched_cpu_used))
1174 CPUMASK_ORBIT(usched_cpu_used, 0);
1175 if (CPUMASK_CMPMASKNEQ(usched_cpu_used, old_usched_used))
1179 * Adjust per-cpu performance.
1181 CPUMASK_XORMASK(xcpu_used, cpu_used);
1182 while (CPUMASK_TESTNZERO(xcpu_used)) {
1183 cpu = BSFCPUMASK(xcpu_used);
1184 CPUMASK_NANDBIT(xcpu_used, cpu);
1186 if (CPUMASK_TESTBIT(cpu_used, cpu)) {
1187 /* Increase cpu performance */
1190 /* Decrease cpu performance */
1193 adj_cpu_perf(cpu, inc);
1197 * Adjust cpu power domain performance. This could affect
1200 CPUMASK_XORMASK(xcpu_pwrdom_used, cpu_pwrdom_used);
1201 while (CPUMASK_TESTNZERO(xcpu_pwrdom_used)) {
1204 dom = BSFCPUMASK(xcpu_pwrdom_used);
1205 CPUMASK_NANDBIT(xcpu_pwrdom_used, dom);
1207 if (CPUMASK_TESTBIT(cpu_pwrdom_used, dom)) {
1208 /* Increase cpu power domain performance */
1211 /* Decrease cpu power domain performance */
1214 adj_cpu_pwrdom(dom, inc);
1221 cpumask_t ocpu_used, ocpu_pwrdom_used;
1223 /* Remove highest cpu frequency limitation */
1226 ocpu_used = cpu_used;
1227 ocpu_pwrdom_used = cpu_pwrdom_used;
1229 /* Max out all cpus and cpu power domains performance */
1230 CPUMASK_ASSBMASK(cpu_used, NCpus);
1231 cpu_pwrdom_used = cpu_pwrdom_mask;
1233 adj_perf(ocpu_used, ocpu_pwrdom_used);
1237 * Restore the original mwait C-state
1240 printf("global set cstate %s\n", orig_global_cx);
1241 sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1242 orig_global_cx, strlen(orig_global_cx) + 1);
1249 char cx_supported[1024];
1252 int idle_hlt, deep = 1;
1255 len = sizeof(idle_hlt);
1256 if (sysctlbyname("machdep.cpu_idle_hlt", &idle_hlt, &len, NULL, 0) < 0)
1261 len = sizeof(cx_supported);
1262 if (sysctlbyname("machdep.mwait.CX.supported", cx_supported, &len,
1266 len = sizeof(orig_global_cx);
1267 if (sysctlbyname("machdep.mwait.CX.idle", orig_global_cx, &len,
1271 strlcpy(cpu_perf_cx, "AUTODEEP", sizeof(cpu_perf_cx));
1272 cpu_perf_cxlen = strlen(cpu_perf_cx) + 1;
1273 if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1274 cpu_perf_cx, cpu_perf_cxlen) < 0) {
1275 /* AUTODEEP is not supported; try AUTO */
1277 strlcpy(cpu_perf_cx, "AUTO", sizeof(cpu_perf_cx));
1278 cpu_perf_cxlen = strlen(cpu_perf_cx) + 1;
1279 if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1280 cpu_perf_cx, cpu_perf_cxlen) < 0)
1288 for (ptr = strtok(cx_supported, " "); ptr != NULL;
1289 ptr = strtok(NULL, " ")) {
1290 if (target == NULL ||
1291 (target != NULL && strcmp(ptr, target) == 0)) {
1292 strlcpy(cpu_idle_cx, ptr, sizeof(cpu_idle_cx));
1293 cpu_idle_cxlen = strlen(cpu_idle_cx) + 1;
1298 if (cpu_idle_cxlen == 0)
1302 printf("cstate orig %s, perf %s, idle %s\n",
1303 orig_global_cx, cpu_perf_cx, cpu_idle_cx);
1309 set_cstate(int cpu, int inc)
1317 len = cpu_perf_cxlen;
1320 len = cpu_idle_cxlen;
1324 printf("cpu%d set cstate %s\n", cpu, cst);
1325 snprintf(sysid, sizeof(sysid), "machdep.mwait.CX.idle%d", cpu);
1326 sysctlbyname(sysid, NULL, NULL, cst, len);
1330 restore_backlight(void)
1332 if (BackLightDown) {
1334 sysctlbyname("hw.backlight_level", NULL, NULL,
1335 &OldBackLightLevel, sizeof(OldBackLightLevel));
1340 * get_cputemp() / mon_cputemp()
1342 * This enforces the maximum cpu frequency based on temperature
1343 * versus MinTemp and MaxTemp.
1349 struct sensor sensor;
1355 for (n = 0; ; ++n) {
1357 snprintf(sysid, sizeof(sysid),
1358 "hw.sensors.cpu_node%d.temp0", n);
1359 sensor_size = sizeof(sensor);
1360 if (sysctlbyname(sysid, &sensor, &sensor_size, NULL, 0) < 0)
1363 if ((sensor.flags & (SENSOR_FINVALID | SENSOR_FUNKNOWN)) == 0) {
1364 t = (int)((sensor.value - 273150000) / 1000000);
1373 * Missing nodeN for some reason, try cpuN.
1375 for (n = 0; ; ++n) {
1377 snprintf(sysid, sizeof(sysid),
1378 "hw.sensors.cpu%d.temp0", n);
1379 sensor_size = sizeof(sensor);
1380 if (sysctlbyname(sysid, &sensor, &sensor_size, NULL, 0) < 0)
1383 if ((sensor.flags & (SENSOR_FINVALID | SENSOR_FUNKNOWN)) == 0) {
1384 t = (int)((sensor.value - 273150000) / 1000000);
1393 set_global_freq(int freq)
1396 sysctlbyname("hw.acpi.cpu.px_global",
1397 NULL, NULL, &freq, sizeof(freq));
1401 get_global_freq(void)
1407 freq_size = sizeof(freq);
1408 sysctlbyname("hw.acpi.cpu.px_global", &freq, &freq_size, NULL, 0);
1416 static int last_temp = -1;
1417 static int last_idx = -1;
1418 int temp = get_cputemp();
1422 static int CurPXGlobal __unused;
1425 * Reseed FreqAry, it can change w/AC power state
1427 acpi_get_cpufreq(0, &lowest, &highest);
1430 * Some cpu frequency steps can cause large shifts in cpu temperature,
1431 * creating an oscillation that min-maxes the temperature in a way
1432 * that is not desirable. To deal with this, we impose an exponential
1433 * average for any temperature change.
1435 * We have to do this in both directions, otherwise (in particular)
1436 * laptop fan responsiveness and temperature sensor response times
1437 * can create major frequency oscillations.
1439 if (last_temp < 0) {
1440 last_temp = temp << 8;
1441 } else if (temp < last_temp) {
1442 last_temp = (last_temp * 15 + (temp << 8)) / 16;
1444 printf("Falling temp %d (use %d)\n",
1445 temp, (last_temp >> 8));
1448 last_temp = (last_temp * 15 + (temp << 8)) / 16;
1450 printf("Rising temp %d (use %d)\n",
1451 temp, (last_temp >> 8));
1454 temp = last_temp >> 8;
1457 * CPU Temp not available or available frequencies not yet
1461 printf("Temp %d {%d-%d} NFreq=%d)\n",
1462 temp, MinTemp, MaxTemp, NFreq);
1469 * Return to normal operation if under the minimum
1471 if (temp <= MinTemp) {
1472 if (AdjustCpuFreqOverride) {
1473 AdjustCpuFreqOverride = 0;
1477 "Temp below %d, returning to normal operation",
1480 set_global_freq(SavedPXGlobal);
1486 * Hysteresis before entering temperature control mode
1488 if (AdjustCpuFreqOverride == 0 &&
1489 temp <= MinTemp + (MaxTemp - MinTemp) / 10 + 1) {
1494 * Override frequency controls (except for idle -> lowest)
1496 if (AdjustCpuFreqOverride == 0) {
1497 AdjustCpuFreqOverride = 1;
1498 SavedPXGlobal = get_global_freq();
1502 "Temp %d {%d-%d}, entering temperature control mode",
1503 temp, MinTemp, MaxTemp);
1505 if (temp > MaxTemp + (MaxTemp - MinTemp) / 10 + 1) {
1507 "Temp %d {%d-%d}, TOO HOT!!!",
1508 temp, MinTemp, MaxTemp);
1510 idx = (temp - MinTemp) * NFreq / (MaxTemp - MinTemp);
1511 if (idx < 0 || idx >= NFreq) /* overtemp */
1515 * Limit frequency shifts to single steps in both directions.
1516 * Some fans react very quickly; this will reduce oscillations.
1519 printf("Temp index %d (use %d)\n", idx, last_idx);
1520 if (last_idx >= 0 && idx < last_idx)
1522 else if (last_idx >= 0 && idx > last_idx)
1527 * One last thing, make sure our frequency adheres to
1528 * HighestCpuFreq. However, override LowestCpuFreq for
1529 * temperature control purposes.
1531 while (HighestCpuFreq > 0 && idx < NFreq &&
1532 FreqAry[idx] > HighestCpuFreq) {
1537 * Currently ignore LowestCpuFreq if temp control thinks it
1540 while (LowestCpuFreq > 0 && idx > 0 &&
1541 FreqAry[idx] < LowestCpuFreq) {
1546 if (FreqAry[idx] != CurPXGlobal) {
1547 CurPXGlobal = FreqAry[idx];
1550 /* this can get noisy so don't log for now */
1552 "Temp %d {%d-%d}, set frequency %d",
1553 temp, MinTemp, MaxTemp, CurPXGlobal);
1556 set_global_freq(CurPXGlobal);