2 * Copyright (c) 2010 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * The powerd daemon monitors the cpu load and adjusts cpu frequencies
37 * via hw.acpi.cpu.px_dom*.
40 #define _KERNEL_STRUCTURES
41 #include <sys/types.h>
42 #include <sys/sysctl.h>
43 #include <sys/kinfo.h>
45 #include <sys/queue.h>
46 #include <sys/soundcard.h>
48 #include <machine/cpufunc.h>
58 #define MAXDOM MAXCPU /* worst case, 1 cpu per domain */
63 TAILQ_ENTRY(cpu_pwrdom) dom_link;
66 cpumask_t dom_cpumask;
68 TAILQ_HEAD(cpu_pwrdom_list, cpu_pwrdom);
70 static void usage(void);
71 static double getcputime(double);
72 static void acpi_setcpufreq(int nstate);
73 static int setupdominfo(void);
74 static int has_battery(void);
75 static int mon_battery(void);
76 static void getncpus(void);
77 static void getuschedmask(void);
78 static int has_perfbias(void);
79 static void setperfbias(cpumask_t, int);
81 static struct cpu_pwrdom_list CpuPwrDomain;
82 static struct cpu_pwrdom *CpuPwrDomLimit;
83 static struct cpu_pwrdom CpuPwrDomLast;
84 static int NCpuPwrDomUsed;
87 static cpumask_t UschedCpumask;
90 int CpuLimit; /* # of cpus at max frequency */
93 int CpuCount[MAXDOM]; /* # of cpus in any given domain */
94 int Hysteresis = 10; /* percentage */
95 double TriggerUp = 0.25;/* single-cpu load to force max freq */
96 double TriggerDown; /* load per cpu to force the min freq */
97 static int BatLifeMin = 2; /* shutdown the box, if low on battery life */
98 static struct timespec BatLifePrevT;
99 static int BatLifePollIntvl = 5; /* unit: sec */
100 static int HasPerfbias = 1;
102 static struct timespec BatShutdownStartT;
103 static int BatShutdownLinger = -1;
104 static int BatShutdownLingerSet = 60; /* unit: sec */
105 static int BatShutdownLingerCnt;
106 static int BatShutdownAudioAlert = 1;
108 static void sigintr(int signo);
111 main(int ac, char **av)
114 double uavg; /* uavg - used for speeding up */
115 double davg; /* davg - used for slowing down */
125 srt = 8.0; /* time for samples - 8 seconds */
126 pollrate = 1.0; /* polling rate in seconds */
128 while ((ch = getopt(ac, av, "dep:r:tu:B:L:P:QT:")) != -1) {
137 Hysteresis = (int)strtol(optarg, NULL, 10);
140 pollrate = strtod(optarg, NULL);
146 TriggerUp = (double)strtol(optarg, NULL, 10) / 100;
149 BatLifeMin = strtol(optarg, NULL, 10);
152 BatShutdownLingerSet = strtol(optarg, NULL, 10);
153 if (BatShutdownLingerSet < 0)
154 BatShutdownLingerSet = 0;
157 BatLifePollIntvl = strtol(optarg, NULL, 10);
160 BatShutdownAudioAlert = 0;
163 srt = strtod(optarg, NULL);
173 /* Get the number of cpus */
176 /* Get usched cpumask */
179 if (0 > Hysteresis || Hysteresis > 99) {
180 fprintf(stderr, "Invalid hysteresis value\n");
184 if (0 > TriggerUp || TriggerUp > 1) {
185 fprintf(stderr, "Invalid load limit value\n");
189 TriggerDown = TriggerUp - (TriggerUp * (double) Hysteresis / 100);
192 * Make sure powerd is not already running.
194 PowerFd = open("/var/run/powerd.pid", O_CREAT|O_RDWR, 0644);
197 "Cannot create /var/run/powerd.pid, "
198 "continuing anyway\n");
200 if (flock(PowerFd, LOCK_EX|LOCK_NB) < 0) {
201 fprintf(stderr, "powerd is already running\n");
207 * Daemonize and set pid
211 openlog("powerd", LOG_CONS | LOG_PID, LOG_DAEMON);
215 ftruncate(PowerFd, 0);
216 snprintf(buf, sizeof(buf), "%d\n", (int)getpid());
217 write(PowerFd, buf, strlen(buf));
220 /* Do we need to monitor battery life? */
221 if (BatLifePollIntvl <= 0)
224 monbat = has_battery();
227 HasPerfbias = has_perfbias();
230 * Wait for hw.acpi.cpu.px_dom* sysctl to be created by kernel
232 * Since hw.acpi.cpu.px_dom* creation is queued into ACPI
233 * taskqueue and ACPI taskqueue is shared across various
234 * ACPI modules, any delay in other modules may cause
235 * hw.acpi.cpu.px_dom* to be created at a much later time
236 * (e.g. cmbat module's task could take quite a lot of time).
240 * Prime delta cputime calculation, make sure at least
243 getcputime(pollrate);
246 usleep((int)(pollrate * 1000000.0));
250 * Assume everything is used and is maxed out, before we
253 CpuPwrDomLimit = &CpuPwrDomLast;
257 * Set to maximum performance if killed.
259 signal(SIGINT, sigintr);
260 signal(SIGTERM, sigintr);
264 srt = srt / pollrate; /* convert to sample count */
267 printf("samples for downgrading: %5.2f\n", srt);
272 * Calculate nstate, the number of cpus we wish to run at max
273 * frequency. All remaining cpus will be set to their lowest
274 * frequency and mapped out of the user process scheduler.
277 qavg = getcputime(pollrate);
278 uavg = (uavg * 2.0 + qavg) / 3.0; /* speeding up */
279 davg = (davg * srt + qavg) / (srt + 1); /* slowing down */
283 ustate = uavg / TriggerUp;
284 if (ustate < CpuLimit)
285 ustate = uavg / TriggerDown;
286 dstate = davg / TriggerUp;
287 if (dstate < CpuLimit)
288 dstate = davg / TriggerDown;
290 nstate = (ustate > dstate) ? ustate : dstate;
295 printf("\rqavg=%5.2f uavg=%5.2f davg=%5.2f "
296 "%2d/%2d ncpus=%d\r",
298 CpuLimit, NCpuPwrDomUsed, nstate);
301 if (nstate != CpuLimit)
302 acpi_setcpufreq(nstate);
304 monbat = mon_battery();
305 usleep((int)(pollrate * 1000000.0));
311 sigintr(int signo __unused)
313 syslog(LOG_INFO, "killed, setting max and exiting");
314 acpi_setcpufreq(NCpus);
319 * Figure out the domains and calculate the CpuCount[] array.
324 struct cpu_pwrdom *dom;
325 struct cpu_pwrdom_list tmp_list;
332 TAILQ_INIT(&CpuPwrDomain);
336 TAILQ_INIT(&tmp_list);
337 for (i = 0; i < MAXDOM; ++i) {
338 snprintf(buf, sizeof(buf),
339 "hw.acpi.cpu.px_dom%d.available", i);
340 if (sysctlbyname(buf, NULL, NULL, NULL, 0) < 0)
343 dom = calloc(1, sizeof(*dom));
345 TAILQ_INSERT_TAIL(&tmp_list, dom, dom_link);
348 while ((dom = TAILQ_FIRST(&tmp_list)) != NULL) {
351 TAILQ_REMOVE(&tmp_list, dom, dom_link);
352 CPUMASK_ASSZERO(dom->dom_cpumask);
354 snprintf(buf, sizeof(buf),
355 "hw.acpi.cpu.px_dom%d.members", dom->dom_id);
356 msize = sizeof(members);
357 if (sysctlbyname(buf, members, &msize, NULL, 0) < 0) {
363 for (str = strtok(members, " "); str; str = strtok(NULL, " ")) {
365 sscanf(str, "cpu%d", &n);
371 CPUMASK_ORBIT(dom->dom_cpumask, n);
374 if (dom->dom_ncpus == 0) {
379 printf("dom%d cpumask: ", dom->dom_id);
380 for (i = 0; i < (int)NELEM(dom->dom_cpumask.ary); ++i) {
382 (uintmax_t)dom->dom_cpumask.ary[i]);
390 * Use the power domain containing the BSP as the first
391 * power domain. So if all CPUs are idle, we could
392 * leave BSP to the usched without too much trouble.
394 TAILQ_INSERT_HEAD(&CpuPwrDomain, dom, dom_link);
396 TAILQ_INSERT_TAIL(&CpuPwrDomain, dom, dom_link);
401 if (NCpus != TotalCpus) {
402 while ((dom = TAILQ_FIRST(&CpuPwrDomain)) != NULL) {
403 TAILQ_REMOVE(&CpuPwrDomain, dom, dom_link);
407 printf("Found %d cpus, expecting %d\n",
414 /* Install sentinel */
415 CpuPwrDomLast.dom_id = -1;
416 TAILQ_INSERT_TAIL(&CpuPwrDomain, &CpuPwrDomLast, dom_link);
422 * Return the one-second cpu load. One cpu at 100% will return a value
423 * of 1.0. On an SMP system, N cpus running at 100% will return a value of N.
427 getcputime(double pollrate)
429 static struct kinfo_cputime ocpu_time[MAXCPU];
430 static struct kinfo_cputime ncpu_time[MAXCPU];
436 /* NOTE: Don't use NCpus here; it may not be initialized yet */
437 bcopy(ncpu_time, ocpu_time, sizeof(struct kinfo_cputime) * TotalCpus);
439 slen = sizeof(ncpu_time);
440 if (sysctlbyname("kern.cputime", &ncpu_time, &slen, NULL, 0) < 0) {
441 fprintf(stderr, "kern.cputime sysctl not available\n");
444 ncpu = slen / sizeof(ncpu_time[0]);
447 for (cpu = 0; cpu < ncpu; ++cpu) {
448 delta += (ncpu_time[cpu].cp_user + ncpu_time[cpu].cp_sys +
449 ncpu_time[cpu].cp_nice + ncpu_time[cpu].cp_intr) -
450 (ocpu_time[cpu].cp_user + ocpu_time[cpu].cp_sys +
451 ocpu_time[cpu].cp_nice + ocpu_time[cpu].cp_intr);
453 return((double)delta / (pollrate * 1000000.0));
457 acpi_getcpufreq_str(int dom_id, int *highest0, int *lowest0)
459 char buf[256], sysid[64];
462 int v, highest, lowest;
465 * Retrieve availability list
467 snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.available",
469 buflen = sizeof(buf) - 1;
470 if (sysctlbyname(sysid, buf, &buflen, NULL, 0) < 0)
475 * Parse out the highest and lowest cpu frequencies
478 highest = lowest = 0;
479 while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) {
480 if (lowest == 0 || lowest > v)
482 if (highest == 0 || highest < v)
487 if (!TurboOpt && highest - v == 1)
496 acpi_getcpufreq_bin(int dom_id, int *highest0, int *lowest0)
504 * Retrieve availability list
506 snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.avail", dom_id);
507 freqlen = sizeof(freq);
508 if (sysctlbyname(sysid, freq, &freqlen, NULL, 0) < 0)
511 freqcnt = freqlen / sizeof(freq[0]);
515 *lowest0 = freq[freqcnt - 1];
518 if (!TurboOpt && freqcnt > 1 && freq[0] - freq[1] == 1)
524 acpi_getcpufreq(int dom_id, int *highest, int *lowest)
529 if (acpi_getcpufreq_bin(dom_id, highest, lowest))
531 acpi_getcpufreq_str(dom_id, highest, lowest);
535 * nstate is the requested number of cpus that we wish to run at full
536 * frequency. We calculate how many domains we have to adjust to reach
539 * This function also sets the user scheduler global cpu mask.
542 acpi_setcpufreq(int nstate)
545 int increasing = (nstate > CpuLimit);
546 struct cpu_pwrdom *dom, *domBeg, *domEnd;
551 int force_uschedbsp = 0;
552 cpumask_t old_cpumask;
554 old_cpumask = UschedCpumask;
557 * Calculate the ending domain if the number of operating cpus
560 * Calculate the starting domain if the number of operating cpus
563 * Calculate the mask of cpus the userland scheduler is allowed
567 CPUMASK_ASSZERO(UschedCpumask);
568 for (dom = TAILQ_FIRST(&CpuPwrDomain); dom != &CpuPwrDomLast;
569 dom = TAILQ_NEXT(dom, dom_link)) {
574 ncpus += dom->dom_ncpus;
577 mask = dom->dom_cpumask;
578 if (ncpus > nstate) {
581 diff = ncpus - nstate;
582 for (i = 0; i < diff; ++i) {
585 c = BSRCPUMASK(mask);
586 CPUMASK_NANDBIT(mask, c);
589 CPUMASK_ORMASK(UschedCpumask, mask);
592 syslog(LOG_INFO, "using %d cpus", nstate);
595 * Set the mask of cpus the userland scheduler is allowed to use.
597 * Make sure that userland scheduler has at least one cpu.
599 if (CPUMASK_TESTZERO(UschedCpumask)) {
600 CPUMASK_ORBIT(UschedCpumask, 0);
606 printf("\nusched cpumask: ");
607 for (i = 0; i < (int)NELEM(UschedCpumask.ary); ++i)
608 printf("%jx ", (uintmax_t)UschedCpumask.ary[i]);
612 sysctlbyname("kern.usched_global_cpumask", NULL, 0,
613 &UschedCpumask, sizeof(UschedCpumask));
615 CPUMASK_NANDBIT(UschedCpumask, 0);
617 CPUMASK_XORMASK(old_cpumask, UschedCpumask);
620 * Set performance-energy bias
623 setperfbias(old_cpumask, increasing);
626 domBeg = CpuPwrDomLimit;
630 domEnd = CpuPwrDomLimit;
632 CpuPwrDomLimit = dom;
636 * Adjust the cpu frequency
638 for (dom = domBeg; dom != domEnd; dom = TAILQ_NEXT(dom, dom_link)) {
639 acpi_getcpufreq(dom->dom_id, &highest, &lowest);
640 if (highest == 0 || lowest == 0)
644 * Calculate the desired cpu frequency, test, and set.
646 desired = increasing ? highest : lowest;
648 snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.select",
651 printf("dom%d set frequency %d\n",
652 dom->dom_id, desired);
654 sysctlbyname(sysid, NULL, NULL, &desired, sizeof(desired));
662 fprintf(stderr, "usage: powerd [-dt] [-p hysteresis] "
663 "[-u trigger_up] [-T sample_interval] [-r poll_interval] "
664 "[-B min_battery_life] [-L low_battery_linger] "
665 "[-P battery_poll_interval] [-Q] [-e]\n");
670 #define timespecsub(vvp, uvp) \
672 (vvp)->tv_sec -= (uvp)->tv_sec; \
673 (vvp)->tv_nsec -= (uvp)->tv_nsec; \
674 if ((vvp)->tv_nsec < 0) { \
676 (vvp)->tv_nsec += 1000000000; \
681 #define BAT_SYSCTL_TIME_MAX 50000000 /* unit: nanosecond */
686 struct timespec s, e;
690 clock_gettime(CLOCK_MONOTONIC_FAST, &s);
694 if (sysctlbyname("hw.acpi.acline", &val, &len, NULL, 0) < 0) {
695 /* No AC line information */
698 clock_gettime(CLOCK_MONOTONIC_FAST, &e);
701 if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
702 /* hw.acpi.acline takes too long to be useful */
703 syslog(LOG_NOTICE, "hw.acpi.acline takes too long");
707 clock_gettime(CLOCK_MONOTONIC_FAST, &s);
709 if (sysctlbyname("hw.acpi.battery.life", &val, &len, NULL, 0) < 0) {
710 /* No battery life */
713 clock_gettime(CLOCK_MONOTONIC_FAST, &e);
716 if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
717 /* hw.acpi.battery.life takes too long to be useful */
718 syslog(LOG_NOTICE, "hw.acpi.battery.life takes too long");
725 low_battery_alert(int life)
727 int fmt, stereo, freq;
730 syslog(LOG_ALERT, "low battery life %d%%, please plugin AC line, #%d",
731 life, BatShutdownLingerCnt);
732 ++BatShutdownLingerCnt;
734 if (!BatShutdownAudioAlert)
737 fd = open("/dev/dsp", O_WRONLY);
742 if (ioctl(fd, SNDCTL_DSP_SETFMT, &fmt, sizeof(fmt)) < 0)
746 if (ioctl(fd, SNDCTL_DSP_STEREO, &stereo, sizeof(stereo)) < 0)
750 if (ioctl(fd, SNDCTL_DSP_SPEED, &freq, sizeof(freq)) < 0)
753 write(fd, alert1, sizeof(alert1));
754 write(fd, alert1, sizeof(alert1));
763 struct timespec cur, ts;
767 clock_gettime(CLOCK_MONOTONIC_FAST, &cur);
769 timespecsub(&ts, &BatLifePrevT);
770 if (ts.tv_sec < BatLifePollIntvl)
774 len = sizeof(acline);
775 if (sysctlbyname("hw.acpi.acline", &acline, &len, NULL, 0) < 0)
778 BatShutdownLinger = -1;
779 BatShutdownLingerCnt = 0;
784 if (sysctlbyname("hw.acpi.battery.life", &life, &len, NULL, 0) < 0)
787 if (BatShutdownLinger > 0) {
789 timespecsub(&ts, &BatShutdownStartT);
790 if (ts.tv_sec > BatShutdownLinger)
791 BatShutdownLinger = 0;
794 if (life <= BatLifeMin) {
795 if (BatShutdownLinger == 0 || BatShutdownLingerSet == 0) {
796 syslog(LOG_ALERT, "low battery life %d%%, "
797 "shutting down", life);
799 execlp("poweroff", "poweroff", NULL);
801 } else if (BatShutdownLinger < 0) {
802 BatShutdownLinger = BatShutdownLingerSet;
803 BatShutdownStartT = cur;
805 low_battery_alert(life);
815 slen = sizeof(TotalCpus);
816 if (sysctlbyname("hw.ncpu", &TotalCpus, &slen, NULL, 0) < 0)
817 err(1, "sysctlbyname hw.ncpu failed");
819 printf("hw.ncpu %d\n", TotalCpus);
827 slen = sizeof(UschedCpumask);
828 if (sysctlbyname("kern.usched_global_cpumask", &UschedCpumask, &slen,
830 err(1, "sysctlbyname kern.usched_global_cpumask failed");
834 printf("usched cpumask was: ");
835 for (i = 0; i < (int)NELEM(UschedCpumask.ary); ++i)
836 printf("%jx ", (uintmax_t)UschedCpumask.ary[i]);
849 if (sysctlbyname("machdep.perfbias0.hint", &hint, &len, NULL, 0) < 0)
855 setperfbias(cpumask_t mask, int increasing)
857 int hint = increasing ? 0 : 15;
859 while (CPUMASK_TESTNZERO(mask)) {
863 cpu = BSFCPUMASK(mask);
864 CPUMASK_NANDBIT(mask, cpu);
866 snprintf(sysid, sizeof(sysid), "machdep.perfbias%d.hint", cpu);
867 sysctlbyname(sysid, NULL, NULL, &hint, sizeof(hint));
869 printf("cpu%d set perfbias hint %d\n", cpu, hint);