2 * Copyright (c) 2010 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * The powerd daemon monitors the cpu load and adjusts cpu frequencies
37 * via hw.acpi.cpu.px_dom*.
40 #define _KERNEL_STRUCTURES
41 #include <sys/types.h>
42 #include <sys/sysctl.h>
43 #include <sys/kinfo.h>
45 #include <sys/queue.h>
46 #include <sys/soundcard.h>
48 #include <machine/cpufunc.h>
58 #define MAXDOM MAXCPU /* worst case, 1 cpu per domain */
63 TAILQ_ENTRY(cpu_pwrdom) dom_link;
66 cpumask_t dom_cpumask;
68 TAILQ_HEAD(cpu_pwrdom_list, cpu_pwrdom);
70 static void usage(void);
71 static double getcputime(double);
72 static void acpi_setcpufreq(int nstate);
73 static int setupdominfo(void);
74 static int has_battery(void);
75 static int mon_battery(void);
76 static void getncpus(void);
77 static void getuschedmask(void);
78 static int has_perfbias(void);
79 static void setperfbias(cpumask_t, int);
81 static struct cpu_pwrdom_list CpuPwrDomain; /* power domain list; the BSP's domain goes first, CpuPwrDomLast goes last */
82 static struct cpu_pwrdom *CpuPwrDomLimit; /* domain boundary acpi_setcpufreq() uses as the start or end of its frequency walk */
83 static struct cpu_pwrdom CpuPwrDomLast; /* list sentinel, dom_id is set to -1 */
84 static int NCpuPwrDomUsed; /* NOTE(review): presumably # of power domains currently in use - confirm */
87 static cpumask_t UschedCpumask; /* cpus the userland scheduler may use (kern.usched_global_cpumask) */
89 static int TurboOpt = 1; /* when 0, the frequency parsers special-case a top entry exactly 1 above the next; NOTE(review): presumably the turbo state - confirm */
90 static int CpuLimit; /* # of cpus at max frequency */
92 static int Hysteresis = 10; /* percentage */
93 static double TriggerUp = 0.25; /* single-cpu load to force max freq */
94 static double TriggerDown; /* load per cpu to force the min freq */
95 static int BatLifeMin = 2; /* shutdown the box, if low on battery life */
96 static struct timespec BatLifePrevT; /* subtracted from the current time to rate-limit battery polling */
97 static int BatLifePollIntvl = 5; /* unit: sec */
98 static int HasPerfbias = 1; /* overwritten with the result of has_perfbias() at startup */
100 static struct timespec BatShutdownStartT; /* time at which the low-battery linger period was armed */
101 static int BatShutdownLinger = -1; /* <0: not armed; >0: linger seconds in effect; 0: linger expired */
102 static int BatShutdownLingerSet = 60; /* unit: sec */
103 static int BatShutdownLingerCnt; /* # of low-battery alerts logged since the last reset */
104 static int BatShutdownAudioAlert = 1; /* checked by low_battery_alert() before it opens /dev/dsp */
106 static void sigintr(int signo);
109 main(int ac, char **av)
112 double uavg; /* uavg - used for speeding up */
113 double davg; /* davg - used for slowing down */
123 srt = 8.0; /* time for samples - 8 seconds */
124 pollrate = 1.0; /* polling rate in seconds */
126 while ((ch = getopt(ac, av, "dep:r:tu:B:L:P:QT:")) != -1) {
135 Hysteresis = (int)strtol(optarg, NULL, 10);
138 pollrate = strtod(optarg, NULL);
144 TriggerUp = (double)strtol(optarg, NULL, 10) / 100;
147 BatLifeMin = strtol(optarg, NULL, 10);
150 BatShutdownLingerSet = strtol(optarg, NULL, 10);
151 if (BatShutdownLingerSet < 0)
152 BatShutdownLingerSet = 0;
155 BatLifePollIntvl = strtol(optarg, NULL, 10);
158 BatShutdownAudioAlert = 0;
161 srt = strtod(optarg, NULL);
171 /* Get the number of cpus */
174 /* Get usched cpumask */
177 if (0 > Hysteresis || Hysteresis > 99) {
178 fprintf(stderr, "Invalid hysteresis value\n");
182 if (0 > TriggerUp || TriggerUp > 1) {
183 fprintf(stderr, "Invalid load limit value\n");
187 TriggerDown = TriggerUp - (TriggerUp * (double) Hysteresis / 100);
190 * Make sure powerd is not already running.
192 PowerFd = open("/var/run/powerd.pid", O_CREAT|O_RDWR, 0644);
195 "Cannot create /var/run/powerd.pid, "
196 "continuing anyway\n");
198 if (flock(PowerFd, LOCK_EX|LOCK_NB) < 0) {
199 fprintf(stderr, "powerd is already running\n");
205 * Daemonize and set pid
209 openlog("powerd", LOG_CONS | LOG_PID, LOG_DAEMON);
213 ftruncate(PowerFd, 0);
214 snprintf(buf, sizeof(buf), "%d\n", (int)getpid());
215 write(PowerFd, buf, strlen(buf));
218 /* Do we need to monitor battery life? */
219 if (BatLifePollIntvl <= 0)
222 monbat = has_battery();
225 HasPerfbias = has_perfbias();
228 * Wait for the hw.acpi.cpu.px_dom* sysctl to be created by the kernel.
230 * Since hw.acpi.cpu.px_dom* creation is queued into ACPI
231 * taskqueue and ACPI taskqueue is shared across various
232 * ACPI modules, any delay in other modules may cause
233 * hw.acpi.cpu.px_dom* to be created at quite a later time
234 * (e.g. cmbat module's task could take quite a lot of time).
238 * Prime delta cputime calculation, make sure at least
241 getcputime(pollrate);
244 usleep((int)(pollrate * 1000000.0));
248 * Assume everything is in use and maxed out, before we
251 CpuPwrDomLimit = &CpuPwrDomLast;
255 * Set to maximum performance if killed.
257 signal(SIGINT, sigintr);
258 signal(SIGTERM, sigintr);
262 srt = srt / pollrate; /* convert to sample count */
265 printf("samples for downgrading: %5.2f\n", srt);
270 * Calculate nstate, the number of cpus we wish to run at max
271 * frequency. All remaining cpus will be set to their lowest
272 * frequency and mapped out of the user process scheduler.
275 qavg = getcputime(pollrate);
276 uavg = (uavg * 2.0 + qavg) / 3.0; /* speeding up */
277 davg = (davg * srt + qavg) / (srt + 1); /* slowing down */
281 ustate = uavg / TriggerUp;
282 if (ustate < CpuLimit)
283 ustate = uavg / TriggerDown;
284 dstate = davg / TriggerUp;
285 if (dstate < CpuLimit)
286 dstate = davg / TriggerDown;
288 nstate = (ustate > dstate) ? ustate : dstate;
293 printf("\rqavg=%5.2f uavg=%5.2f davg=%5.2f "
294 "%2d/%2d ncpus=%d\r",
296 CpuLimit, NCpuPwrDomUsed, nstate);
299 if (nstate != CpuLimit)
300 acpi_setcpufreq(nstate);
302 monbat = mon_battery();
303 usleep((int)(pollrate * 1000000.0));
309 sigintr(int signo __unused)
311 syslog(LOG_INFO, "killed, setting max and exiting");
312 acpi_setcpufreq(NCpus);
317 * Figure out the CPU power domains.
322 struct cpu_pwrdom *dom;
323 struct cpu_pwrdom_list tmp_list;
330 TAILQ_INIT(&CpuPwrDomain);
333 TAILQ_INIT(&tmp_list);
334 for (i = 0; i < MAXDOM; ++i) {
335 snprintf(buf, sizeof(buf),
336 "hw.acpi.cpu.px_dom%d.available", i);
337 if (sysctlbyname(buf, NULL, NULL, NULL, 0) < 0)
340 dom = calloc(1, sizeof(*dom));
342 TAILQ_INSERT_TAIL(&tmp_list, dom, dom_link);
345 while ((dom = TAILQ_FIRST(&tmp_list)) != NULL) {
348 TAILQ_REMOVE(&tmp_list, dom, dom_link);
349 CPUMASK_ASSZERO(dom->dom_cpumask);
351 snprintf(buf, sizeof(buf),
352 "hw.acpi.cpu.px_dom%d.members", dom->dom_id);
353 msize = sizeof(members);
354 if (sysctlbyname(buf, members, &msize, NULL, 0) < 0) {
360 for (str = strtok(members, " "); str; str = strtok(NULL, " ")) {
362 sscanf(str, "cpu%d", &n);
368 CPUMASK_ORBIT(dom->dom_cpumask, n);
371 if (dom->dom_ncpus == 0) {
376 printf("dom%d cpumask: ", dom->dom_id);
377 for (i = 0; i < (int)NELEM(dom->dom_cpumask.ary); ++i) {
379 (uintmax_t)dom->dom_cpumask.ary[i]);
387 * Use the power domain containing the BSP as the first
388 * power domain. So if all CPUs are idle, we could
389 * leave BSP to the usched without too much trouble.
391 TAILQ_INSERT_HEAD(&CpuPwrDomain, dom, dom_link);
393 TAILQ_INSERT_TAIL(&CpuPwrDomain, dom, dom_link);
399 while ((dom = TAILQ_FIRST(&CpuPwrDomain)) != NULL) {
400 TAILQ_REMOVE(&CpuPwrDomain, dom, dom_link);
404 printf("Found %d cpus, expecting %d\n", ncpu, NCpus);
410 /* Install sentinel */
411 CpuPwrDomLast.dom_id = -1;
412 TAILQ_INSERT_TAIL(&CpuPwrDomain, &CpuPwrDomLast, dom_link);
418 * Return the one-second cpu load. One cpu at 100% will return a value
419 * of 1.0. On a SMP system N cpus running at 100% will return a value of N.
423 getcputime(double pollrate)
425 static struct kinfo_cputime ocpu_time[MAXCPU];
426 static struct kinfo_cputime ncpu_time[MAXCPU];
432 bcopy(ncpu_time, ocpu_time, sizeof(struct kinfo_cputime) * NCpus);
434 slen = sizeof(ncpu_time);
435 if (sysctlbyname("kern.cputime", &ncpu_time, &slen, NULL, 0) < 0) {
436 fprintf(stderr, "kern.cputime sysctl not available\n");
439 ncpu = slen / sizeof(ncpu_time[0]);
442 for (cpu = 0; cpu < ncpu; ++cpu) {
443 delta += (ncpu_time[cpu].cp_user + ncpu_time[cpu].cp_sys +
444 ncpu_time[cpu].cp_nice + ncpu_time[cpu].cp_intr) -
445 (ocpu_time[cpu].cp_user + ocpu_time[cpu].cp_sys +
446 ocpu_time[cpu].cp_nice + ocpu_time[cpu].cp_intr);
448 return((double)delta / (pollrate * 1000000.0));
452 acpi_getcpufreq_str(int dom_id, int *highest0, int *lowest0)
454 char buf[256], sysid[64];
457 int v, highest, lowest;
460 * Retrieve availability list
462 snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.available",
464 buflen = sizeof(buf) - 1;
465 if (sysctlbyname(sysid, buf, &buflen, NULL, 0) < 0)
470 * Parse out the highest and lowest cpu frequencies
473 highest = lowest = 0;
474 while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) {
475 if (lowest == 0 || lowest > v)
477 if (highest == 0 || highest < v)
482 if (!TurboOpt && highest - v == 1)
491 acpi_getcpufreq_bin(int dom_id, int *highest0, int *lowest0)
499 * Retrieve availability list
501 snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.avail", dom_id);
502 freqlen = sizeof(freq);
503 if (sysctlbyname(sysid, freq, &freqlen, NULL, 0) < 0)
506 freqcnt = freqlen / sizeof(freq[0]);
510 *lowest0 = freq[freqcnt - 1];
513 if (!TurboOpt && freqcnt > 1 && freq[0] - freq[1] == 1)
519 acpi_getcpufreq(int dom_id, int *highest, int *lowest)
524 if (acpi_getcpufreq_bin(dom_id, highest, lowest))
526 acpi_getcpufreq_str(dom_id, highest, lowest);
530 * nstate is the requested number of cpus that we wish to run at full
531 * frequency. We calculate how many domains we have to adjust to reach
534 * This function also sets the user scheduler global cpu mask.
537 acpi_setcpufreq(int nstate)
540 int increasing = (nstate > CpuLimit);
541 struct cpu_pwrdom *dom, *domBeg, *domEnd;
546 int force_uschedbsp = 0;
547 cpumask_t old_cpumask;
549 old_cpumask = UschedCpumask;
552 * Calculate the ending domain if the number of operating cpus
555 * Calculate the starting domain if the number of operating cpus
558 * Calculate the mask of cpus the userland scheduler is allowed
562 CPUMASK_ASSZERO(UschedCpumask);
563 for (dom = TAILQ_FIRST(&CpuPwrDomain); dom != &CpuPwrDomLast;
564 dom = TAILQ_NEXT(dom, dom_link)) {
569 ncpus += dom->dom_ncpus;
572 mask = dom->dom_cpumask;
573 if (ncpus > nstate) {
576 diff = ncpus - nstate;
577 for (i = 0; i < diff; ++i) {
580 c = BSRCPUMASK(mask);
581 CPUMASK_NANDBIT(mask, c);
584 CPUMASK_ORMASK(UschedCpumask, mask);
587 syslog(LOG_INFO, "using %d cpus", nstate);
590 * Set the mask of cpus the userland scheduler is allowed to use.
592 * Make sure that userland scheduler has at least one cpu.
594 if (CPUMASK_TESTZERO(UschedCpumask)) {
595 CPUMASK_ORBIT(UschedCpumask, 0);
601 printf("\nusched cpumask: ");
602 for (i = 0; i < (int)NELEM(UschedCpumask.ary); ++i)
603 printf("%jx ", (uintmax_t)UschedCpumask.ary[i]);
607 sysctlbyname("kern.usched_global_cpumask", NULL, 0,
608 &UschedCpumask, sizeof(UschedCpumask));
610 CPUMASK_NANDBIT(UschedCpumask, 0);
612 CPUMASK_XORMASK(old_cpumask, UschedCpumask);
615 * Set performance-energy bias
618 setperfbias(old_cpumask, increasing);
621 domBeg = CpuPwrDomLimit;
625 domEnd = CpuPwrDomLimit;
627 CpuPwrDomLimit = dom;
631 * Adjust the cpu frequency
633 for (dom = domBeg; dom != domEnd; dom = TAILQ_NEXT(dom, dom_link)) {
634 acpi_getcpufreq(dom->dom_id, &highest, &lowest);
635 if (highest == 0 || lowest == 0)
639 * Calculate the desired cpu frequency, test, and set.
641 desired = increasing ? highest : lowest;
643 snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.select",
646 printf("dom%d set frequency %d\n",
647 dom->dom_id, desired);
649 sysctlbyname(sysid, NULL, NULL, &desired, sizeof(desired));
657 fprintf(stderr, "usage: powerd [-dt] [-p hysteresis] "
658 "[-u trigger_up] [-T sample_interval] [-r poll_interval] "
659 "[-B min_battery_life] [-L low_battery_linger] "
660 "[-P battery_poll_interval] [-Q] [-e]\n");
665 #define timespecsub(vvp, uvp) \
667 (vvp)->tv_sec -= (uvp)->tv_sec; \
668 (vvp)->tv_nsec -= (uvp)->tv_nsec; \
669 if ((vvp)->tv_nsec < 0) { \
671 (vvp)->tv_nsec += 1000000000; \
676 #define BAT_SYSCTL_TIME_MAX 50000000 /* unit: nanosecond */
681 struct timespec s, e;
685 clock_gettime(CLOCK_MONOTONIC_FAST, &s);
689 if (sysctlbyname("hw.acpi.acline", &val, &len, NULL, 0) < 0) {
690 /* No AC line information */
693 clock_gettime(CLOCK_MONOTONIC_FAST, &e);
696 if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
697 /* hw.acpi.acline takes too long to be useful */
698 syslog(LOG_NOTICE, "hw.acpi.acline takes too long");
702 clock_gettime(CLOCK_MONOTONIC_FAST, &s);
704 if (sysctlbyname("hw.acpi.battery.life", &val, &len, NULL, 0) < 0) {
705 /* No battery life */
708 clock_gettime(CLOCK_MONOTONIC_FAST, &e);
711 if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
712 /* hw.acpi.battery.life takes too long to be useful */
713 syslog(LOG_NOTICE, "hw.acpi.battery.life takes too long");
720 low_battery_alert(int life)
722 int fmt, stereo, freq;
725 syslog(LOG_ALERT, "low battery life %d%%, please plugin AC line, #%d",
726 life, BatShutdownLingerCnt);
727 ++BatShutdownLingerCnt;
729 if (!BatShutdownAudioAlert)
732 fd = open("/dev/dsp", O_WRONLY);
737 if (ioctl(fd, SNDCTL_DSP_SETFMT, &fmt, sizeof(fmt)) < 0)
741 if (ioctl(fd, SNDCTL_DSP_STEREO, &stereo, sizeof(stereo)) < 0)
745 if (ioctl(fd, SNDCTL_DSP_SPEED, &freq, sizeof(freq)) < 0)
748 write(fd, alert1, sizeof(alert1));
749 write(fd, alert1, sizeof(alert1));
758 struct timespec cur, ts;
762 clock_gettime(CLOCK_MONOTONIC_FAST, &cur);
764 timespecsub(&ts, &BatLifePrevT);
765 if (ts.tv_sec < BatLifePollIntvl)
769 len = sizeof(acline);
770 if (sysctlbyname("hw.acpi.acline", &acline, &len, NULL, 0) < 0)
773 BatShutdownLinger = -1;
774 BatShutdownLingerCnt = 0;
779 if (sysctlbyname("hw.acpi.battery.life", &life, &len, NULL, 0) < 0)
782 if (BatShutdownLinger > 0) {
784 timespecsub(&ts, &BatShutdownStartT);
785 if (ts.tv_sec > BatShutdownLinger)
786 BatShutdownLinger = 0;
789 if (life <= BatLifeMin) {
790 if (BatShutdownLinger == 0 || BatShutdownLingerSet == 0) {
791 syslog(LOG_ALERT, "low battery life %d%%, "
792 "shutting down", life);
794 execlp("poweroff", "poweroff", NULL);
796 } else if (BatShutdownLinger < 0) {
797 BatShutdownLinger = BatShutdownLingerSet;
798 BatShutdownStartT = cur;
800 low_battery_alert(life);
810 slen = sizeof(NCpus);
811 if (sysctlbyname("hw.ncpu", &NCpus, &slen, NULL, 0) < 0)
812 err(1, "sysctlbyname hw.ncpu failed");
814 printf("hw.ncpu %d\n", NCpus);
822 slen = sizeof(UschedCpumask);
823 if (sysctlbyname("kern.usched_global_cpumask", &UschedCpumask, &slen,
825 err(1, "sysctlbyname kern.usched_global_cpumask failed");
829 printf("usched cpumask was: ");
830 for (i = 0; i < (int)NELEM(UschedCpumask.ary); ++i)
831 printf("%jx ", (uintmax_t)UschedCpumask.ary[i]);
844 if (sysctlbyname("machdep.perfbias0.hint", &hint, &len, NULL, 0) < 0)
850 setperfbias(cpumask_t mask, int increasing)
852 int hint = increasing ? 0 : 15;
854 while (CPUMASK_TESTNZERO(mask)) {
858 cpu = BSFCPUMASK(mask);
859 CPUMASK_NANDBIT(mask, cpu);
861 snprintf(sysid, sizeof(sysid), "machdep.perfbias%d.hint", cpu);
862 sysctlbyname(sysid, NULL, NULL, &hint, sizeof(hint));
864 printf("cpu%d set perfbias hint %d\n", cpu, hint);