2 * Copyright (c) 2010 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * The powerd daemon monitors the cpu load and adjusts cpu frequencies
37 * via hw.acpi.cpu.px_dom*.
40 #define _KERNEL_STRUCTURES
41 #include <sys/types.h>
42 #include <sys/sysctl.h>
43 #include <sys/kinfo.h>
45 #include <sys/queue.h>
46 #include <sys/soundcard.h>
48 #include <machine/cpufunc.h>
58 #define MAXDOM MAXCPU /* worst case, 1 cpu per domain */
63 TAILQ_ENTRY(cpu_pwrdom) dom_link;
66 cpumask_t dom_cpumask;
68 TAILQ_HEAD(cpu_pwrdom_list, cpu_pwrdom);
70 static void usage(void);
71 static double getcputime(double);
72 static void acpi_setcpufreq(int nstate);
73 static int setupdominfo(void);
74 static int has_battery(void);
75 static int mon_battery(void);
76 static void getncpus(void);
78 static struct cpu_pwrdom_list CpuPwrDomain;
79 static struct cpu_pwrdom *CpuPwrDomLimit;
80 static struct cpu_pwrdom CpuPwrDomLast;
81 static int NCpuPwrDomUsed;
86 int CpuLimit; /* # of cpus at max frequency */
89 int CpuCount[MAXDOM]; /* # of cpus in any given domain */
90 int Hysteresis = 10; /* percentage */
91 double TriggerUp = 0.25;/* single-cpu load to force max freq */
92 double TriggerDown; /* load per cpu to force the min freq */
93 static int BatLifeMin = 2; /* shutdown the box, if low on battery life */
94 static struct timespec BatLifePrevT;
95 static int BatLifePollIntvl = 5; /* unit: sec */
97 static struct timespec BatShutdownStartT;
98 static int BatShutdownLinger = -1;
99 static int BatShutdownLingerSet = 60; /* unit: sec */
100 static int BatShutdownLingerCnt;
101 static int BatShutdownAudioAlert = 1;
103 static void sigintr(int signo);
106 main(int ac, char **av)
109 double uavg; /* uavg - used for speeding up */
110 double davg; /* davg - used for slowing down */
120 srt = 8.0; /* time for samples - 8 seconds */
121 pollrate = 1.0; /* polling rate in seconds */
123 while ((ch = getopt(ac, av, "dp:r:tu:B:L:P:QT:")) != -1) {
129 Hysteresis = (int)strtol(optarg, NULL, 10);
132 pollrate = strtod(optarg, NULL);
138 TriggerUp = (double)strtol(optarg, NULL, 10) / 100;
141 BatLifeMin = strtol(optarg, NULL, 10);
144 BatShutdownLingerSet = strtol(optarg, NULL, 10);
145 if (BatShutdownLingerSet < 0)
146 BatShutdownLingerSet = 0;
149 BatLifePollIntvl = strtol(optarg, NULL, 10);
152 BatShutdownAudioAlert = 0;
155 srt = strtod(optarg, NULL);
165 /* Get the number of cpus */
168 if (0 > Hysteresis || Hysteresis > 99) {
169 fprintf(stderr, "Invalid hysteresis value\n");
173 if (0 > TriggerUp || TriggerUp > 1) {
174 fprintf(stderr, "Invalid load limit value\n");
178 TriggerDown = TriggerUp - (TriggerUp * (double) Hysteresis / 100);
181 * Make sure powerd is not already running.
183 PowerFd = open("/var/run/powerd.pid", O_CREAT|O_RDWR, 0644);
186 "Cannot create /var/run/powerd.pid, "
187 "continuing anyway\n");
189 if (flock(PowerFd, LOCK_EX|LOCK_NB) < 0) {
190 fprintf(stderr, "powerd is already running\n");
196 * Daemonize and set pid
200 openlog("powerd", LOG_CONS | LOG_PID, LOG_DAEMON);
204 ftruncate(PowerFd, 0);
205 snprintf(buf, sizeof(buf), "%d\n", (int)getpid());
206 write(PowerFd, buf, strlen(buf));
209 /* Do we need to monitor battery life? */
210 if (BatLifePollIntvl <= 0)
213 monbat = has_battery();
216 * Wait for the hw.acpi.cpu.px_dom* sysctls to be created by the kernel
218 * Since hw.acpi.cpu.px_dom* creation is queued into ACPI
219 * taskqueue and ACPI taskqueue is shared across various
220 * ACPI modules, any delay in other modules may cause
221 * hw.acpi.cpu.px_dom* to be created at a much later time
222 * (e.g. cmbat module's task could take quite a lot of time).
226 * Prime delta cputime calculation, make sure at least
229 getcputime(pollrate);
232 usleep((int)(pollrate * 1000000.0));
236 * Assume everything is used and is maxed out, before we
239 CpuPwrDomLimit = &CpuPwrDomLast;
243 * Set to maximum performance if killed.
245 signal(SIGINT, sigintr);
246 signal(SIGTERM, sigintr);
250 srt = srt / pollrate; /* convert to sample count */
253 printf("samples for downgrading: %5.2f\n", srt);
258 * Calculate nstate, the number of cpus we wish to run at max
259 * frequency. All remaining cpus will be set to their lowest
260 * frequency and mapped out of the user process scheduler.
263 qavg = getcputime(pollrate);
264 uavg = (uavg * 2.0 + qavg) / 3.0; /* speeding up */
265 davg = (davg * srt + qavg) / (srt + 1); /* slowing down */
269 ustate = uavg / TriggerUp;
270 if (ustate < CpuLimit)
271 ustate = uavg / TriggerDown;
272 dstate = davg / TriggerUp;
273 if (dstate < CpuLimit)
274 dstate = davg / TriggerDown;
276 nstate = (ustate > dstate) ? ustate : dstate;
281 printf("\rqavg=%5.2f uavg=%5.2f davg=%5.2f "
282 "%2d/%2d ncpus=%d\r",
284 CpuLimit, NCpuPwrDomUsed, nstate);
287 if (nstate != CpuLimit)
288 acpi_setcpufreq(nstate);
290 monbat = mon_battery();
291 usleep((int)(pollrate * 1000000.0));
297 sigintr(int signo __unused)
299 syslog(LOG_INFO, "killed, setting max and exiting");
300 acpi_setcpufreq(NCpus);
305 * Figure out the domains and calculate the CpuCount[] array.
310 struct cpu_pwrdom *dom;
311 struct cpu_pwrdom_list tmp_list;
318 TAILQ_INIT(&CpuPwrDomain);
322 TAILQ_INIT(&tmp_list);
323 for (i = 0; i < MAXDOM; ++i) {
324 snprintf(buf, sizeof(buf),
325 "hw.acpi.cpu.px_dom%d.available", i);
326 if (sysctlbyname(buf, NULL, NULL, NULL, 0) < 0)
329 dom = calloc(1, sizeof(*dom));
331 TAILQ_INSERT_TAIL(&tmp_list, dom, dom_link);
334 while ((dom = TAILQ_FIRST(&tmp_list)) != NULL) {
337 TAILQ_REMOVE(&tmp_list, dom, dom_link);
338 CPUMASK_ASSZERO(dom->dom_cpumask);
340 snprintf(buf, sizeof(buf),
341 "hw.acpi.cpu.px_dom%d.members", dom->dom_id);
342 msize = sizeof(members);
343 if (sysctlbyname(buf, members, &msize, NULL, 0) < 0) {
349 for (str = strtok(members, " "); str; str = strtok(NULL, " ")) {
351 sscanf(str, "cpu%d", &n);
357 CPUMASK_ORBIT(dom->dom_cpumask, n);
360 if (dom->dom_ncpus == 0) {
365 printf("dom%d cpumask: ", dom->dom_id);
366 for (i = 0; i < (int)NELEM(dom->dom_cpumask.ary); ++i) {
368 (uintmax_t)dom->dom_cpumask.ary[i]);
376 * Use the power domain containing the BSP as the first
377 * power domain. So if all CPUs are idle, we could
378 * leave BSP to the usched without too much trouble.
380 TAILQ_INSERT_HEAD(&CpuPwrDomain, dom, dom_link);
382 TAILQ_INSERT_TAIL(&CpuPwrDomain, dom, dom_link);
387 if (NCpus != TotalCpus) {
388 while ((dom = TAILQ_FIRST(&CpuPwrDomain)) != NULL) {
389 TAILQ_REMOVE(&CpuPwrDomain, dom, dom_link);
393 printf("Found %d cpus, expecting %d\n",
400 /* Install sentinel */
401 CpuPwrDomLast.dom_id = -1;
402 TAILQ_INSERT_TAIL(&CpuPwrDomain, &CpuPwrDomLast, dom_link);
408 * Return the one-second cpu load. One cpu at 100% will return a value
409 * of 1.0. On an SMP system N cpus running at 100% will return a value of N.
413 getcputime(double pollrate)
415 static struct kinfo_cputime ocpu_time[MAXCPU];
416 static struct kinfo_cputime ncpu_time[MAXCPU];
422 /* NOTE: Don't use NCpus here; it may not be initialized yet */
423 bcopy(ncpu_time, ocpu_time, sizeof(struct kinfo_cputime) * TotalCpus);
425 slen = sizeof(ncpu_time);
426 if (sysctlbyname("kern.cputime", &ncpu_time, &slen, NULL, 0) < 0) {
427 fprintf(stderr, "kern.cputime sysctl not available\n");
430 ncpu = slen / sizeof(ncpu_time[0]);
433 for (cpu = 0; cpu < ncpu; ++cpu) {
434 delta += (ncpu_time[cpu].cp_user + ncpu_time[cpu].cp_sys +
435 ncpu_time[cpu].cp_nice + ncpu_time[cpu].cp_intr) -
436 (ocpu_time[cpu].cp_user + ocpu_time[cpu].cp_sys +
437 ocpu_time[cpu].cp_nice + ocpu_time[cpu].cp_intr);
439 return((double)delta / (pollrate * 1000000.0));
443 acpi_getcpufreq_str(int dom_id, int *highest0, int *lowest0)
445 char buf[256], sysid[64];
448 int v, highest, lowest;
451 * Retrieve availability list
453 snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.available",
455 buflen = sizeof(buf) - 1;
456 if (sysctlbyname(sysid, buf, &buflen, NULL, 0) < 0)
461 * Parse out the highest and lowest cpu frequencies
464 highest = lowest = 0;
465 while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) {
466 if (lowest == 0 || lowest > v)
468 if (highest == 0 || highest < v)
473 if (!TurboOpt && highest - v == 1)
482 acpi_getcpufreq_bin(int dom_id, int *highest0, int *lowest0)
490 * Retrieve availability list
492 snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.avail", dom_id);
493 freqlen = sizeof(freq);
494 if (sysctlbyname(sysid, freq, &freqlen, NULL, 0) < 0)
497 freqcnt = freqlen / sizeof(freq[0]);
501 *lowest0 = freq[freqcnt - 1];
504 if (!TurboOpt && freqcnt > 1 && freq[0] - freq[1] == 1)
510 acpi_getcpufreq(int dom_id, int *highest, int *lowest)
515 if (acpi_getcpufreq_bin(dom_id, highest, lowest))
517 acpi_getcpufreq_str(dom_id, highest, lowest);
521 * nstate is the requested number of cpus that we wish to run at full
522 * frequency. We calculate how many domains we have to adjust to reach
525 * This function also sets the user scheduler global cpu mask.
528 acpi_setcpufreq(int nstate)
531 int increasing = (nstate > CpuLimit);
532 struct cpu_pwrdom *dom, *domBeg, *domEnd;
537 cpumask_t global_cpumask;
540 * Calculate the ending domain if the number of operating cpus
543 * Calculate the starting domain if the number of operating cpus
546 * Calculate the mask of cpus the userland scheduler is allowed
550 CPUMASK_ASSZERO(global_cpumask);
551 for (dom = TAILQ_FIRST(&CpuPwrDomain); dom != &CpuPwrDomLast;
552 dom = TAILQ_NEXT(dom, dom_link)) {
557 ncpus += dom->dom_ncpus;
560 mask = dom->dom_cpumask;
561 if (ncpus > nstate) {
564 diff = ncpus - nstate;
565 for (i = 0; i < diff; ++i) {
568 c = BSRCPUMASK(mask);
569 CPUMASK_NANDBIT(mask, c);
572 CPUMASK_ORMASK(global_cpumask, mask);
576 * Make sure that userland scheduler has at least one cpu.
578 if (CPUMASK_TESTZERO(global_cpumask))
579 CPUMASK_ORBIT(global_cpumask, 0);
583 printf("\nusched cpumask: ");
584 for (i = 0; i < (int)NELEM(global_cpumask.ary); ++i)
585 printf("%jx ", (uintmax_t)global_cpumask.ary[i]);
590 syslog(LOG_INFO, "using %d cpus", nstate);
593 * Set the mask of cpus the userland scheduler is allowed to use.
595 sysctlbyname("kern.usched_global_cpumask", NULL, 0,
596 &global_cpumask, sizeof(global_cpumask));
599 domBeg = CpuPwrDomLimit;
603 domEnd = CpuPwrDomLimit;
605 CpuPwrDomLimit = dom;
609 * Adjust the cpu frequency
611 for (dom = domBeg; dom != domEnd; dom = TAILQ_NEXT(dom, dom_link)) {
612 acpi_getcpufreq(dom->dom_id, &highest, &lowest);
613 if (highest == 0 || lowest == 0)
617 * Calculate the desired cpu frequency, test, and set.
619 desired = increasing ? highest : lowest;
621 snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.select",
624 printf("dom%d set frequency %d\n",
625 dom->dom_id, desired);
627 sysctlbyname(sysid, NULL, NULL, &desired, sizeof(desired));
635 fprintf(stderr, "usage: powerd [-dt] [-p hysteresis] "
636 "[-u trigger_up] [-T sample_interval] [-r poll_interval] "
637 "[-B min_battery_life] [-L low_battery_linger] "
638 "[-P battery_poll_interval] [-Q]\n");
643 #define timespecsub(vvp, uvp) \
645 (vvp)->tv_sec -= (uvp)->tv_sec; \
646 (vvp)->tv_nsec -= (uvp)->tv_nsec; \
647 if ((vvp)->tv_nsec < 0) { \
649 (vvp)->tv_nsec += 1000000000; \
654 #define BAT_SYSCTL_TIME_MAX 50000000 /* unit: nanosecond */
659 struct timespec s, e;
663 clock_gettime(CLOCK_MONOTONIC_FAST, &s);
667 if (sysctlbyname("hw.acpi.acline", &val, &len, NULL, 0) < 0) {
668 /* No AC line information */
671 clock_gettime(CLOCK_MONOTONIC_FAST, &e);
674 if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
675 /* hw.acpi.acline takes too long to be useful */
676 syslog(LOG_NOTICE, "hw.acpi.acline takes too long");
680 clock_gettime(CLOCK_MONOTONIC_FAST, &s);
682 if (sysctlbyname("hw.acpi.battery.life", &val, &len, NULL, 0) < 0) {
683 /* No battery life */
686 clock_gettime(CLOCK_MONOTONIC_FAST, &e);
689 if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
690 /* hw.acpi.battery.life takes too long to be useful */
691 syslog(LOG_NOTICE, "hw.acpi.battery.life takes too long");
698 low_battery_alert(int life)
700 int fmt, stereo, freq;
703 syslog(LOG_ALERT, "low battery life %d%%, please plugin AC line, #%d",
704 life, BatShutdownLingerCnt);
705 ++BatShutdownLingerCnt;
707 if (!BatShutdownAudioAlert)
710 fd = open("/dev/dsp", O_WRONLY);
715 if (ioctl(fd, SNDCTL_DSP_SETFMT, &fmt, sizeof(fmt)) < 0)
719 if (ioctl(fd, SNDCTL_DSP_STEREO, &stereo, sizeof(stereo)) < 0)
723 if (ioctl(fd, SNDCTL_DSP_SPEED, &freq, sizeof(freq)) < 0)
726 write(fd, alert1, sizeof(alert1));
727 write(fd, alert1, sizeof(alert1));
736 struct timespec cur, ts;
740 clock_gettime(CLOCK_MONOTONIC_FAST, &cur);
742 timespecsub(&ts, &BatLifePrevT);
743 if (ts.tv_sec < BatLifePollIntvl)
747 len = sizeof(acline);
748 if (sysctlbyname("hw.acpi.acline", &acline, &len, NULL, 0) < 0)
751 BatShutdownLinger = -1;
752 BatShutdownLingerCnt = 0;
757 if (sysctlbyname("hw.acpi.battery.life", &life, &len, NULL, 0) < 0)
760 if (BatShutdownLinger > 0) {
762 timespecsub(&ts, &BatShutdownStartT);
763 if (ts.tv_sec > BatShutdownLinger)
764 BatShutdownLinger = 0;
767 if (life <= BatLifeMin) {
768 if (BatShutdownLinger == 0 || BatShutdownLingerSet == 0) {
769 syslog(LOG_ALERT, "low battery life %d%%, "
770 "shutting down", life);
772 execlp("poweroff", "poweroff", NULL);
774 } else if (BatShutdownLinger < 0) {
775 BatShutdownLinger = BatShutdownLingerSet;
776 BatShutdownStartT = cur;
778 low_battery_alert(life);
788 slen = sizeof(TotalCpus);
789 if (sysctlbyname("hw.ncpu", &TotalCpus, &slen, NULL, 0) < 0)
790 err(1, "sysctlbyname hw.ncpu failed");
792 printf("hw.ncpu %d\n", TotalCpus);