614ce5e2b6330f64e7756ab7b79907a648cf576d
[dragonfly.git] / usr.sbin / powerd / powerd.c
1 /*
2  * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35 /*
36  * The powerd daemon :
37  * - Monitors the cpu load and adjusts cpu and cpu power domain
38  *   performance accordingly.
39  * - Monitors battery life.  Raises alerts and shuts down the machine
40  *   if battery life runs low.
41  */
42
43 #define _KERNEL_STRUCTURES
44 #include <sys/types.h>
45 #include <sys/sysctl.h>
46 #include <sys/kinfo.h>
47 #include <sys/file.h>
48 #include <sys/queue.h>
49 #include <sys/soundcard.h>
50 #include <sys/time.h>
51 #include <machine/cpufunc.h>
52 #include <err.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <unistd.h>
56 #include <string.h>
57 #include <syslog.h>
58
59 #include "alert1.h"
60
61 #define MAXDOM          MAXCPU  /* worst case, 1 cpu per domain */
62
63 #define MAXFREQ         64
64 #define CST_STRLEN      16
65
/* One cpu power domain, i.e. one hw.acpi.cpu.px_dom<N> sysctl subtree. */
66 struct cpu_pwrdom {
67         TAILQ_ENTRY(cpu_pwrdom) dom_link;       /* queue linkage */
68         int                     dom_id;         /* N of hw.acpi.cpu.px_domN */
69         int                     dom_ncpus;      /* # of cpus set in dom_cpumask */
70         cpumask_t               dom_cpumask;    /* cpus listed in px_domN.members */
71 };
72
/* Load tracking state; one instance per cpu plus one global instance. */
73 struct cpu_state {
74         double                  cpu_qavg;       /* load of the latest poll interval */
75         double                  cpu_uavg;       /* used for speeding up */
76         double                  cpu_davg;       /* used for slowing down */
77         int                     cpu_limit;      /* state get_nstate() compares against */
78         int                     cpu_count;      /* upper bound of get_nstate() result */
79         char                    cpu_name[8];    /* label used in debug output */
80 };
81
82 static void usage(void);
83 static void get_ncpus(void);
84
85 /* usched cpumask */
86 static void get_uschedcpus(void);
87 static void set_uschedcpus(void);
88
89 /* perfbias(4) */
90 static int has_perfbias(void);
91 static void set_perfbias(int, int);
92
93 /* acpi(4) P-state */
94 static void acpi_getcpufreq_str(int, int *, int *);
95 static int acpi_getcpufreq_bin(int, int *, int *);
96 static void acpi_get_cpufreq(int, int *, int *);
97 static void acpi_set_cpufreq(int, int);
98 static int acpi_get_cpupwrdom(void);
99
100 /* mwait C-state hint */
101 static int probe_cstate(void);
102 static void set_cstate(int, int);
103
104 /* Performance monitoring */
105 static void init_perf(void);
106 static void mon_perf(double);
107 static void adj_perf(cpumask_t, cpumask_t);
108 static void adj_cpu_pwrdom(int, int);
109 static void adj_cpu_perf(int, int);
110 static void get_cputime(double);
111 static int get_nstate(struct cpu_state *, double);
112 static void add_spare_cpus(const cpumask_t, int);
113 static void restore_perf(void);
114
115 /* Battery monitoring */
116 static int has_battery(void);
117 static int mon_battery(void);
118 static void low_battery_alert(int);
119
120 /* Backlight */
121 static void restore_backlight(void);
122
123 /* Runtime states for performance monitoring */
124 static int global_pcpu_limit;           /* set to NCpus by init_perf() */
125 static struct cpu_state pcpu_state[MAXCPU];     /* per-cpu load state */
126 static struct cpu_state global_cpu_state;       /* load summed over all cpus */
127 static cpumask_t cpu_used;              /* cpus w/ high perf */
128 static cpumask_t cpu_pwrdom_used;       /* cpu power domains w/ high perf */
129 static cpumask_t usched_cpu_used;       /* cpus for usched */
130
131 /* Constants */
132 static cpumask_t cpu_pwrdom_mask;       /* usable cpu power domains */
133 static int cpu2pwrdom[MAXCPU];          /* cpu to cpu power domain map */
134 static struct cpu_pwrdom *cpu_pwrdomain[MAXDOM];
135 static int NCpus;                       /* # of cpus */
136 static char orig_global_cx[CST_STRLEN];
137 static char cpu_perf_cx[CST_STRLEN];
138 static int cpu_perf_cxlen;
139 static char cpu_idle_cx[CST_STRLEN];
140 static int cpu_idle_cxlen;
141
142 static int DebugOpt;                    /* -d: stay in foreground, print debug info */
143 static int TurboOpt = 1;                /* cleared by -t */
144 static int PowerFd;                     /* fd of /var/run/powerd.pid, < 0 if not opened */
145 static int Hysteresis = 10;     /* percentage */
146 static double TriggerUp = 0.25; /* single-cpu load to force max freq */
147 static double TriggerDown;      /* load per cpu to force the min freq */
148 static int HasPerfbias = 0;             /* -e, then result of has_perfbias() */
149 static int AdjustCpuFreq = 1;           /* cleared by -f */
150 static int AdjustCstate = 0;            /* -c, then result of probe_cstate() */
151 static int HighestCpuFreq;              /* -h; <= 0 means no upper bound */
152 static int LowestCpuFreq;               /* -l; <= 0 means no lower bound */
153
154 static volatile int stopped;            /* set by sigintr(), ends the main loop */
155
156 /* Battery life monitoring */
157 static int BatLifeMin = 2;      /* shutdown the box, if low on battery life */
158 static struct timespec BatLifePrevT;    /* time of the previous battery poll */
159 static int BatLifePollIntvl = 5; /* unit: sec */
160 static struct timespec BatShutdownStartT;       /* time the linger period started */
161 static int BatShutdownLinger = -1;      /* -1: not started, 0: expired, > 0: running */
162 static int BatShutdownLingerSet = 60; /* unit: sec */
163 static int BatShutdownLingerCnt;        /* # of low battery alerts raised so far */
164 static int BatShutdownAudioAlert = 1;   /* cleared by -Q */
165 static int BackLightPct = 100;          /* -b; 100 means no backlight adjustment */
166 static int OldBackLightLevel;           /* hw.backlight_level read before dimming */
167 static int BackLightDown;               /* set once the backlight has been dimmed */
168
169 static void sigintr(int signo);
170
171 int
172 main(int ac, char **av)
173 {
174         double srt;
175         double pollrate;
176         int ch;
177         char buf[64];
178         int monbat;
179
180         srt = 8.0;      /* time for samples - 8 seconds */
181         pollrate = 1.0; /* polling rate in seconds */
182
183         while ((ch = getopt(ac, av, "b:cdefh:l:p:r:tu:B:L:P:QT:")) != -1) {
184                 switch(ch) {
185                 case 'b':
186                         BackLightPct = strtol(optarg, NULL, 10);
187                         break;
188                 case 'c':
189                         AdjustCstate = 1;
190                         break;
191                 case 'd':
192                         DebugOpt = 1;
193                         break;
194                 case 'e':
195                         HasPerfbias = 1;
196                         break;
197                 case 'f':
198                         AdjustCpuFreq = 0;
199                         break;
200                 case 'h':
201                         HighestCpuFreq = strtol(optarg, NULL, 10);
202                         break;
203                 case 'l':
204                         LowestCpuFreq = strtol(optarg, NULL, 10);
205                         break;
206                 case 'p':
207                         Hysteresis = (int)strtol(optarg, NULL, 10);
208                         break;
209                 case 'r':
210                         pollrate = strtod(optarg, NULL);
211                         break;
212                 case 't':
213                         TurboOpt = 0;
214                         break;
215                 case 'u':
216                         TriggerUp = (double)strtol(optarg, NULL, 10) / 100;
217                         break;
218                 case 'B':
219                         BatLifeMin = strtol(optarg, NULL, 10);
220                         break;
221                 case 'L':
222                         BatShutdownLingerSet = strtol(optarg, NULL, 10);
223                         if (BatShutdownLingerSet < 0)
224                                 BatShutdownLingerSet = 0;
225                         break;
226                 case 'P':
227                         BatLifePollIntvl = strtol(optarg, NULL, 10);
228                         break;
229                 case 'Q':
230                         BatShutdownAudioAlert = 0;
231                         break;
232                 case 'T':
233                         srt = strtod(optarg, NULL);
234                         break;
235                 default:
236                         usage();
237                         /* NOT REACHED */
238                 }
239         }
240         ac -= optind;
241         av += optind;
242
243         setlinebuf(stdout);
244
245         /* Get number of cpus */
246         get_ncpus();
247
248         if (0 > Hysteresis || Hysteresis > 99) {
249                 fprintf(stderr, "Invalid hysteresis value\n");
250                 exit(1);
251         }
252
253         if (0 > TriggerUp || TriggerUp > 1) {
254                 fprintf(stderr, "Invalid load limit value\n");
255                 exit(1);
256         }
257
258         if (BackLightPct > 100 || BackLightPct <= 0) {
259                 fprintf(stderr, "Invalid backlight setting, ignore\n");
260                 BackLightPct = 100;
261         }
262
263         TriggerDown = TriggerUp - (TriggerUp * (double) Hysteresis / 100);
264
265         /*
266          * Make sure powerd is not already running.
267          */
268         PowerFd = open("/var/run/powerd.pid", O_CREAT|O_RDWR, 0644);
269         if (PowerFd < 0) {
270                 fprintf(stderr,
271                         "Cannot create /var/run/powerd.pid, "
272                         "continuing anyway\n");
273         } else {
274                 if (flock(PowerFd, LOCK_EX|LOCK_NB) < 0) {
275                         fprintf(stderr, "powerd is already running\n");
276                         exit(1);
277                 }
278         }
279
280         /*
281          * Demonize and set pid
282          */
283         if (DebugOpt == 0) {
284                 daemon(0, 0);
285                 openlog("powerd", LOG_CONS | LOG_PID, LOG_DAEMON);
286         }
287
288         if (PowerFd >= 0) {
289                 ftruncate(PowerFd, 0);
290                 snprintf(buf, sizeof(buf), "%d\n", (int)getpid());
291                 write(PowerFd, buf, strlen(buf));
292         }
293
294         /* Do we need to monitor battery life? */
295         if (BatLifePollIntvl <= 0)
296                 monbat = 0;
297         else
298                 monbat = has_battery();
299
300         /* Do we have perfbias(4)? */
301         if (HasPerfbias)
302                 HasPerfbias = has_perfbias();
303
304         /* Could we adjust C-state? */
305         if (AdjustCstate)
306                 AdjustCstate = probe_cstate();
307
308         /*
309          * Wait hw.acpi.cpu.px_dom* sysctl to be created by kernel.
310          *
311          * Since hw.acpi.cpu.px_dom* creation is queued into ACPI
312          * taskqueue and ACPI taskqueue is shared across various
313          * ACPI modules, any delay in other modules may cause
314          * hw.acpi.cpu.px_dom* to be created at quite a later time
315          * (e.g. cmbat module's task could take quite a lot of time).
316          */
317         for (;;) {
318                 /* Prime delta cputime calculation. */
319                 get_cputime(pollrate);
320
321                 /* Wait for all cpus to appear */
322                 if (acpi_get_cpupwrdom())
323                         break;
324                 usleep((int)(pollrate * 1000000.0));
325         }
326
327         /*
328          * Catch some signals so that max performance could be restored.
329          */
330         signal(SIGINT, sigintr);
331         signal(SIGTERM, sigintr);
332
333         /* Initialize performance states */
334         init_perf();
335
336         srt = srt / pollrate;   /* convert to sample count */
337         if (DebugOpt)
338                 printf("samples for downgrading: %5.2f\n", srt);
339
340         /*
341          * Monitoring loop
342          */
343         while (!stopped) {
344                 /*
345                  * Monitor performance
346                  */
347                 get_cputime(pollrate);
348                 mon_perf(srt);
349
350                 /*
351                  * Monitor battery
352                  */
353                 if (monbat)
354                         monbat = mon_battery();
355
356                 usleep((int)(pollrate * 1000000.0));
357         }
358
359         /*
360          * Set to maximum performance if killed.
361          */
362         syslog(LOG_INFO, "killed, setting max and exiting");
363         restore_perf();
364         restore_backlight();
365
366         exit(0);
367 }
368
369 static void
370 sigintr(int signo __unused)
371 {
372         stopped = 1;
373 }
374
375 /*
376  * Figure out the cpu power domains.
377  */
378 static int
379 acpi_get_cpupwrdom(void)
380 {
381         struct cpu_pwrdom *dom;
382         cpumask_t pwrdom_mask;
383         char buf[64];
384         char members[1024];
385         char *str;
386         size_t msize;
387         int n, i, ncpu = 0, dom_id;
388
389         memset(cpu2pwrdom, 0, sizeof(cpu2pwrdom));
390         memset(cpu_pwrdomain, 0, sizeof(cpu_pwrdomain));
391         CPUMASK_ASSZERO(cpu_pwrdom_mask);
392
393         for (i = 0; i < MAXDOM; ++i) {
394                 snprintf(buf, sizeof(buf),
395                          "hw.acpi.cpu.px_dom%d.available", i);
396                 if (sysctlbyname(buf, NULL, NULL, NULL, 0) < 0)
397                         continue;
398
399                 dom = calloc(1, sizeof(*dom));
400                 dom->dom_id = i;
401
402                 if (cpu_pwrdomain[i] != NULL) {
403                         fprintf(stderr, "cpu power domain %d exists\n", i);
404                         exit(1);
405                 }
406                 cpu_pwrdomain[i] = dom;
407                 CPUMASK_ORBIT(cpu_pwrdom_mask, i);
408         }
409         pwrdom_mask = cpu_pwrdom_mask;
410
411         while (CPUMASK_TESTNZERO(pwrdom_mask)) {
412                 dom_id = BSFCPUMASK(pwrdom_mask);
413                 CPUMASK_NANDBIT(pwrdom_mask, dom_id);
414                 dom = cpu_pwrdomain[dom_id];
415
416                 CPUMASK_ASSZERO(dom->dom_cpumask);
417
418                 snprintf(buf, sizeof(buf),
419                          "hw.acpi.cpu.px_dom%d.members", dom->dom_id);
420                 msize = sizeof(members);
421                 if (sysctlbyname(buf, members, &msize, NULL, 0) < 0) {
422                         cpu_pwrdomain[dom_id] = NULL;
423                         free(dom);
424                         continue;
425                 }
426
427                 members[msize] = 0;
428                 for (str = strtok(members, " "); str; str = strtok(NULL, " ")) {
429                         n = -1;
430                         sscanf(str, "cpu%d", &n);
431                         if (n >= 0) {
432                                 ++ncpu;
433                                 ++dom->dom_ncpus;
434                                 CPUMASK_ORBIT(dom->dom_cpumask, n);
435                                 cpu2pwrdom[n] = dom->dom_id;
436                         }
437                 }
438                 if (dom->dom_ncpus == 0) {
439                         cpu_pwrdomain[dom_id] = NULL;
440                         free(dom);
441                         continue;
442                 }
443                 if (DebugOpt) {
444                         printf("dom%d cpumask: ", dom->dom_id);
445                         for (i = 0; i < (int)NELEM(dom->dom_cpumask.ary); ++i) {
446                                 printf("%jx ",
447                                     (uintmax_t)dom->dom_cpumask.ary[i]);
448                         }
449                         printf("\n");
450                 }
451         }
452
453         if (ncpu != NCpus) {
454                 if (DebugOpt)
455                         printf("Found %d cpus, expecting %d\n", ncpu, NCpus);
456
457                 pwrdom_mask = cpu_pwrdom_mask;
458                 while (CPUMASK_TESTNZERO(pwrdom_mask)) {
459                         dom_id = BSFCPUMASK(pwrdom_mask);
460                         CPUMASK_NANDBIT(pwrdom_mask, dom_id);
461                         dom = cpu_pwrdomain[dom_id];
462                         if (dom != NULL)
463                                 free(dom);
464                 }
465                 return 0;
466         }
467         return 1;
468 }
469
470 /*
471  * Save per-cpu load and sum of per-cpu load.
472  */
473 static void
474 get_cputime(double pollrate)
475 {
476         static struct kinfo_cputime ocpu_time[MAXCPU];
477         static struct kinfo_cputime ncpu_time[MAXCPU];
478         size_t slen;
479         int ncpu;
480         int cpu;
481         uint64_t delta;
482
483         bcopy(ncpu_time, ocpu_time, sizeof(struct kinfo_cputime) * NCpus);
484
485         slen = sizeof(ncpu_time);
486         if (sysctlbyname("kern.cputime", &ncpu_time, &slen, NULL, 0) < 0) {
487                 fprintf(stderr, "kern.cputime sysctl not available\n");
488                 exit(1);
489         }
490         ncpu = slen / sizeof(ncpu_time[0]);
491
492         delta = 0;
493         for (cpu = 0; cpu < ncpu; ++cpu) {
494                 uint64_t d;
495
496                 d = (ncpu_time[cpu].cp_user + ncpu_time[cpu].cp_sys +
497                      ncpu_time[cpu].cp_nice + ncpu_time[cpu].cp_intr) -
498                     (ocpu_time[cpu].cp_user + ocpu_time[cpu].cp_sys +
499                      ocpu_time[cpu].cp_nice + ocpu_time[cpu].cp_intr);
500                 pcpu_state[cpu].cpu_qavg = (double)d / (pollrate * 1000000.0);
501
502                 delta += d;
503         }
504         global_cpu_state.cpu_qavg = (double)delta / (pollrate * 1000000.0);
505 }
506
507 static void
508 acpi_getcpufreq_str(int dom_id, int *highest0, int *lowest0)
509 {
510         char buf[256], sysid[64];
511         size_t buflen;
512         char *ptr;
513         int v, highest, lowest;
514
515         /*
516          * Retrieve availability list
517          */
518         snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.available",
519             dom_id);
520         buflen = sizeof(buf) - 1;
521         if (sysctlbyname(sysid, buf, &buflen, NULL, 0) < 0)
522                 return;
523         buf[buflen] = 0;
524
525         /*
526          * Parse out the highest and lowest cpu frequencies
527          */
528         ptr = buf;
529         highest = lowest = 0;
530         while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) {
531                 if ((lowest == 0 || lowest > v) &&
532                     (LowestCpuFreq <= 0 || v >= LowestCpuFreq))
533                         lowest = v;
534                 if ((highest == 0 || highest < v) &&
535                     (HighestCpuFreq <= 0 || v <= HighestCpuFreq))
536                         highest = v;
537                 /* 
538                  * Detect turbo mode
539                  */
540                 if (!TurboOpt && highest - v == 1)
541                         highest = v;
542         }
543
544         *highest0 = highest;
545         *lowest0 = lowest;
546 }
547
548 static int
549 acpi_getcpufreq_bin(int dom_id, int *highest0, int *lowest0)
550 {
551         char sysid[64];
552         int freq[MAXFREQ];
553         size_t freqlen;
554         int freqcnt, i;
555
556         /*
557          * Retrieve availability list
558          */
559         snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.avail", dom_id);
560         freqlen = sizeof(freq);
561         if (sysctlbyname(sysid, freq, &freqlen, NULL, 0) < 0)
562                 return 0;
563
564         freqcnt = freqlen / sizeof(freq[0]);
565         if (freqcnt == 0)
566                 return 0;
567
568         for (i = freqcnt - 1; i >= 0; --i) {
569                 *lowest0 = freq[i];
570                 if (LowestCpuFreq <= 0 || *lowest0 >= LowestCpuFreq)
571                         break;
572         }
573
574         i = 0;
575         *highest0 = freq[0];
576         if (!TurboOpt && freqcnt > 1 && freq[0] - freq[1] == 1) {
577                 i = 1;
578                 *highest0 = freq[1];
579         }
580         for (; i < freqcnt; ++i) {
581                 if (HighestCpuFreq <= 0 || *highest0 <= HighestCpuFreq)
582                         break;
583                 *highest0 = freq[i];
584         }
585         return 1;
586 }
587
/*
 * Get the highest and lowest cpu frequency of a power domain.  The binary
 * sysctl is tried first, the string sysctl is the fallback.  Both results
 * are 0 if neither source is available.
 */
static void
acpi_get_cpufreq(int dom_id, int *highest, int *lowest)
{
	*highest = *lowest = 0;

	if (!acpi_getcpufreq_bin(dom_id, highest, lowest))
		acpi_getcpufreq_str(dom_id, highest, lowest);
}
598
/*
 * Print the command line synopsis to stderr and exit with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: powerd [-cdeftQ] [-p hysteresis] "
	    "[-h highest_freq] [-l lowest_freq] "
	    "[-r poll_interval] [-u trigger_up] "
	    "[-B min_battery_life] [-L low_battery_linger] "
	    "[-P battery_poll_interval] [-T sample_interval] "
	    "[-b backlight]\n";

	fputs(synopsis, stderr);
	exit(1);
}
611
/*
 * In-place timespec subtraction: *vvp -= *uvp.  One second is borrowed
 * when the nanosecond difference goes negative.  Only defined here if the
 * system headers did not already provide it.
 */
#ifndef timespecsub
#define timespecsub(vvp, uvp)						\
	do {								\
		(vvp)->tv_sec = (vvp)->tv_sec - (uvp)->tv_sec;		\
		(vvp)->tv_nsec = (vvp)->tv_nsec - (uvp)->tv_nsec;	\
		if ((vvp)->tv_nsec < 0) {				\
			(vvp)->tv_sec -= 1;				\
			(vvp)->tv_nsec += 1000000000;			\
		}							\
	} while (0)
#endif
623
624 #define BAT_SYSCTL_TIME_MAX     50000000 /* unit: nanosecond */
625
626 static int
627 has_battery(void)
628 {
629         struct timespec s, e;
630         size_t len;
631         int val;
632
633         clock_gettime(CLOCK_MONOTONIC_FAST, &s);
634         BatLifePrevT = s;
635
636         len = sizeof(val);
637         if (sysctlbyname("hw.acpi.acline", &val, &len, NULL, 0) < 0) {
638                 /* No AC line information */
639                 return 0;
640         }
641         clock_gettime(CLOCK_MONOTONIC_FAST, &e);
642
643         timespecsub(&e, &s);
644         if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
645                 /* hw.acpi.acline takes to long to be useful */
646                 syslog(LOG_NOTICE, "hw.acpi.acline takes too long");
647                 return 0;
648         }
649
650         clock_gettime(CLOCK_MONOTONIC_FAST, &s);
651         len = sizeof(val);
652         if (sysctlbyname("hw.acpi.battery.life", &val, &len, NULL, 0) < 0) {
653                 /* No battery life */
654                 return 0;
655         }
656         clock_gettime(CLOCK_MONOTONIC_FAST, &e);
657
658         timespecsub(&e, &s);
659         if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
660                 /* hw.acpi.battery.life takes to long to be useful */
661                 syslog(LOG_NOTICE, "hw.acpi.battery.life takes too long");
662                 return 0;
663         }
664         return 1;
665 }
666
667 static void
668 low_battery_alert(int life)
669 {
670         int fmt, stereo, freq;
671         int fd;
672
673         syslog(LOG_ALERT, "low battery life %d%%, please plugin AC line, #%d",
674             life, BatShutdownLingerCnt);
675         ++BatShutdownLingerCnt;
676
677         if (!BatShutdownAudioAlert)
678                 return;
679
680         fd = open("/dev/dsp", O_WRONLY);
681         if (fd < 0)
682                 return;
683
684         fmt = AFMT_S16_LE;
685         if (ioctl(fd, SNDCTL_DSP_SETFMT, &fmt, sizeof(fmt)) < 0)
686                 goto done;
687
688         stereo = 0;
689         if (ioctl(fd, SNDCTL_DSP_STEREO, &stereo, sizeof(stereo)) < 0)
690                 goto done;
691
692         freq = 44100;
693         if (ioctl(fd, SNDCTL_DSP_SPEED, &freq, sizeof(freq)) < 0)
694                 goto done;
695
696         write(fd, alert1, sizeof(alert1));
697         write(fd, alert1, sizeof(alert1));
698
699 done:
700         close(fd);
701 }
702
703 static int
704 mon_battery(void)
705 {
706         struct timespec cur, ts;
707         int acline, life;
708         size_t len;
709
710         clock_gettime(CLOCK_MONOTONIC_FAST, &cur);
711         ts = cur;
712         timespecsub(&ts, &BatLifePrevT);
713         if (ts.tv_sec < BatLifePollIntvl)
714                 return 1;
715         BatLifePrevT = cur;
716
717         len = sizeof(acline);
718         if (sysctlbyname("hw.acpi.acline", &acline, &len, NULL, 0) < 0)
719                 return 1;
720         if (acline) {
721                 BatShutdownLinger = -1;
722                 BatShutdownLingerCnt = 0;
723                 restore_backlight();
724                 return 1;
725         }
726
727         if (!BackLightDown && BackLightPct != 100) {
728                 int backlight_max, backlight;
729
730                 len = sizeof(backlight_max);
731                 if (sysctlbyname("hw.backlight_max", &backlight_max, &len,
732                     NULL, 0) < 0) {
733                         /* No more backlight adjustment */
734                         BackLightPct = 100;
735                         goto after_backlight;
736                 }
737
738                 len = sizeof(OldBackLightLevel);
739                 if (sysctlbyname("hw.backlight_level", &OldBackLightLevel, &len,
740                     NULL, 0) < 0) {
741                         /* No more backlight adjustment */
742                         BackLightPct = 100;
743                         goto after_backlight;
744                 }
745
746                 backlight = (backlight_max * BackLightPct) / 100;
747                 if (backlight >= OldBackLightLevel) {
748                         /* No more backlight adjustment */
749                         BackLightPct = 100;
750                         goto after_backlight;
751                 }
752
753                 if (sysctlbyname("hw.backlight_level", NULL, NULL,
754                     &backlight, sizeof(backlight)) < 0) {
755                         /* No more backlight adjustment */
756                         BackLightPct = 100;
757                         goto after_backlight;
758                 }
759                 BackLightDown = 1;
760         }
761 after_backlight:
762
763         len = sizeof(life);
764         if (sysctlbyname("hw.acpi.battery.life", &life, &len, NULL, 0) < 0)
765                 return 1;
766
767         if (BatShutdownLinger > 0) {
768                 ts = cur;
769                 timespecsub(&ts, &BatShutdownStartT);
770                 if (ts.tv_sec > BatShutdownLinger)
771                         BatShutdownLinger = 0;
772         }
773
774         if (life <= BatLifeMin) {
775                 if (BatShutdownLinger == 0 || BatShutdownLingerSet == 0) {
776                         syslog(LOG_ALERT, "low battery life %d%%, "
777                             "shutting down", life);
778                         if (vfork() == 0)
779                                 execlp("poweroff", "poweroff", NULL);
780                         return 0;
781                 } else if (BatShutdownLinger < 0) {
782                         BatShutdownLinger = BatShutdownLingerSet;
783                         BatShutdownStartT = cur;
784                 }
785                 low_battery_alert(life);
786         }
787         return 1;
788 }
789
790 static void
791 get_ncpus(void)
792 {
793         size_t slen;
794
795         slen = sizeof(NCpus);
796         if (sysctlbyname("hw.ncpu", &NCpus, &slen, NULL, 0) < 0)
797                 err(1, "sysctlbyname hw.ncpu failed");
798         if (DebugOpt)
799                 printf("hw.ncpu %d\n", NCpus);
800 }
801
802 static void
803 get_uschedcpus(void)
804 {
805         size_t slen;
806
807         slen = sizeof(usched_cpu_used);
808         if (sysctlbyname("kern.usched_global_cpumask", &usched_cpu_used, &slen,
809             NULL, 0) < 0)
810                 err(1, "sysctlbyname kern.usched_global_cpumask failed");
811         if (DebugOpt) {
812                 int i;
813
814                 printf("usched cpumask was: ");
815                 for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i)
816                         printf("%jx ", (uintmax_t)usched_cpu_used.ary[i]);
817                 printf("\n");
818         }
819 }
820
821 static void
822 set_uschedcpus(void)
823 {
824         if (DebugOpt) {
825                 int i;
826
827                 printf("usched cpumask: ");
828                 for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i) {
829                         printf("%jx ",
830                             (uintmax_t)usched_cpu_used.ary[i]);
831                 }
832                 printf("\n");
833         }
834         sysctlbyname("kern.usched_global_cpumask", NULL, 0,
835             &usched_cpu_used, sizeof(usched_cpu_used));
836 }
837
838 static int
839 has_perfbias(void)
840 {
841         size_t len;
842         int hint;
843
844         len = sizeof(hint);
845         if (sysctlbyname("machdep.perfbias0.hint", &hint, &len, NULL, 0) < 0)
846                 return 0;
847         return 1;
848 }
849
850 static void
851 set_perfbias(int cpu, int inc)
852 {
853         int hint = inc ? 0 : 15;
854         char sysid[64];
855
856         if (DebugOpt)
857                 printf("cpu%d set perfbias hint %d\n", cpu, hint);
858         snprintf(sysid, sizeof(sysid), "machdep.perfbias%d.hint", cpu);
859         sysctlbyname(sysid, NULL, NULL, &hint, sizeof(hint));
860 }
861
862 static void
863 init_perf(void)
864 {
865         struct cpu_state *state;
866         int cpu;
867
868         /* Get usched cpumask */
869         get_uschedcpus();
870
871         /*
872          * Assume everything are used and are maxed out, before we
873          * start.
874          */
875
876         CPUMASK_ASSBMASK(cpu_used, NCpus);
877         cpu_pwrdom_used = cpu_pwrdom_mask;
878         global_pcpu_limit = NCpus;
879
880         for (cpu = 0; cpu < NCpus; ++cpu) {
881                 state = &pcpu_state[cpu];
882
883                 state->cpu_uavg = 0.0;
884                 state->cpu_davg = 0.0;
885                 state->cpu_limit = 1;
886                 state->cpu_count = 1;
887                 snprintf(state->cpu_name, sizeof(state->cpu_name), "cpu%d",
888                     cpu);
889         }
890
891         state = &global_cpu_state;
892         state->cpu_uavg = 0.0;
893         state->cpu_davg = 0.0;
894         state->cpu_limit = NCpus;
895         state->cpu_count = NCpus;
896         strlcpy(state->cpu_name, "global", sizeof(state->cpu_name));
897 }
898
899 static int
900 get_nstate(struct cpu_state *state, double srt)
901 {
902         int ustate, dstate, nstate;
903
904         /* speeding up */
905         state->cpu_uavg = (state->cpu_uavg * 2.0 + state->cpu_qavg) / 3.0;
906         /* slowing down */
907         state->cpu_davg = (state->cpu_davg * srt + state->cpu_qavg) / (srt + 1);
908         if (state->cpu_davg < state->cpu_uavg)
909                 state->cpu_davg = state->cpu_uavg;
910
911         ustate = state->cpu_uavg / TriggerUp;
912         if (ustate < state->cpu_limit)
913                 ustate = state->cpu_uavg / TriggerDown;
914         dstate = state->cpu_davg / TriggerUp;
915         if (dstate < state->cpu_limit)
916                 dstate = state->cpu_davg / TriggerDown;
917
918         nstate = (ustate > dstate) ? ustate : dstate;
919         if (nstate > state->cpu_count)
920                 nstate = state->cpu_count;
921
922         if (DebugOpt) {
923                 printf("%s qavg=%5.2f uavg=%5.2f davg=%5.2f "
924                     "%2d ncpus=%d\n", state->cpu_name,
925                     state->cpu_qavg, state->cpu_uavg, state->cpu_davg,
926                     state->cpu_limit, nstate);
927         }
928         return nstate;
929 }
930
931 static void
932 mon_perf(double srt)
933 {
934         cpumask_t ocpu_used, ocpu_pwrdom_used;
935         int pnstate = 0, nstate;
936         int cpu;
937
938         /*
939          * Find cpus requiring performance and their cooresponding power
940          * domains.  Save the number of cpus requiring performance in
941          * pnstate.
942          */
943         ocpu_used = cpu_used;
944         ocpu_pwrdom_used = cpu_pwrdom_used;
945
946         CPUMASK_ASSZERO(cpu_used);
947         CPUMASK_ASSZERO(cpu_pwrdom_used);
948
949         for (cpu = 0; cpu < NCpus; ++cpu) {
950                 struct cpu_state *state = &pcpu_state[cpu];
951                 int s;
952
953                 s = get_nstate(state, srt);
954                 if (s) {
955                         CPUMASK_ORBIT(cpu_used, cpu);
956                         CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
957                 }
958                 pnstate += s;
959
960                 state->cpu_limit = s;
961         }
962
963         /*
964          * Calculate nstate, the number of cpus we wish to run at max
965          * performance.
966          */
967         nstate = get_nstate(&global_cpu_state, srt);
968
969         if (nstate == global_cpu_state.cpu_limit &&
970             (pnstate == global_pcpu_limit || nstate > pnstate)) {
971                 /* Nothing changed; keep the sets */
972                 cpu_used = ocpu_used;
973                 cpu_pwrdom_used = ocpu_pwrdom_used;
974
975                 global_pcpu_limit = pnstate;
976                 return;
977         }
978         global_pcpu_limit = pnstate;
979
980         if (nstate > pnstate) {
981                 /*
982                  * Add spare cpus to meet global performance requirement.
983                  */
984                 add_spare_cpus(ocpu_used, nstate - pnstate);
985         }
986
987         global_cpu_state.cpu_limit = nstate;
988
989         /*
990          * Adjust cpu and cpu power domain performance
991          */
992         adj_perf(ocpu_used, ocpu_pwrdom_used);
993 }
994
995 static void
996 add_spare_cpus(const cpumask_t ocpu_used, int ncpu)
997 {
998         cpumask_t saved_pwrdom, xcpu_used;
999         int done = 0, cpu;
1000
1001         /*
1002          * Find more cpus in the previous cpu set.
1003          */
1004         xcpu_used = cpu_used;
1005         CPUMASK_XORMASK(xcpu_used, ocpu_used);
1006         while (CPUMASK_TESTNZERO(xcpu_used)) {
1007                 cpu = BSFCPUMASK(xcpu_used);
1008                 CPUMASK_NANDBIT(xcpu_used, cpu);
1009
1010                 if (CPUMASK_TESTBIT(ocpu_used, cpu)) {
1011                         CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
1012                         CPUMASK_ORBIT(cpu_used, cpu);
1013                         --ncpu;
1014                         if (ncpu == 0)
1015                                 return;
1016                 }
1017         }
1018
1019         /*
1020          * Find more cpus in the used cpu power domains.
1021          */
1022         saved_pwrdom = cpu_pwrdom_used;
1023 again:
1024         while (CPUMASK_TESTNZERO(saved_pwrdom)) {
1025                 cpumask_t unused_cpumask;
1026                 int dom;
1027
1028                 dom = BSFCPUMASK(saved_pwrdom);
1029                 CPUMASK_NANDBIT(saved_pwrdom, dom);
1030
1031                 unused_cpumask = cpu_pwrdomain[dom]->dom_cpumask;
1032                 CPUMASK_NANDMASK(unused_cpumask, cpu_used);
1033
1034                 while (CPUMASK_TESTNZERO(unused_cpumask)) {
1035                         cpu = BSFCPUMASK(unused_cpumask);
1036                         CPUMASK_NANDBIT(unused_cpumask, cpu);
1037
1038                         CPUMASK_ORBIT(cpu_pwrdom_used, dom);
1039                         CPUMASK_ORBIT(cpu_used, cpu);
1040                         --ncpu;
1041                         if (ncpu == 0)
1042                                 return;
1043                 }
1044         }
1045         if (!done) {
1046                 done = 1;
1047                 /*
1048                  * Find more cpus in unused cpu power domains
1049                  */
1050                 saved_pwrdom = cpu_pwrdom_mask;
1051                 CPUMASK_NANDMASK(saved_pwrdom, cpu_pwrdom_used);
1052                 goto again;
1053         }
1054         if (DebugOpt)
1055                 printf("%d cpus not found\n", ncpu);
1056 }
1057
1058 static void
1059 acpi_set_cpufreq(int dom, int inc)
1060 {
1061         int lowest, highest, desired;
1062         char sysid[64];
1063
1064         acpi_get_cpufreq(dom, &highest, &lowest);
1065         if (highest == 0 || lowest == 0)
1066                 return;
1067         desired = inc ? highest : lowest;
1068
1069         if (DebugOpt)
1070                 printf("dom%d set frequency %d\n", dom, desired);
1071         snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.select", dom);
1072         sysctlbyname(sysid, NULL, NULL, &desired, sizeof(desired));
1073 }
1074
1075 static void
1076 adj_cpu_pwrdom(int dom, int inc)
1077 {
1078         if (AdjustCpuFreq)
1079                 acpi_set_cpufreq(dom, inc);
1080 }
1081
1082 static void
1083 adj_cpu_perf(int cpu, int inc)
1084 {
1085         if (DebugOpt) {
1086                 if (inc)
1087                         printf("cpu%d increase perf\n", cpu);
1088                 else
1089                         printf("cpu%d decrease perf\n", cpu);
1090         }
1091
1092         if (HasPerfbias)
1093                 set_perfbias(cpu, inc);
1094         if (AdjustCstate)
1095                 set_cstate(cpu, inc);
1096 }
1097
1098 static void
1099 adj_perf(cpumask_t xcpu_used, cpumask_t xcpu_pwrdom_used)
1100 {
1101         cpumask_t old_usched_used;
1102         int cpu, inc;
1103
1104         /*
1105          * Set cpus requiring performance to the userland process
1106          * scheduler.  Leave the rest of cpus unmapped.
1107          */
1108         old_usched_used = usched_cpu_used;
1109         usched_cpu_used = cpu_used;
1110         if (CPUMASK_TESTZERO(usched_cpu_used))
1111                 CPUMASK_ORBIT(usched_cpu_used, 0);
1112         if (CPUMASK_CMPMASKNEQ(usched_cpu_used, old_usched_used))
1113                 set_uschedcpus();
1114
1115         /*
1116          * Adjust per-cpu performance.
1117          */
1118         CPUMASK_XORMASK(xcpu_used, cpu_used);
1119         while (CPUMASK_TESTNZERO(xcpu_used)) {
1120                 cpu = BSFCPUMASK(xcpu_used);
1121                 CPUMASK_NANDBIT(xcpu_used, cpu);
1122
1123                 if (CPUMASK_TESTBIT(cpu_used, cpu)) {
1124                         /* Increase cpu performance */
1125                         inc = 1;
1126                 } else {
1127                         /* Decrease cpu performance */
1128                         inc = 0;
1129                 }
1130                 adj_cpu_perf(cpu, inc);
1131         }
1132
1133         /*
1134          * Adjust cpu power domain performance.  This could affect
1135          * a set of cpus.
1136          */
1137         CPUMASK_XORMASK(xcpu_pwrdom_used, cpu_pwrdom_used);
1138         while (CPUMASK_TESTNZERO(xcpu_pwrdom_used)) {
1139                 int dom;
1140
1141                 dom = BSFCPUMASK(xcpu_pwrdom_used);
1142                 CPUMASK_NANDBIT(xcpu_pwrdom_used, dom);
1143
1144                 if (CPUMASK_TESTBIT(cpu_pwrdom_used, dom)) {
1145                         /* Increase cpu power domain performance */
1146                         inc = 1;
1147                 } else {
1148                         /* Decrease cpu power domain performance */
1149                         inc = 0;
1150                 }
1151                 adj_cpu_pwrdom(dom, inc);
1152         }
1153 }
1154
1155 static void
1156 restore_perf(void)
1157 {
1158         cpumask_t ocpu_used, ocpu_pwrdom_used;
1159
1160         /* Remove highest cpu frequency limitation */
1161         HighestCpuFreq = 0;
1162
1163         ocpu_used = cpu_used;
1164         ocpu_pwrdom_used = cpu_pwrdom_used;
1165
1166         /* Max out all cpus and cpu power domains performance */
1167         CPUMASK_ASSBMASK(cpu_used, NCpus);
1168         cpu_pwrdom_used = cpu_pwrdom_mask;
1169
1170         adj_perf(ocpu_used, ocpu_pwrdom_used);
1171
1172         if (AdjustCstate) {
1173                 /*
1174                  * Restore the original mwait C-state
1175                  */
1176                 if (DebugOpt)
1177                         printf("global set cstate %s\n", orig_global_cx);
1178                 sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1179                     orig_global_cx, strlen(orig_global_cx) + 1);
1180         }
1181 }
1182
1183 static int
1184 probe_cstate(void)
1185 {
1186         char cx_supported[1024];
1187         const char *target;
1188         char *ptr;
1189         int idle_hlt, deep = 1;
1190         size_t len;
1191
1192         len = sizeof(idle_hlt);
1193         if (sysctlbyname("machdep.cpu_idle_hlt", &idle_hlt, &len, NULL, 0) < 0)
1194                 return 0;
1195         if (idle_hlt != 1)
1196                 return 0;
1197
1198         len = sizeof(cx_supported);
1199         if (sysctlbyname("machdep.mwait.CX.supported", cx_supported, &len,
1200             NULL, 0) < 0)
1201                 return 0;
1202
1203         len = sizeof(orig_global_cx);
1204         if (sysctlbyname("machdep.mwait.CX.idle", orig_global_cx, &len,
1205             NULL, 0) < 0)
1206                 return 0;
1207
1208         strlcpy(cpu_perf_cx, "AUTODEEP", sizeof(cpu_perf_cx));
1209         cpu_perf_cxlen = strlen(cpu_perf_cx) + 1;
1210         if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1211             cpu_perf_cx, cpu_perf_cxlen) < 0) {
1212                 /* AUTODEEP is not supported; try AUTO */
1213                 deep = 0;
1214                 strlcpy(cpu_perf_cx, "AUTO", sizeof(cpu_perf_cx));
1215                 cpu_perf_cxlen = strlen(cpu_perf_cx) + 1;
1216                 if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1217                     cpu_perf_cx, cpu_perf_cxlen) < 0)
1218                         return 0;
1219         }
1220
1221         if (!deep)
1222                 target = "C2/0";
1223         else
1224                 target = NULL;
1225         for (ptr = strtok(cx_supported, " "); ptr != NULL;
1226              ptr = strtok(NULL, " ")) {
1227                 if (target == NULL ||
1228                     (target != NULL && strcmp(ptr, target) == 0)) {
1229                         strlcpy(cpu_idle_cx, ptr, sizeof(cpu_idle_cx));
1230                         cpu_idle_cxlen = strlen(cpu_idle_cx) + 1;
1231                         if (target != NULL)
1232                                 break;
1233                 }
1234         }
1235         if (cpu_idle_cxlen == 0)
1236                 return 0;
1237
1238         if (DebugOpt) {
1239                 printf("cstate orig %s, perf %s, idle %s\n",
1240                     orig_global_cx, cpu_perf_cx, cpu_idle_cx);
1241         }
1242         return 1;
1243 }
1244
1245 static void
1246 set_cstate(int cpu, int inc)
1247 {
1248         const char *cst;
1249         char sysid[64];
1250         size_t len;
1251
1252         if (inc) {
1253                 cst = cpu_perf_cx;
1254                 len = cpu_perf_cxlen;
1255         } else {
1256                 cst = cpu_idle_cx;
1257                 len = cpu_idle_cxlen;
1258         }
1259
1260         if (DebugOpt)
1261                 printf("cpu%d set cstate %s\n", cpu, cst);
1262         snprintf(sysid, sizeof(sysid), "machdep.mwait.CX.idle%d", cpu);
1263         sysctlbyname(sysid, NULL, NULL, cst, len);
1264 }
1265
1266 static void
1267 restore_backlight(void)
1268 {
1269         if (BackLightDown) {
1270                 BackLightDown = 0;
1271                 sysctlbyname("hw.backlight_level", NULL, NULL,
1272                     &OldBackLightLevel, sizeof(OldBackLightLevel));
1273         }
1274 }