Merge branch 'vendor/TRE'
[dragonfly.git] / usr.sbin / powerd / powerd.c
1 /*
2  * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
/*
 * The powerd daemon:
 * - Monitors the cpu load and adjusts cpu and cpu power domain
 *   performance accordingly.
 * - Monitors battery life.  Raises alerts and shuts down the machine
 *   when battery life runs low.
 */
42
43 #define _KERNEL_STRUCTURES
44 #include <sys/types.h>
45 #include <sys/sysctl.h>
46 #include <sys/kinfo.h>
47 #include <sys/file.h>
48 #include <sys/queue.h>
49 #include <sys/soundcard.h>
50 #include <sys/time.h>
51 #include <machine/cpufunc.h>
52 #include <err.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <unistd.h>
56 #include <string.h>
57 #include <syslog.h>
58
59 #include "alert1.h"
60
/* Upper bound on the number of cpu power domains that are probed. */
#define MAXDOM          MAXCPU  /* worst case, 1 cpu per domain */

/* Capacity (in entries) of the binary frequency list read from sysctl. */
#define MAXFREQ         64
/* Size of the buffers holding C-state strings. */
#define CST_STRLEN      16
65
/*
 * One cpu power domain, as discovered through the
 * hw.acpi.cpu.px_dom<N>.* sysctl nodes.
 */
struct cpu_pwrdom {
        TAILQ_ENTRY(cpu_pwrdom) dom_link;       /* list linkage; NOTE(review): no user visible in this part of the file */
        int                     dom_id;         /* <N> of hw.acpi.cpu.px_dom<N> */
        int                     dom_ncpus;      /* # of cpus parsed from .members */
        cpumask_t               dom_cpumask;    /* cpus parsed from .members */
};
72
/*
 * Load tracking state.  One instance exists per cpu, plus one global
 * instance that tracks the sum over all cpus.
 */
struct cpu_state {
        double                  cpu_qavg;       /* busy time of the last poll interval, scaled by the poll rate */
        double                  cpu_uavg;       /* used for speeding up */
        double                  cpu_davg;       /* used for slowing down */
        int                     cpu_limit;      /* state computed by the previous pass */
        int                     cpu_count;      /* upper bound for the computed state */
        char                    cpu_name[8];    /* label used in debug output */
};
81
/*
 * Forward declarations, grouped by subsystem.
 */

/* Command line help and cpu count discovery */
static void usage(void);
static void get_ncpus(void);

/* usched cpumask */
static void get_uschedcpus(void);
static void set_uschedcpus(void);

/* perfbias(4) */
static int has_perfbias(void);
static void set_perfbias(int, int);

/* acpi(4) P-state */
static void acpi_getcpufreq_str(int, int *, int *);
static int acpi_getcpufreq_bin(int, int *, int *);
static void acpi_get_cpufreq(int, int *, int *);
static void acpi_set_cpufreq(int, int);
static int acpi_get_cpupwrdom(void);

/* mwait C-state hint */
static int probe_cstate(void);
static void set_cstate(int, int);

/* Performance monitoring */
static void init_perf(void);
static void mon_perf(double);
static void adj_perf(cpumask_t, cpumask_t);
static void adj_cpu_pwrdom(int, int);
static void adj_cpu_perf(int, int);
static void get_cputime(double);
static int get_nstate(struct cpu_state *, double);
static void add_spare_cpus(const cpumask_t, int);
static void restore_perf(void);

/* Battery monitoring */
static int has_battery(void);
static int mon_battery(void);
static void low_battery_alert(int);
119
/* Runtime states for performance monitoring */
static int global_pcpu_limit;           /* sum of per-cpu states from the last mon_perf() pass */
static struct cpu_state pcpu_state[MAXCPU];     /* per-cpu load state */
static struct cpu_state global_cpu_state;       /* load state summed over all cpus */
static cpumask_t cpu_used;              /* cpus w/ high perf */
static cpumask_t cpu_pwrdom_used;       /* cpu power domains w/ high perf */
static cpumask_t usched_cpu_used;       /* cpus for usched */

/* Constants */
static cpumask_t cpu_pwrdom_mask;       /* usable cpu power domains */
static int cpu2pwrdom[MAXCPU];          /* cpu to cpu power domain map */
static struct cpu_pwrdom *cpu_pwrdomain[MAXDOM];
static int NCpus;                       /* # of cpus */
static char orig_global_cx[CST_STRLEN];
static char cpu_perf_cx[CST_STRLEN];
static int cpu_perf_cxlen;
static char cpu_idle_cx[CST_STRLEN];
static int cpu_idle_cxlen;

/* Command line options (see the getopt loop in main()) */
static int DebugOpt;
static int TurboOpt = 1;
static int PowerFd;             /* descriptor of the locked pid file */
static int Hysteresis = 10;     /* percentage */
static double TriggerUp = 0.25; /* single-cpu load to force max freq */
static double TriggerDown;      /* load per cpu to force the min freq */
static int HasPerfbias = 0;
static int AdjustCpuFreq = 1;
static int AdjustCstate = 0;
static int HighestCpuFreq;      /* <= 0: no upper limit */
static int LowestCpuFreq;       /* <= 0: no lower limit */

/* Set by the signal handler; terminates the monitoring loop. */
static volatile int stopped;

/* Battery life monitoring */
static int BatLifeMin = 2;      /* shutdown the box, if low on battery life */
static struct timespec BatLifePrevT;    /* time of the previous battery poll */
static int BatLifePollIntvl = 5; /* unit: sec */
static struct timespec BatShutdownStartT;       /* when the linger countdown started */
static int BatShutdownLinger = -1;      /* -1: not armed, 0: expired, >0: countdown length */
static int BatShutdownLingerSet = 60; /* unit: sec */
static int BatShutdownLingerCnt;        /* # of low battery alerts raised so far */
static int BatShutdownAudioAlert = 1;

static void sigintr(int signo);
164
165 int
166 main(int ac, char **av)
167 {
168         double srt;
169         double pollrate;
170         int ch;
171         char buf[64];
172         int monbat;
173
174         srt = 8.0;      /* time for samples - 8 seconds */
175         pollrate = 1.0; /* polling rate in seconds */
176
177         while ((ch = getopt(ac, av, "cdefh:l:p:r:tu:B:L:P:QT:")) != -1) {
178                 switch(ch) {
179                 case 'c':
180                         AdjustCstate = 1;
181                         break;
182                 case 'd':
183                         DebugOpt = 1;
184                         break;
185                 case 'e':
186                         HasPerfbias = 1;
187                         break;
188                 case 'f':
189                         AdjustCpuFreq = 0;
190                         break;
191                 case 'h':
192                         HighestCpuFreq = strtol(optarg, NULL, 10);
193                         break;
194                 case 'l':
195                         LowestCpuFreq = strtol(optarg, NULL, 10);
196                         break;
197                 case 'p':
198                         Hysteresis = (int)strtol(optarg, NULL, 10);
199                         break;
200                 case 'r':
201                         pollrate = strtod(optarg, NULL);
202                         break;
203                 case 't':
204                         TurboOpt = 0;
205                         break;
206                 case 'u':
207                         TriggerUp = (double)strtol(optarg, NULL, 10) / 100;
208                         break;
209                 case 'B':
210                         BatLifeMin = strtol(optarg, NULL, 10);
211                         break;
212                 case 'L':
213                         BatShutdownLingerSet = strtol(optarg, NULL, 10);
214                         if (BatShutdownLingerSet < 0)
215                                 BatShutdownLingerSet = 0;
216                         break;
217                 case 'P':
218                         BatLifePollIntvl = strtol(optarg, NULL, 10);
219                         break;
220                 case 'Q':
221                         BatShutdownAudioAlert = 0;
222                         break;
223                 case 'T':
224                         srt = strtod(optarg, NULL);
225                         break;
226                 default:
227                         usage();
228                         /* NOT REACHED */
229                 }
230         }
231         ac -= optind;
232         av += optind;
233
234         setlinebuf(stdout);
235
236         /* Get number of cpus */
237         get_ncpus();
238
239         if (0 > Hysteresis || Hysteresis > 99) {
240                 fprintf(stderr, "Invalid hysteresis value\n");
241                 exit(1);
242         }
243
244         if (0 > TriggerUp || TriggerUp > 1) {
245                 fprintf(stderr, "Invalid load limit value\n");
246                 exit(1);
247         }
248
249         TriggerDown = TriggerUp - (TriggerUp * (double) Hysteresis / 100);
250
251         /*
252          * Make sure powerd is not already running.
253          */
254         PowerFd = open("/var/run/powerd.pid", O_CREAT|O_RDWR, 0644);
255         if (PowerFd < 0) {
256                 fprintf(stderr,
257                         "Cannot create /var/run/powerd.pid, "
258                         "continuing anyway\n");
259         } else {
260                 if (flock(PowerFd, LOCK_EX|LOCK_NB) < 0) {
261                         fprintf(stderr, "powerd is already running\n");
262                         exit(1);
263                 }
264         }
265
266         /*
267          * Demonize and set pid
268          */
269         if (DebugOpt == 0) {
270                 daemon(0, 0);
271                 openlog("powerd", LOG_CONS | LOG_PID, LOG_DAEMON);
272         }
273
274         if (PowerFd >= 0) {
275                 ftruncate(PowerFd, 0);
276                 snprintf(buf, sizeof(buf), "%d\n", (int)getpid());
277                 write(PowerFd, buf, strlen(buf));
278         }
279
280         /* Do we need to monitor battery life? */
281         if (BatLifePollIntvl <= 0)
282                 monbat = 0;
283         else
284                 monbat = has_battery();
285
286         /* Do we have perfbias(4)? */
287         if (HasPerfbias)
288                 HasPerfbias = has_perfbias();
289
290         /* Could we adjust C-state? */
291         if (AdjustCstate)
292                 AdjustCstate = probe_cstate();
293
294         /*
295          * Wait hw.acpi.cpu.px_dom* sysctl to be created by kernel.
296          *
297          * Since hw.acpi.cpu.px_dom* creation is queued into ACPI
298          * taskqueue and ACPI taskqueue is shared across various
299          * ACPI modules, any delay in other modules may cause
300          * hw.acpi.cpu.px_dom* to be created at quite a later time
301          * (e.g. cmbat module's task could take quite a lot of time).
302          */
303         for (;;) {
304                 /* Prime delta cputime calculation. */
305                 get_cputime(pollrate);
306
307                 /* Wait for all cpus to appear */
308                 if (acpi_get_cpupwrdom())
309                         break;
310                 usleep((int)(pollrate * 1000000.0));
311         }
312
313         /*
314          * Catch some signals so that max performance could be restored.
315          */
316         signal(SIGINT, sigintr);
317         signal(SIGTERM, sigintr);
318
319         /* Initialize performance states */
320         init_perf();
321
322         srt = srt / pollrate;   /* convert to sample count */
323         if (DebugOpt)
324                 printf("samples for downgrading: %5.2f\n", srt);
325
326         /*
327          * Monitoring loop
328          */
329         while (!stopped) {
330                 /*
331                  * Monitor performance
332                  */
333                 get_cputime(pollrate);
334                 mon_perf(srt);
335
336                 /*
337                  * Monitor battery
338                  */
339                 if (monbat)
340                         monbat = mon_battery();
341
342                 usleep((int)(pollrate * 1000000.0));
343         }
344
345         /*
346          * Set to maximum performance if killed.
347          */
348         syslog(LOG_INFO, "killed, setting max and exiting");
349         restore_perf();
350
351         exit(0);
352 }
353
354 static void
355 sigintr(int signo __unused)
356 {
357         stopped = 1;
358 }
359
360 /*
361  * Figure out the cpu power domains.
362  */
363 static int
364 acpi_get_cpupwrdom(void)
365 {
366         struct cpu_pwrdom *dom;
367         cpumask_t pwrdom_mask;
368         char buf[64];
369         char members[1024];
370         char *str;
371         size_t msize;
372         int n, i, ncpu = 0, dom_id;
373
374         memset(cpu2pwrdom, 0, sizeof(cpu2pwrdom));
375         memset(cpu_pwrdomain, 0, sizeof(cpu_pwrdomain));
376         CPUMASK_ASSZERO(cpu_pwrdom_mask);
377
378         for (i = 0; i < MAXDOM; ++i) {
379                 snprintf(buf, sizeof(buf),
380                          "hw.acpi.cpu.px_dom%d.available", i);
381                 if (sysctlbyname(buf, NULL, NULL, NULL, 0) < 0)
382                         continue;
383
384                 dom = calloc(1, sizeof(*dom));
385                 dom->dom_id = i;
386
387                 if (cpu_pwrdomain[i] != NULL) {
388                         fprintf(stderr, "cpu power domain %d exists\n", i);
389                         exit(1);
390                 }
391                 cpu_pwrdomain[i] = dom;
392                 CPUMASK_ORBIT(cpu_pwrdom_mask, i);
393         }
394         pwrdom_mask = cpu_pwrdom_mask;
395
396         while (CPUMASK_TESTNZERO(pwrdom_mask)) {
397                 dom_id = BSFCPUMASK(pwrdom_mask);
398                 CPUMASK_NANDBIT(pwrdom_mask, dom_id);
399                 dom = cpu_pwrdomain[dom_id];
400
401                 CPUMASK_ASSZERO(dom->dom_cpumask);
402
403                 snprintf(buf, sizeof(buf),
404                          "hw.acpi.cpu.px_dom%d.members", dom->dom_id);
405                 msize = sizeof(members);
406                 if (sysctlbyname(buf, members, &msize, NULL, 0) < 0) {
407                         cpu_pwrdomain[dom_id] = NULL;
408                         free(dom);
409                         continue;
410                 }
411
412                 members[msize] = 0;
413                 for (str = strtok(members, " "); str; str = strtok(NULL, " ")) {
414                         n = -1;
415                         sscanf(str, "cpu%d", &n);
416                         if (n >= 0) {
417                                 ++ncpu;
418                                 ++dom->dom_ncpus;
419                                 CPUMASK_ORBIT(dom->dom_cpumask, n);
420                                 cpu2pwrdom[n] = dom->dom_id;
421                         }
422                 }
423                 if (dom->dom_ncpus == 0) {
424                         cpu_pwrdomain[dom_id] = NULL;
425                         free(dom);
426                         continue;
427                 }
428                 if (DebugOpt) {
429                         printf("dom%d cpumask: ", dom->dom_id);
430                         for (i = 0; i < (int)NELEM(dom->dom_cpumask.ary); ++i) {
431                                 printf("%jx ",
432                                     (uintmax_t)dom->dom_cpumask.ary[i]);
433                         }
434                         printf("\n");
435                 }
436         }
437
438         if (ncpu != NCpus) {
439                 if (DebugOpt)
440                         printf("Found %d cpus, expecting %d\n", ncpu, NCpus);
441
442                 pwrdom_mask = cpu_pwrdom_mask;
443                 while (CPUMASK_TESTNZERO(pwrdom_mask)) {
444                         dom_id = BSFCPUMASK(pwrdom_mask);
445                         CPUMASK_NANDBIT(pwrdom_mask, dom_id);
446                         dom = cpu_pwrdomain[dom_id];
447                         if (dom != NULL)
448                                 free(dom);
449                 }
450                 return 0;
451         }
452         return 1;
453 }
454
455 /*
456  * Save per-cpu load and sum of per-cpu load.
457  */
458 static void
459 get_cputime(double pollrate)
460 {
461         static struct kinfo_cputime ocpu_time[MAXCPU];
462         static struct kinfo_cputime ncpu_time[MAXCPU];
463         size_t slen;
464         int ncpu;
465         int cpu;
466         uint64_t delta;
467
468         bcopy(ncpu_time, ocpu_time, sizeof(struct kinfo_cputime) * NCpus);
469
470         slen = sizeof(ncpu_time);
471         if (sysctlbyname("kern.cputime", &ncpu_time, &slen, NULL, 0) < 0) {
472                 fprintf(stderr, "kern.cputime sysctl not available\n");
473                 exit(1);
474         }
475         ncpu = slen / sizeof(ncpu_time[0]);
476
477         delta = 0;
478         for (cpu = 0; cpu < ncpu; ++cpu) {
479                 uint64_t d;
480
481                 d = (ncpu_time[cpu].cp_user + ncpu_time[cpu].cp_sys +
482                      ncpu_time[cpu].cp_nice + ncpu_time[cpu].cp_intr) -
483                     (ocpu_time[cpu].cp_user + ocpu_time[cpu].cp_sys +
484                      ocpu_time[cpu].cp_nice + ocpu_time[cpu].cp_intr);
485                 pcpu_state[cpu].cpu_qavg = (double)d / (pollrate * 1000000.0);
486
487                 delta += d;
488         }
489         global_cpu_state.cpu_qavg = (double)delta / (pollrate * 1000000.0);
490 }
491
492 static void
493 acpi_getcpufreq_str(int dom_id, int *highest0, int *lowest0)
494 {
495         char buf[256], sysid[64];
496         size_t buflen;
497         char *ptr;
498         int v, highest, lowest;
499
500         /*
501          * Retrieve availability list
502          */
503         snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.available",
504             dom_id);
505         buflen = sizeof(buf) - 1;
506         if (sysctlbyname(sysid, buf, &buflen, NULL, 0) < 0)
507                 return;
508         buf[buflen] = 0;
509
510         /*
511          * Parse out the highest and lowest cpu frequencies
512          */
513         ptr = buf;
514         highest = lowest = 0;
515         while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) {
516                 if ((lowest == 0 || lowest > v) &&
517                     (LowestCpuFreq <= 0 || v >= LowestCpuFreq))
518                         lowest = v;
519                 if ((highest == 0 || highest < v) &&
520                     (HighestCpuFreq <= 0 || v <= HighestCpuFreq))
521                         highest = v;
522                 /* 
523                  * Detect turbo mode
524                  */
525                 if (!TurboOpt && highest - v == 1)
526                         highest = v;
527         }
528
529         *highest0 = highest;
530         *lowest0 = lowest;
531 }
532
533 static int
534 acpi_getcpufreq_bin(int dom_id, int *highest0, int *lowest0)
535 {
536         char sysid[64];
537         int freq[MAXFREQ];
538         size_t freqlen;
539         int freqcnt, i;
540
541         /*
542          * Retrieve availability list
543          */
544         snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.avail", dom_id);
545         freqlen = sizeof(freq);
546         if (sysctlbyname(sysid, freq, &freqlen, NULL, 0) < 0)
547                 return 0;
548
549         freqcnt = freqlen / sizeof(freq[0]);
550         if (freqcnt == 0)
551                 return 0;
552
553         for (i = freqcnt - 1; i >= 0; --i) {
554                 *lowest0 = freq[i];
555                 if (LowestCpuFreq <= 0 || *lowest0 >= LowestCpuFreq)
556                         break;
557         }
558
559         i = 0;
560         *highest0 = freq[0];
561         if (!TurboOpt && freqcnt > 1 && freq[0] - freq[1] == 1) {
562                 i = 1;
563                 *highest0 = freq[1];
564         }
565         for (; i < freqcnt; ++i) {
566                 if (HighestCpuFreq <= 0 || *highest0 <= HighestCpuFreq)
567                         break;
568                 *highest0 = freq[i];
569         }
570         return 1;
571 }
572
/*
 * Look up the highest and lowest frequency of a power domain.  The
 * binary sysctl is tried first, the string sysctl is the fallback.
 * Both outputs are 0 if neither source provides a value.
 */
static void
acpi_get_cpufreq(int dom_id, int *highest, int *lowest)
{
	*lowest = 0;
	*highest = 0;

	if (!acpi_getcpufreq_bin(dom_id, highest, lowest))
		acpi_getcpufreq_str(dom_id, highest, lowest);
}
583
/*
 * Print the command line synopsis to stderr and exit with status 1.
 */
static void
usage(void)
{
	fputs("usage: powerd [-cdeftQ] [-p hysteresis] "
	    "[-h highest_freq] [-l lowest_freq] "
	    "[-r poll_interval] [-u trigger_up] "
	    "[-B min_battery_life] [-L low_battery_linger] "
	    "[-P battery_poll_interval] [-T sample_interval]\n", stderr);
	exit(1);
}
595
/*
 * Fallback for systems whose headers do not provide timespecsub():
 * computes *vvp -= *uvp in place, borrowing one second when the
 * nanosecond field would go negative.
 */
#ifndef timespecsub
#define timespecsub(vvp, uvp)                                           \
        do {                                                            \
                (vvp)->tv_sec -= (uvp)->tv_sec;                         \
                (vvp)->tv_nsec -= (uvp)->tv_nsec;                       \
                if ((vvp)->tv_nsec < 0) {                               \
                        (vvp)->tv_sec--;                                \
                        (vvp)->tv_nsec += 1000000000;                   \
                }                                                       \
        } while (0)
#endif

/* Longest time a battery related sysctl may take to still be polled. */
#define BAT_SYSCTL_TIME_MAX     50000000 /* unit: nanosecond */
609
610 static int
611 has_battery(void)
612 {
613         struct timespec s, e;
614         size_t len;
615         int val;
616
617         clock_gettime(CLOCK_MONOTONIC_FAST, &s);
618         BatLifePrevT = s;
619
620         len = sizeof(val);
621         if (sysctlbyname("hw.acpi.acline", &val, &len, NULL, 0) < 0) {
622                 /* No AC line information */
623                 return 0;
624         }
625         clock_gettime(CLOCK_MONOTONIC_FAST, &e);
626
627         timespecsub(&e, &s);
628         if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
629                 /* hw.acpi.acline takes to long to be useful */
630                 syslog(LOG_NOTICE, "hw.acpi.acline takes too long");
631                 return 0;
632         }
633
634         clock_gettime(CLOCK_MONOTONIC_FAST, &s);
635         len = sizeof(val);
636         if (sysctlbyname("hw.acpi.battery.life", &val, &len, NULL, 0) < 0) {
637                 /* No battery life */
638                 return 0;
639         }
640         clock_gettime(CLOCK_MONOTONIC_FAST, &e);
641
642         timespecsub(&e, &s);
643         if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
644                 /* hw.acpi.battery.life takes to long to be useful */
645                 syslog(LOG_NOTICE, "hw.acpi.battery.life takes too long");
646                 return 0;
647         }
648         return 1;
649 }
650
651 static void
652 low_battery_alert(int life)
653 {
654         int fmt, stereo, freq;
655         int fd;
656
657         syslog(LOG_ALERT, "low battery life %d%%, please plugin AC line, #%d",
658             life, BatShutdownLingerCnt);
659         ++BatShutdownLingerCnt;
660
661         if (!BatShutdownAudioAlert)
662                 return;
663
664         fd = open("/dev/dsp", O_WRONLY);
665         if (fd < 0)
666                 return;
667
668         fmt = AFMT_S16_LE;
669         if (ioctl(fd, SNDCTL_DSP_SETFMT, &fmt, sizeof(fmt)) < 0)
670                 goto done;
671
672         stereo = 0;
673         if (ioctl(fd, SNDCTL_DSP_STEREO, &stereo, sizeof(stereo)) < 0)
674                 goto done;
675
676         freq = 44100;
677         if (ioctl(fd, SNDCTL_DSP_SPEED, &freq, sizeof(freq)) < 0)
678                 goto done;
679
680         write(fd, alert1, sizeof(alert1));
681         write(fd, alert1, sizeof(alert1));
682
683 done:
684         close(fd);
685 }
686
687 static int
688 mon_battery(void)
689 {
690         struct timespec cur, ts;
691         int acline, life;
692         size_t len;
693
694         clock_gettime(CLOCK_MONOTONIC_FAST, &cur);
695         ts = cur;
696         timespecsub(&ts, &BatLifePrevT);
697         if (ts.tv_sec < BatLifePollIntvl)
698                 return 1;
699         BatLifePrevT = cur;
700
701         len = sizeof(acline);
702         if (sysctlbyname("hw.acpi.acline", &acline, &len, NULL, 0) < 0)
703                 return 1;
704         if (acline) {
705                 BatShutdownLinger = -1;
706                 BatShutdownLingerCnt = 0;
707                 return 1;
708         }
709
710         len = sizeof(life);
711         if (sysctlbyname("hw.acpi.battery.life", &life, &len, NULL, 0) < 0)
712                 return 1;
713
714         if (BatShutdownLinger > 0) {
715                 ts = cur;
716                 timespecsub(&ts, &BatShutdownStartT);
717                 if (ts.tv_sec > BatShutdownLinger)
718                         BatShutdownLinger = 0;
719         }
720
721         if (life <= BatLifeMin) {
722                 if (BatShutdownLinger == 0 || BatShutdownLingerSet == 0) {
723                         syslog(LOG_ALERT, "low battery life %d%%, "
724                             "shutting down", life);
725                         if (vfork() == 0)
726                                 execlp("poweroff", "poweroff", NULL);
727                         return 0;
728                 } else if (BatShutdownLinger < 0) {
729                         BatShutdownLinger = BatShutdownLingerSet;
730                         BatShutdownStartT = cur;
731                 }
732                 low_battery_alert(life);
733         }
734         return 1;
735 }
736
737 static void
738 get_ncpus(void)
739 {
740         size_t slen;
741
742         slen = sizeof(NCpus);
743         if (sysctlbyname("hw.ncpu", &NCpus, &slen, NULL, 0) < 0)
744                 err(1, "sysctlbyname hw.ncpu failed");
745         if (DebugOpt)
746                 printf("hw.ncpu %d\n", NCpus);
747 }
748
749 static void
750 get_uschedcpus(void)
751 {
752         size_t slen;
753
754         slen = sizeof(usched_cpu_used);
755         if (sysctlbyname("kern.usched_global_cpumask", &usched_cpu_used, &slen,
756             NULL, 0) < 0)
757                 err(1, "sysctlbyname kern.usched_global_cpumask failed");
758         if (DebugOpt) {
759                 int i;
760
761                 printf("usched cpumask was: ");
762                 for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i)
763                         printf("%jx ", (uintmax_t)usched_cpu_used.ary[i]);
764                 printf("\n");
765         }
766 }
767
768 static void
769 set_uschedcpus(void)
770 {
771         if (DebugOpt) {
772                 int i;
773
774                 printf("usched cpumask: ");
775                 for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i) {
776                         printf("%jx ",
777                             (uintmax_t)usched_cpu_used.ary[i]);
778                 }
779                 printf("\n");
780         }
781         sysctlbyname("kern.usched_global_cpumask", NULL, 0,
782             &usched_cpu_used, sizeof(usched_cpu_used));
783 }
784
785 static int
786 has_perfbias(void)
787 {
788         size_t len;
789         int hint;
790
791         len = sizeof(hint);
792         if (sysctlbyname("machdep.perfbias0.hint", &hint, &len, NULL, 0) < 0)
793                 return 0;
794         return 1;
795 }
796
797 static void
798 set_perfbias(int cpu, int inc)
799 {
800         int hint = inc ? 0 : 15;
801         char sysid[64];
802
803         if (DebugOpt)
804                 printf("cpu%d set perfbias hint %d\n", cpu, hint);
805         snprintf(sysid, sizeof(sysid), "machdep.perfbias%d.hint", cpu);
806         sysctlbyname(sysid, NULL, NULL, &hint, sizeof(hint));
807 }
808
809 static void
810 init_perf(void)
811 {
812         struct cpu_state *state;
813         int cpu;
814
815         /* Get usched cpumask */
816         get_uschedcpus();
817
818         /*
819          * Assume everything are used and are maxed out, before we
820          * start.
821          */
822
823         CPUMASK_ASSBMASK(cpu_used, NCpus);
824         cpu_pwrdom_used = cpu_pwrdom_mask;
825         global_pcpu_limit = NCpus;
826
827         for (cpu = 0; cpu < NCpus; ++cpu) {
828                 state = &pcpu_state[cpu];
829
830                 state->cpu_uavg = 0.0;
831                 state->cpu_davg = 0.0;
832                 state->cpu_limit = 1;
833                 state->cpu_count = 1;
834                 snprintf(state->cpu_name, sizeof(state->cpu_name), "cpu%d",
835                     cpu);
836         }
837
838         state = &global_cpu_state;
839         state->cpu_uavg = 0.0;
840         state->cpu_davg = 0.0;
841         state->cpu_limit = NCpus;
842         state->cpu_count = NCpus;
843         strlcpy(state->cpu_name, "global", sizeof(state->cpu_name));
844 }
845
846 static int
847 get_nstate(struct cpu_state *state, double srt)
848 {
849         int ustate, dstate, nstate;
850
851         /* speeding up */
852         state->cpu_uavg = (state->cpu_uavg * 2.0 + state->cpu_qavg) / 3.0;
853         /* slowing down */
854         state->cpu_davg = (state->cpu_davg * srt + state->cpu_qavg) / (srt + 1);
855         if (state->cpu_davg < state->cpu_uavg)
856                 state->cpu_davg = state->cpu_uavg;
857
858         ustate = state->cpu_uavg / TriggerUp;
859         if (ustate < state->cpu_limit)
860                 ustate = state->cpu_uavg / TriggerDown;
861         dstate = state->cpu_davg / TriggerUp;
862         if (dstate < state->cpu_limit)
863                 dstate = state->cpu_davg / TriggerDown;
864
865         nstate = (ustate > dstate) ? ustate : dstate;
866         if (nstate > state->cpu_count)
867                 nstate = state->cpu_count;
868
869         if (DebugOpt) {
870                 printf("%s qavg=%5.2f uavg=%5.2f davg=%5.2f "
871                     "%2d ncpus=%d\n", state->cpu_name,
872                     state->cpu_qavg, state->cpu_uavg, state->cpu_davg,
873                     state->cpu_limit, nstate);
874         }
875         return nstate;
876 }
877
878 static void
879 mon_perf(double srt)
880 {
881         cpumask_t ocpu_used, ocpu_pwrdom_used;
882         int pnstate = 0, nstate;
883         int cpu;
884
885         /*
886          * Find cpus requiring performance and their cooresponding power
887          * domains.  Save the number of cpus requiring performance in
888          * pnstate.
889          */
890         ocpu_used = cpu_used;
891         ocpu_pwrdom_used = cpu_pwrdom_used;
892
893         CPUMASK_ASSZERO(cpu_used);
894         CPUMASK_ASSZERO(cpu_pwrdom_used);
895
896         for (cpu = 0; cpu < NCpus; ++cpu) {
897                 struct cpu_state *state = &pcpu_state[cpu];
898                 int s;
899
900                 s = get_nstate(state, srt);
901                 if (s) {
902                         CPUMASK_ORBIT(cpu_used, cpu);
903                         CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
904                 }
905                 pnstate += s;
906
907                 state->cpu_limit = s;
908         }
909
910         /*
911          * Calculate nstate, the number of cpus we wish to run at max
912          * performance.
913          */
914         nstate = get_nstate(&global_cpu_state, srt);
915
916         if (nstate == global_cpu_state.cpu_limit &&
917             (pnstate == global_pcpu_limit || nstate > pnstate)) {
918                 /* Nothing changed; keep the sets */
919                 cpu_used = ocpu_used;
920                 cpu_pwrdom_used = ocpu_pwrdom_used;
921
922                 global_pcpu_limit = pnstate;
923                 return;
924         }
925         global_pcpu_limit = pnstate;
926
927         if (nstate > pnstate) {
928                 /*
929                  * Add spare cpus to meet global performance requirement.
930                  */
931                 add_spare_cpus(ocpu_used, nstate - pnstate);
932         }
933
934         global_cpu_state.cpu_limit = nstate;
935
936         /*
937          * Adjust cpu and cpu power domain performance
938          */
939         adj_perf(ocpu_used, ocpu_pwrdom_used);
940 }
941
942 static void
943 add_spare_cpus(const cpumask_t ocpu_used, int ncpu)
944 {
945         cpumask_t saved_pwrdom, xcpu_used;
946         int done = 0, cpu;
947
948         /*
949          * Find more cpus in the previous cpu set.
950          */
951         xcpu_used = cpu_used;
952         CPUMASK_XORMASK(xcpu_used, ocpu_used);
953         while (CPUMASK_TESTNZERO(xcpu_used)) {
954                 cpu = BSFCPUMASK(xcpu_used);
955                 CPUMASK_NANDBIT(xcpu_used, cpu);
956
957                 if (CPUMASK_TESTBIT(ocpu_used, cpu)) {
958                         CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
959                         CPUMASK_ORBIT(cpu_used, cpu);
960                         --ncpu;
961                         if (ncpu == 0)
962                                 return;
963                 }
964         }
965
966         /*
967          * Find more cpus in the used cpu power domains.
968          */
969         saved_pwrdom = cpu_pwrdom_used;
970 again:
971         while (CPUMASK_TESTNZERO(saved_pwrdom)) {
972                 cpumask_t unused_cpumask;
973                 int dom;
974
975                 dom = BSFCPUMASK(saved_pwrdom);
976                 CPUMASK_NANDBIT(saved_pwrdom, dom);
977
978                 unused_cpumask = cpu_pwrdomain[dom]->dom_cpumask;
979                 CPUMASK_NANDMASK(unused_cpumask, cpu_used);
980
981                 while (CPUMASK_TESTNZERO(unused_cpumask)) {
982                         cpu = BSFCPUMASK(unused_cpumask);
983                         CPUMASK_NANDBIT(unused_cpumask, cpu);
984
985                         CPUMASK_ORBIT(cpu_pwrdom_used, dom);
986                         CPUMASK_ORBIT(cpu_used, cpu);
987                         --ncpu;
988                         if (ncpu == 0)
989                                 return;
990                 }
991         }
992         if (!done) {
993                 done = 1;
994                 /*
995                  * Find more cpus in unused cpu power domains
996                  */
997                 saved_pwrdom = cpu_pwrdom_mask;
998                 CPUMASK_NANDMASK(saved_pwrdom, cpu_pwrdom_used);
999                 goto again;
1000         }
1001         if (DebugOpt)
1002                 printf("%d cpus not found\n", ncpu);
1003 }
1004
1005 static void
1006 acpi_set_cpufreq(int dom, int inc)
1007 {
1008         int lowest, highest, desired;
1009         char sysid[64];
1010
1011         acpi_get_cpufreq(dom, &highest, &lowest);
1012         if (highest == 0 || lowest == 0)
1013                 return;
1014         desired = inc ? highest : lowest;
1015
1016         if (DebugOpt)
1017                 printf("dom%d set frequency %d\n", dom, desired);
1018         snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.select", dom);
1019         sysctlbyname(sysid, NULL, NULL, &desired, sizeof(desired));
1020 }
1021
1022 static void
1023 adj_cpu_pwrdom(int dom, int inc)
1024 {
1025         if (AdjustCpuFreq)
1026                 acpi_set_cpufreq(dom, inc);
1027 }
1028
1029 static void
1030 adj_cpu_perf(int cpu, int inc)
1031 {
1032         if (DebugOpt) {
1033                 if (inc)
1034                         printf("cpu%d increase perf\n", cpu);
1035                 else
1036                         printf("cpu%d decrease perf\n", cpu);
1037         }
1038
1039         if (HasPerfbias)
1040                 set_perfbias(cpu, inc);
1041         if (AdjustCstate)
1042                 set_cstate(cpu, inc);
1043 }
1044
1045 static void
1046 adj_perf(cpumask_t xcpu_used, cpumask_t xcpu_pwrdom_used)
1047 {
1048         cpumask_t old_usched_used;
1049         int cpu, inc;
1050
1051         /*
1052          * Set cpus requiring performance to the userland process
1053          * scheduler.  Leave the rest of cpus unmapped.
1054          */
1055         old_usched_used = usched_cpu_used;
1056         usched_cpu_used = cpu_used;
1057         if (CPUMASK_TESTZERO(usched_cpu_used))
1058                 CPUMASK_ORBIT(usched_cpu_used, 0);
1059         if (CPUMASK_CMPMASKNEQ(usched_cpu_used, old_usched_used))
1060                 set_uschedcpus();
1061
1062         /*
1063          * Adjust per-cpu performance.
1064          */
1065         CPUMASK_XORMASK(xcpu_used, cpu_used);
1066         while (CPUMASK_TESTNZERO(xcpu_used)) {
1067                 cpu = BSFCPUMASK(xcpu_used);
1068                 CPUMASK_NANDBIT(xcpu_used, cpu);
1069
1070                 if (CPUMASK_TESTBIT(cpu_used, cpu)) {
1071                         /* Increase cpu performance */
1072                         inc = 1;
1073                 } else {
1074                         /* Decrease cpu performance */
1075                         inc = 0;
1076                 }
1077                 adj_cpu_perf(cpu, inc);
1078         }
1079
1080         /*
1081          * Adjust cpu power domain performance.  This could affect
1082          * a set of cpus.
1083          */
1084         CPUMASK_XORMASK(xcpu_pwrdom_used, cpu_pwrdom_used);
1085         while (CPUMASK_TESTNZERO(xcpu_pwrdom_used)) {
1086                 int dom;
1087
1088                 dom = BSFCPUMASK(xcpu_pwrdom_used);
1089                 CPUMASK_NANDBIT(xcpu_pwrdom_used, dom);
1090
1091                 if (CPUMASK_TESTBIT(cpu_pwrdom_used, dom)) {
1092                         /* Increase cpu power domain performance */
1093                         inc = 1;
1094                 } else {
1095                         /* Decrease cpu power domain performance */
1096                         inc = 0;
1097                 }
1098                 adj_cpu_pwrdom(dom, inc);
1099         }
1100 }
1101
1102 static void
1103 restore_perf(void)
1104 {
1105         cpumask_t ocpu_used, ocpu_pwrdom_used;
1106
1107         /* Remove highest cpu frequency limitation */
1108         HighestCpuFreq = 0;
1109
1110         ocpu_used = cpu_used;
1111         ocpu_pwrdom_used = cpu_pwrdom_used;
1112
1113         /* Max out all cpus and cpu power domains performance */
1114         CPUMASK_ASSBMASK(cpu_used, NCpus);
1115         cpu_pwrdom_used = cpu_pwrdom_mask;
1116
1117         adj_perf(ocpu_used, ocpu_pwrdom_used);
1118
1119         if (AdjustCstate) {
1120                 /*
1121                  * Restore the original mwait C-state
1122                  */
1123                 if (DebugOpt)
1124                         printf("global set cstate %s\n", orig_global_cx);
1125                 sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1126                     orig_global_cx, strlen(orig_global_cx) + 1);
1127         }
1128 }
1129
1130 static int
1131 probe_cstate(void)
1132 {
1133         char cx_supported[1024];
1134         const char *target;
1135         char *ptr;
1136         int idle_hlt, deep = 1;
1137         size_t len;
1138
1139         len = sizeof(idle_hlt);
1140         if (sysctlbyname("machdep.cpu_idle_hlt", &idle_hlt, &len, NULL, 0) < 0)
1141                 return 0;
1142         if (idle_hlt != 1)
1143                 return 0;
1144
1145         len = sizeof(cx_supported);
1146         if (sysctlbyname("machdep.mwait.CX.supported", cx_supported, &len,
1147             NULL, 0) < 0)
1148                 return 0;
1149
1150         len = sizeof(orig_global_cx);
1151         if (sysctlbyname("machdep.mwait.CX.idle", orig_global_cx, &len,
1152             NULL, 0) < 0)
1153                 return 0;
1154
1155         strlcpy(cpu_perf_cx, "AUTODEEP", sizeof(cpu_perf_cx));
1156         cpu_perf_cxlen = strlen(cpu_perf_cx) + 1;
1157         if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1158             cpu_perf_cx, cpu_perf_cxlen) < 0) {
1159                 /* AUTODEEP is not supported; try AUTO */
1160                 deep = 0;
1161                 strlcpy(cpu_perf_cx, "AUTO", sizeof(cpu_perf_cx));
1162                 cpu_perf_cxlen = strlen(cpu_perf_cx) + 1;
1163                 if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1164                     cpu_perf_cx, cpu_perf_cxlen) < 0)
1165                         return 0;
1166         }
1167
1168         if (!deep)
1169                 target = "C2/0";
1170         else
1171                 target = NULL;
1172         for (ptr = strtok(cx_supported, " "); ptr != NULL;
1173              ptr = strtok(NULL, " ")) {
1174                 if (target == NULL ||
1175                     (target != NULL && strcmp(ptr, target) == 0)) {
1176                         strlcpy(cpu_idle_cx, ptr, sizeof(cpu_idle_cx));
1177                         cpu_idle_cxlen = strlen(cpu_idle_cx) + 1;
1178                         if (target != NULL)
1179                                 break;
1180                 }
1181         }
1182         if (cpu_idle_cxlen == 0)
1183                 return 0;
1184
1185         if (DebugOpt) {
1186                 printf("cstate orig %s, perf %s, idle %s\n",
1187                     orig_global_cx, cpu_perf_cx, cpu_idle_cx);
1188         }
1189         return 1;
1190 }
1191
1192 static void
1193 set_cstate(int cpu, int inc)
1194 {
1195         const char *cst;
1196         char sysid[64];
1197         size_t len;
1198
1199         if (inc) {
1200                 cst = cpu_perf_cx;
1201                 len = cpu_perf_cxlen;
1202         } else {
1203                 cst = cpu_idle_cx;
1204                 len = cpu_idle_cxlen;
1205         }
1206
1207         if (DebugOpt)
1208                 printf("cpu%d set cstate %s\n", cpu, cst);
1209         snprintf(sysid, sizeof(sysid), "machdep.mwait.CX.idle%d", cpu);
1210         sysctlbyname(sysid, NULL, NULL, cst, len);
1211 }