<sys/time.h>: Add 3rd arg to timespecadd()/sub() and make them public.
[dragonfly.git] / usr.sbin / powerd / powerd.c
1 /*
2  * Copyright (c) 2010,2016 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
/*
 * The powerd daemon:
 * - Monitors the cpu load and adjusts cpu and cpu power domain
 *   performance accordingly.
 * - Monitors battery life.  Raises alerts and shuts down the machine
 *   if battery life runs low.
 */
42
43 #define _KERNEL_STRUCTURES
44 #include <sys/types.h>
45 #include <sys/sysctl.h>
46 #include <sys/kinfo.h>
47 #include <sys/file.h>
48 #include <sys/queue.h>
49 #include <sys/soundcard.h>
50 #include <sys/sensors.h>
51 #include <sys/time.h>
52 #include <machine/cpufunc.h>
53 #include <machine/cpumask.h>
54 #include <err.h>
55 #include <signal.h>
56 #include <stdio.h>
57 #include <stdlib.h>
58 #include <unistd.h>
59 #include <string.h>
60 #include <syslog.h>
61
62 #include "alert1.h"
63
64 #define MAXDOM          MAXCPU  /* worst case, 1 cpu per domain */
65
66 #define MAXFREQ         64
67 #define CST_STRLEN      16
68
69 #define NFREQ_MONPERF   0x0001
70 #define NFREQ_ADJPERF   0x0002
71 #define NFREQ_CPUTEMP   0x0004
72
73 #define NFREQ_ALL       (NFREQ_MONPERF | NFREQ_ADJPERF | NFREQ_CPUTEMP)
74
75 struct cpu_pwrdom {
76         TAILQ_ENTRY(cpu_pwrdom) dom_link;
77         int                     dom_id;
78         int                     dom_ncpus;
79         cpumask_t               dom_cpumask;
80 };
81
/*
 * Load tracking state; one instance exists per cpu plus one global
 * instance for the whole machine.
 */
struct cpu_state {
	double	cpu_qavg;	/* load over the last poll interval */
	double	cpu_uavg;	/* used for speeding up */
	double	cpu_davg;	/* used for slowing down */
	int	cpu_limit;
	int	cpu_count;
	char	cpu_name[8];	/* "cpu<N>" label */
};
90
91 static void usage(void);
92 static void get_ncpus(void);
93 static void mon_cputemp(void);
94
95 /* usched cpumask */
96 static void get_uschedcpus(void);
97 static void set_uschedcpus(void);
98
99 /* perfbias(4) */
100 static int has_perfbias(void);
101 static void set_perfbias(int, int);
102
103 /* acpi(4) P-state */
104 static void acpi_getcpufreq_str(int, int *, int *);
105 static int acpi_getcpufreq_bin(int, int *, int *);
106 static void acpi_get_cpufreq(int, int *, int *);
107 static void acpi_set_cpufreq(int, int);
108 static int acpi_get_cpupwrdom(void);
109
110 /* mwait C-state hint */
111 static int probe_cstate(void);
112 static void set_cstate(int, int);
113
114 /* Performance monitoring */
115 static void init_perf(void);
116 static void mon_perf(double);
117 static void adj_perf(cpumask_t, cpumask_t);
118 static void adj_cpu_pwrdom(int, int);
119 static void adj_cpu_perf(int, int);
120 static void get_cputime(double);
121 static int get_nstate(struct cpu_state *, double);
122 static void add_spare_cpus(const cpumask_t, int);
123 static void restore_perf(void);
124 static void set_global_freq(int freq);
125
126 /* Battery monitoring */
127 static int has_battery(void);
128 static int mon_battery(void);
129 static void low_battery_alert(int);
130
131 /* Backlight */
132 static void restore_backlight(void);
133
134 /* Runtime states for performance monitoring */
135 static int global_pcpu_limit;
136 static struct cpu_state pcpu_state[MAXCPU];
137 static struct cpu_state global_cpu_state;
138 static cpumask_t cpu_used;              /* cpus w/ high perf */
139 static cpumask_t cpu_pwrdom_used;       /* cpu power domains w/ high perf */
140 static cpumask_t usched_cpu_used;       /* cpus for usched */
141
142 /* Constants */
143 static cpumask_t cpu_pwrdom_mask;       /* usable cpu power domains */
144 static int cpu2pwrdom[MAXCPU];          /* cpu to cpu power domain map */
145 static struct cpu_pwrdom *cpu_pwrdomain[MAXDOM];
146 static int NCpus;                       /* # of cpus */
147 static char orig_global_cx[CST_STRLEN];
148 static char cpu_perf_cx[CST_STRLEN];
149 static int cpu_perf_cxlen;
150 static char cpu_idle_cx[CST_STRLEN];
151 static int cpu_idle_cxlen;
152 static int FreqAry[MAXFREQ];
153 static int NFreq;
154 static int NFreqChanged = NFREQ_ALL;
155 static int SavedPXGlobal;
156
157 static int DebugOpt;
158 static int TurboOpt = 1;
159 static int PowerFd;
160 static int Hysteresis = 10;     /* percentage */
161 static double TriggerUp = 0.25; /* single-cpu load to force max freq */
162 static double TriggerDown;      /* load per cpu to force the min freq */
163 static int HasPerfbias = 0;
164 static int AdjustCpuFreq = 1;
165 static int AdjustCstate = 0;
166 static int HighestCpuFreq;
167 static int LowestCpuFreq;
168 static int AdjustUsched = 1;
169
170 static int AdjustCpuFreqOverride;
171
/*
 * Set by the signal handler to terminate the monitoring loop.
 * sig_atomic_t is the integer type that may be safely written from
 * an asynchronous signal handler.
 */
static volatile sig_atomic_t stopped;
173
174 /* Battery life monitoring */
175 static int BatLifeMin = 2;      /* shutdown the box, if low on battery life */
176 static struct timespec BatLifePrevT;
177 static int BatLifePollIntvl = 5; /* unit: sec */
178 static struct timespec BatShutdownStartT;
179 static int BatShutdownLinger = -1;
180 static int BatShutdownLingerSet = 60; /* unit: sec */
181 static int BatShutdownLingerCnt;
182 static int BatShutdownAudioAlert = 1;
183 static int MinTemp = 75;
184 static int MaxTemp = 85;
185 static int BackLightPct = 100;
186 static int OldBackLightLevel;
187 static int BackLightDown;
188
189 static void sigintr(int signo);
190
191 int
192 main(int ac, char **av)
193 {
194         double srt;
195         double pollrate;
196         int ch;
197         int lowest;
198         int highest;
199         char buf[64];
200         int monbat;
201         char *p2;
202
203         srt = 8.0;      /* time for samples - 8 seconds */
204         pollrate = 1.0; /* polling rate in seconds */
205
206         while ((ch = getopt(ac, av, "b:cdefh:l:p:r:tu:B:H:L:P:QT:U")) != -1) {
207                 switch(ch) {
208                 case 'b':
209                         BackLightPct = strtol(optarg, NULL, 10);
210                         break;
211                 case 'c':
212                         AdjustCstate = 1;
213                         break;
214                 case 'd':
215                         DebugOpt = 1;
216                         break;
217                 case 'e':
218                         HasPerfbias = 1;
219                         break;
220                 case 'f':
221                         AdjustCpuFreq = 0;
222                         break;
223                 case 'h':
224                         HighestCpuFreq = strtol(optarg, NULL, 10);
225                         break;
226                 case 'l':
227                         LowestCpuFreq = strtol(optarg, NULL, 10);
228                         break;
229                 case 'p':
230                         Hysteresis = (int)strtol(optarg, NULL, 10);
231                         break;
232                 case 'r':
233                         pollrate = strtod(optarg, NULL);
234                         break;
235                 case 't':
236                         TurboOpt = 0;
237                         break;
238                 case 'u':
239                         TriggerUp = (double)strtol(optarg, NULL, 10) / 100;
240                         break;
241                 case 'B':
242                         BatLifeMin = strtol(optarg, NULL, 10);
243                         break;
244                 case 'H':
245                         MaxTemp = strtol(optarg, &p2, 0);
246                         if (*p2 == ':') {
247                                 MinTemp = MaxTemp;
248                                 MaxTemp = strtol(p2 + 1, NULL, 0);
249                         } else {
250                                 MinTemp = MaxTemp * 9 / 10;
251                         }
252                         break;
253                 case 'L':
254                         BatShutdownLingerSet = strtol(optarg, NULL, 10);
255                         if (BatShutdownLingerSet < 0)
256                                 BatShutdownLingerSet = 0;
257                         break;
258                 case 'P':
259                         BatLifePollIntvl = strtol(optarg, NULL, 10);
260                         break;
261                 case 'Q':
262                         BatShutdownAudioAlert = 0;
263                         break;
264                 case 'T':
265                         srt = strtod(optarg, NULL);
266                         break;
267                 case 'U':
268                         AdjustUsched = 0;
269                         break;
270                 default:
271                         usage();
272                         /* NOT REACHED */
273                 }
274         }
275         ac -= optind;
276         av += optind;
277
278         setlinebuf(stdout);
279
280         /* Get number of cpus */
281         get_ncpus();
282
283         /* Seed FreqAry[] */
284         acpi_get_cpufreq(0, &lowest, &highest);
285
286         if (Hysteresis < 0 || Hysteresis > 99) {
287                 fprintf(stderr, "Invalid hysteresis value\n");
288                 exit(1);
289         }
290
291         if (TriggerUp < 0 || TriggerUp > 1) {
292                 fprintf(stderr, "Invalid load limit value\n");
293                 exit(1);
294         }
295
296         if (BackLightPct > 100 || BackLightPct <= 0) {
297                 fprintf(stderr, "Invalid backlight setting, ignore\n");
298                 BackLightPct = 100;
299         }
300
301         TriggerDown = TriggerUp - (TriggerUp * (double) Hysteresis / 100);
302
303         /*
304          * Make sure powerd is not already running.
305          */
306         PowerFd = open("/var/run/powerd.pid", O_CREAT|O_RDWR, 0644);
307         if (PowerFd < 0) {
308                 fprintf(stderr,
309                         "Cannot create /var/run/powerd.pid, "
310                         "continuing anyway\n");
311         } else {
312                 ssize_t r;
313                 pid_t pid = -1;
314
315                 r = read(PowerFd, buf, sizeof(buf) - 1);
316                 if (r > 0) {
317                         buf[r] = 0;
318                         pid = strtol(buf, NULL, 0);
319                 }
320                 if (flock(PowerFd, LOCK_EX|LOCK_NB) < 0) {
321                         if (pid > 0) {
322                                 kill(pid, SIGTERM);
323                                 flock(PowerFd, LOCK_EX);
324                                 fprintf(stderr, "restarting powerd\n");
325                         } else {
326                                 fprintf(stderr,
327                                         "powerd is already running, "
328                                         "unable to kill pid for restart\n");
329                                 exit(1);
330                         }
331                 }
332                 lseek(PowerFd, 0L, 0);
333         }
334
335         /*
336          * Demonize and set pid
337          */
338         if (DebugOpt == 0) {
339                 daemon(0, 0);
340                 openlog("powerd", LOG_CONS | LOG_PID, LOG_DAEMON);
341         }
342
343         if (PowerFd >= 0) {
344                 ftruncate(PowerFd, 0);
345                 snprintf(buf, sizeof(buf), "%d\n", (int)getpid());
346                 write(PowerFd, buf, strlen(buf));
347         }
348
349         /* Do we need to monitor battery life? */
350         if (BatLifePollIntvl <= 0)
351                 monbat = 0;
352         else
353                 monbat = has_battery();
354
355         /* Do we have perfbias(4)? */
356         if (HasPerfbias)
357                 HasPerfbias = has_perfbias();
358
359         /* Could we adjust C-state? */
360         if (AdjustCstate)
361                 AdjustCstate = probe_cstate();
362
363         /*
364          * Wait hw.acpi.cpu.px_dom* sysctl to be created by kernel.
365          *
366          * Since hw.acpi.cpu.px_dom* creation is queued into ACPI
367          * taskqueue and ACPI taskqueue is shared across various
368          * ACPI modules, any delay in other modules may cause
369          * hw.acpi.cpu.px_dom* to be created at quite a later time
370          * (e.g. cmbat module's task could take quite a lot of time).
371          */
372         for (;;) {
373                 /* Prime delta cputime calculation. */
374                 get_cputime(pollrate);
375
376                 /* Wait for all cpus to appear */
377                 if (acpi_get_cpupwrdom())
378                         break;
379                 usleep((int)(pollrate * 1000000.0));
380         }
381
382         /*
383          * Catch some signals so that max performance could be restored.
384          */
385         signal(SIGINT, sigintr);
386         signal(SIGTERM, sigintr);
387
388         /* Initialize performance states */
389         init_perf();
390
391         srt = srt / pollrate;   /* convert to sample count */
392         if (DebugOpt)
393                 printf("samples for downgrading: %5.2f\n", srt);
394
395         /*
396          * Monitoring loop
397          */
398         while (!stopped) {
399                 /*
400                  * Monitor performance
401                  */
402                 get_cputime(pollrate);
403                 mon_cputemp();
404                 mon_perf(srt);
405
406                 /*
407                  * Monitor battery
408                  */
409                 if (monbat)
410                         monbat = mon_battery();
411
412                 usleep((int)(pollrate * 1000000.0));
413         }
414
415         /*
416          * Set to maximum performance if killed.
417          */
418         syslog(LOG_INFO, "killed, setting max and exiting");
419         if (SavedPXGlobal)
420                 set_global_freq(SavedPXGlobal);
421         restore_perf();
422         restore_backlight();
423
424         exit(0);
425 }
426
427 static void
428 sigintr(int signo __unused)
429 {
430         stopped = 1;
431 }
432
433 /*
434  * Figure out the cpu power domains.
435  */
436 static int
437 acpi_get_cpupwrdom(void)
438 {
439         struct cpu_pwrdom *dom;
440         cpumask_t pwrdom_mask;
441         char buf[64];
442         char members[1024];
443         char *str;
444         size_t msize;
445         int n, i, ncpu = 0, dom_id;
446
447         memset(cpu2pwrdom, 0, sizeof(cpu2pwrdom));
448         memset(cpu_pwrdomain, 0, sizeof(cpu_pwrdomain));
449         CPUMASK_ASSZERO(cpu_pwrdom_mask);
450
451         for (i = 0; i < MAXDOM; ++i) {
452                 snprintf(buf, sizeof(buf),
453                          "hw.acpi.cpu.px_dom%d.available", i);
454                 if (sysctlbyname(buf, NULL, NULL, NULL, 0) < 0)
455                         continue;
456
457                 dom = calloc(1, sizeof(*dom));
458                 dom->dom_id = i;
459
460                 if (cpu_pwrdomain[i] != NULL) {
461                         fprintf(stderr, "cpu power domain %d exists\n", i);
462                         exit(1);
463                 }
464                 cpu_pwrdomain[i] = dom;
465                 CPUMASK_ORBIT(cpu_pwrdom_mask, i);
466         }
467         pwrdom_mask = cpu_pwrdom_mask;
468
469         while (CPUMASK_TESTNZERO(pwrdom_mask)) {
470                 dom_id = BSFCPUMASK(pwrdom_mask);
471                 CPUMASK_NANDBIT(pwrdom_mask, dom_id);
472                 dom = cpu_pwrdomain[dom_id];
473
474                 CPUMASK_ASSZERO(dom->dom_cpumask);
475
476                 snprintf(buf, sizeof(buf),
477                          "hw.acpi.cpu.px_dom%d.members", dom->dom_id);
478                 msize = sizeof(members);
479                 if (sysctlbyname(buf, members, &msize, NULL, 0) < 0) {
480                         cpu_pwrdomain[dom_id] = NULL;
481                         free(dom);
482                         continue;
483                 }
484
485                 members[msize] = 0;
486                 for (str = strtok(members, " "); str; str = strtok(NULL, " ")) {
487                         n = -1;
488                         sscanf(str, "cpu%d", &n);
489                         if (n >= 0) {
490                                 ++ncpu;
491                                 ++dom->dom_ncpus;
492                                 CPUMASK_ORBIT(dom->dom_cpumask, n);
493                                 cpu2pwrdom[n] = dom->dom_id;
494                         }
495                 }
496                 if (dom->dom_ncpus == 0) {
497                         cpu_pwrdomain[dom_id] = NULL;
498                         free(dom);
499                         continue;
500                 }
501                 if (DebugOpt) {
502                         printf("dom%d cpumask: ", dom->dom_id);
503                         for (i = 0; i < (int)NELEM(dom->dom_cpumask.ary); ++i) {
504                                 printf("%jx ",
505                                     (uintmax_t)dom->dom_cpumask.ary[i]);
506                         }
507                         printf("\n");
508                 }
509         }
510
511         if (ncpu != NCpus) {
512                 if (DebugOpt)
513                         printf("Found %d cpus, expecting %d\n", ncpu, NCpus);
514
515                 pwrdom_mask = cpu_pwrdom_mask;
516                 while (CPUMASK_TESTNZERO(pwrdom_mask)) {
517                         dom_id = BSFCPUMASK(pwrdom_mask);
518                         CPUMASK_NANDBIT(pwrdom_mask, dom_id);
519                         dom = cpu_pwrdomain[dom_id];
520                         if (dom != NULL)
521                                 free(dom);
522                 }
523                 return 0;
524         }
525         return 1;
526 }
527
528 /*
529  * Save per-cpu load and sum of per-cpu load.
530  */
531 static void
532 get_cputime(double pollrate)
533 {
534         static struct kinfo_cputime ocpu_time[MAXCPU];
535         static struct kinfo_cputime ncpu_time[MAXCPU];
536         size_t slen;
537         int ncpu;
538         int cpu;
539         uint64_t delta;
540
541         bcopy(ncpu_time, ocpu_time, sizeof(struct kinfo_cputime) * NCpus);
542
543         slen = sizeof(ncpu_time);
544         if (sysctlbyname("kern.cputime", &ncpu_time, &slen, NULL, 0) < 0) {
545                 fprintf(stderr, "kern.cputime sysctl not available\n");
546                 exit(1);
547         }
548         ncpu = slen / sizeof(ncpu_time[0]);
549
550         delta = 0;
551         for (cpu = 0; cpu < ncpu; ++cpu) {
552                 uint64_t d;
553
554                 d = (ncpu_time[cpu].cp_user + ncpu_time[cpu].cp_sys +
555                      ncpu_time[cpu].cp_nice + ncpu_time[cpu].cp_intr) -
556                     (ocpu_time[cpu].cp_user + ocpu_time[cpu].cp_sys +
557                      ocpu_time[cpu].cp_nice + ocpu_time[cpu].cp_intr);
558                 pcpu_state[cpu].cpu_qavg = (double)d / (pollrate * 1000000.0);
559
560                 delta += d;
561         }
562         global_cpu_state.cpu_qavg = (double)delta / (pollrate * 1000000.0);
563 }
564
565 static void
566 acpi_getcpufreq_str(int dom_id, int *highest0, int *lowest0)
567 {
568         char buf[256], sysid[64];
569         size_t buflen;
570         char *ptr;
571         int v, highest, lowest;
572         int freqidx;
573
574         /*
575          * Retrieve availability list
576          */
577         snprintf(sysid, sizeof(sysid),
578                  "hw.acpi.cpu.px_dom%d.available", dom_id);
579         buflen = sizeof(buf) - 1;
580         if (sysctlbyname(sysid, buf, &buflen, NULL, 0) < 0)
581                 return;
582         buf[buflen] = 0;
583
584         /*
585          * Parse out the highest and lowest cpu frequencies
586          */
587         ptr = buf;
588         highest = lowest = 0;
589         freqidx = 0;
590         while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) {
591                 if ((lowest == 0 || lowest > v) &&
592                     (LowestCpuFreq <= 0 || v >= LowestCpuFreq))
593                         lowest = v;
594                 if ((highest == 0 || highest < v) &&
595                     (HighestCpuFreq <= 0 || v <= HighestCpuFreq))
596                         highest = v;
597                 /* 
598                  * Detect turbo mode
599                  */
600                 if (!TurboOpt && highest - v == 1)
601                         highest = v;
602                 ++freqidx;
603         }
604
605         /*
606          * Frequency array
607          */
608         if (freqidx > MAXFREQ)
609                 freqidx = MAXFREQ;
610         if (NFreq != freqidx) {
611                 NFreq = freqidx;
612                 NFreqChanged = NFREQ_ALL;
613         }
614         ptr = buf;
615         while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) {
616                 if (freqidx == 0)
617                         break;
618                 if (FreqAry[freqidx - 1] != v)
619                         NFreqChanged = NFREQ_ALL;
620                 FreqAry[--freqidx] = v;
621         }
622
623         *highest0 = highest;
624         *lowest0 = lowest;
625 }
626
627 static int
628 acpi_getcpufreq_bin(int dom_id, int *highest0, int *lowest0)
629 {
630         char sysid[64];
631         size_t freqlen;
632         int freqcnt, i;
633         int freqary[MAXFREQ];
634
635         /*
636          * Retrieve availability list
637          */
638         snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.avail", dom_id);
639         freqlen = sizeof(FreqAry);
640         bzero(freqary, sizeof(freqary));
641         if (sysctlbyname(sysid, freqary, &freqlen, NULL, 0) < 0)
642                 return 0;
643
644         freqcnt = freqlen / sizeof(freqary[0]);
645         if (NFreq != freqcnt) {
646                 NFreq = freqcnt;
647                 NFreqChanged = NFREQ_ALL;
648         }
649         if (bcmp(freqary, FreqAry, sizeof(FreqAry)) != 0)
650                 NFreqChanged = NFREQ_ALL;
651         bcopy(freqary, FreqAry, sizeof(FreqAry));
652         if (freqcnt == 0)
653                 return 0;
654
655         for (i = freqcnt - 1; i >= 0; --i) {
656                 *lowest0 = FreqAry[i];
657                 if (LowestCpuFreq <= 0 || *lowest0 >= LowestCpuFreq)
658                         break;
659         }
660
661         i = 0;
662         *highest0 = FreqAry[0];
663         if (!TurboOpt && freqcnt > 1 && FreqAry[0] - FreqAry[1] == 1) {
664                 i = 1;
665                 *highest0 = FreqAry[1];
666         }
667         for (; i < freqcnt; ++i) {
668                 if (HighestCpuFreq <= 0 || *highest0 <= HighestCpuFreq)
669                         break;
670                 *highest0 = FreqAry[i];
671         }
672         return 1;
673 }
674
/*
 * Get the highest and lowest usable frequency of a power domain.
 * The binary sysctl is tried first, the string based one serves as
 * fallback.  Both outputs are 0 if neither yields a value.
 */
static void
acpi_get_cpufreq(int dom_id, int *highest, int *lowest)
{
	*lowest = 0;
	*highest = 0;

	if (!acpi_getcpufreq_bin(dom_id, highest, lowest))
		acpi_getcpufreq_str(dom_id, highest, lowest);
}
685
/*
 * Print the command line synopsis to stderr and exit with status 1.
 * The synopsis covers every option accepted by getopt() in main(),
 * including -H which takes either "max" or "min:max".
 */
static
void
usage(void)
{
	fprintf(stderr, "usage: powerd [-cdeftQU] [-p hysteresis] "
	    "[-h highest_freq] [-l lowest_freq] "
	    "[-r poll_interval] [-u trigger_up] "
	    "[-B min_battery_life] [-L low_battery_linger] "
	    "[-P battery_poll_interval] [-T sample_interval] "
	    "[-H [min_temp:]max_temp] [-b backlight]\n");
	exit(1);
}
698
699 #define BAT_SYSCTL_TIME_MAX     50000000 /* unit: nanosecond */
700
701 static int
702 has_battery(void)
703 {
704         struct timespec s, e;
705         size_t len;
706         int val;
707
708         clock_gettime(CLOCK_MONOTONIC_FAST, &s);
709         BatLifePrevT = s;
710
711         len = sizeof(val);
712         if (sysctlbyname("hw.acpi.acline", &val, &len, NULL, 0) < 0) {
713                 /* No AC line information */
714                 return 0;
715         }
716         clock_gettime(CLOCK_MONOTONIC_FAST, &e);
717
718         timespecsub(&e, &s, &e);
719         if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
720                 /* hw.acpi.acline takes to long to be useful */
721                 syslog(LOG_NOTICE, "hw.acpi.acline takes too long");
722                 return 0;
723         }
724
725         clock_gettime(CLOCK_MONOTONIC_FAST, &s);
726         len = sizeof(val);
727         if (sysctlbyname("hw.acpi.battery.life", &val, &len, NULL, 0) < 0) {
728                 /* No battery life */
729                 return 0;
730         }
731         clock_gettime(CLOCK_MONOTONIC_FAST, &e);
732
733         timespecsub(&e, &s, &e);
734         if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
735                 /* hw.acpi.battery.life takes to long to be useful */
736                 syslog(LOG_NOTICE, "hw.acpi.battery.life takes too long");
737                 return 0;
738         }
739         return 1;
740 }
741
742 static void
743 low_battery_alert(int life)
744 {
745         int fmt, stereo, freq;
746         int fd;
747
748         syslog(LOG_ALERT, "low battery life %d%%, please plugin AC line, #%d",
749             life, BatShutdownLingerCnt);
750         ++BatShutdownLingerCnt;
751
752         if (!BatShutdownAudioAlert)
753                 return;
754
755         fd = open("/dev/dsp", O_WRONLY);
756         if (fd < 0)
757                 return;
758
759         fmt = AFMT_S16_LE;
760         if (ioctl(fd, SNDCTL_DSP_SETFMT, &fmt, sizeof(fmt)) < 0)
761                 goto done;
762
763         stereo = 0;
764         if (ioctl(fd, SNDCTL_DSP_STEREO, &stereo, sizeof(stereo)) < 0)
765                 goto done;
766
767         freq = 44100;
768         if (ioctl(fd, SNDCTL_DSP_SPEED, &freq, sizeof(freq)) < 0)
769                 goto done;
770
771         write(fd, alert1, sizeof(alert1));
772         write(fd, alert1, sizeof(alert1));
773
774 done:
775         close(fd);
776 }
777
778 static int
779 mon_battery(void)
780 {
781         struct timespec cur, ts;
782         int acline, life;
783         size_t len;
784
785         clock_gettime(CLOCK_MONOTONIC_FAST, &cur);
786         timespecsub(&cur, &BatLifePrevT, &ts);
787         if (ts.tv_sec < BatLifePollIntvl)
788                 return 1;
789         BatLifePrevT = cur;
790
791         len = sizeof(acline);
792         if (sysctlbyname("hw.acpi.acline", &acline, &len, NULL, 0) < 0)
793                 return 1;
794         if (acline) {
795                 BatShutdownLinger = -1;
796                 BatShutdownLingerCnt = 0;
797                 restore_backlight();
798                 return 1;
799         }
800
801         if (!BackLightDown && BackLightPct != 100) {
802                 int backlight_max, backlight;
803
804                 len = sizeof(backlight_max);
805                 if (sysctlbyname("hw.backlight_max", &backlight_max, &len,
806                     NULL, 0) < 0) {
807                         /* No more backlight adjustment */
808                         BackLightPct = 100;
809                         goto after_backlight;
810                 }
811
812                 len = sizeof(OldBackLightLevel);
813                 if (sysctlbyname("hw.backlight_level", &OldBackLightLevel, &len,
814                     NULL, 0) < 0) {
815                         /* No more backlight adjustment */
816                         BackLightPct = 100;
817                         goto after_backlight;
818                 }
819
820                 backlight = (backlight_max * BackLightPct) / 100;
821                 if (backlight >= OldBackLightLevel) {
822                         /* No more backlight adjustment */
823                         BackLightPct = 100;
824                         goto after_backlight;
825                 }
826
827                 if (sysctlbyname("hw.backlight_level", NULL, NULL,
828                     &backlight, sizeof(backlight)) < 0) {
829                         /* No more backlight adjustment */
830                         BackLightPct = 100;
831                         goto after_backlight;
832                 }
833                 BackLightDown = 1;
834         }
835 after_backlight:
836
837         len = sizeof(life);
838         if (sysctlbyname("hw.acpi.battery.life", &life, &len, NULL, 0) < 0)
839                 return 1;
840
841         if (BatShutdownLinger > 0) {
842                 timespecsub(&cur, &BatShutdownStartT, &ts);
843                 if (ts.tv_sec > BatShutdownLinger)
844                         BatShutdownLinger = 0;
845         }
846
847         if (life <= BatLifeMin) {
848                 if (BatShutdownLinger == 0 || BatShutdownLingerSet == 0) {
849                         syslog(LOG_ALERT, "low battery life %d%%, "
850                             "shutting down", life);
851                         if (vfork() == 0)
852                                 execlp("poweroff", "poweroff", NULL);
853                         return 0;
854                 } else if (BatShutdownLinger < 0) {
855                         BatShutdownLinger = BatShutdownLingerSet;
856                         BatShutdownStartT = cur;
857                 }
858                 low_battery_alert(life);
859         }
860         return 1;
861 }
862
863 static void
864 get_ncpus(void)
865 {
866         size_t slen;
867
868         slen = sizeof(NCpus);
869         if (sysctlbyname("hw.ncpu", &NCpus, &slen, NULL, 0) < 0)
870                 err(1, "sysctlbyname hw.ncpu failed");
871         if (DebugOpt)
872                 printf("hw.ncpu %d\n", NCpus);
873 }
874
875 static void
876 get_uschedcpus(void)
877 {
878         size_t slen;
879
880         slen = sizeof(usched_cpu_used);
881         if (sysctlbyname("kern.usched_global_cpumask", &usched_cpu_used, &slen,
882             NULL, 0) < 0)
883                 err(1, "sysctlbyname kern.usched_global_cpumask failed");
884         if (DebugOpt) {
885                 int i;
886
887                 printf("usched cpumask was: ");
888                 for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i)
889                         printf("%jx ", (uintmax_t)usched_cpu_used.ary[i]);
890                 printf("\n");
891         }
892 }
893
894 static void
895 set_uschedcpus(void)
896 {
897         if (DebugOpt) {
898                 int i;
899
900                 printf("usched cpumask: ");
901                 for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i) {
902                         printf("%jx ",
903                             (uintmax_t)usched_cpu_used.ary[i]);
904                 }
905                 printf("\n");
906         }
907         sysctlbyname("kern.usched_global_cpumask", NULL, 0,
908             &usched_cpu_used, sizeof(usched_cpu_used));
909 }
910
/*
 * Probe for the machdep.perfbias0.hint sysctl.
 *
 * Returns 1 when the sysctl can be read, 0 otherwise.  The value read
 * is discarded; only the success of the lookup matters.
 */
static int
has_perfbias(void)
{
	int hint;
	size_t hintlen = sizeof(hint);
	int rc;

	rc = sysctlbyname("machdep.perfbias0.hint", &hint, &hintlen, NULL, 0);
	return (rc < 0) ? 0 : 1;
}
922
923 static void
924 set_perfbias(int cpu, int inc)
925 {
926         int hint = inc ? 0 : 15;
927         char sysid[64];
928
929         if (DebugOpt)
930                 printf("cpu%d set perfbias hint %d\n", cpu, hint);
931         snprintf(sysid, sizeof(sysid), "machdep.perfbias%d.hint", cpu);
932         sysctlbyname(sysid, NULL, NULL, &hint, sizeof(hint));
933 }
934
935 static void
936 init_perf(void)
937 {
938         struct cpu_state *state;
939         int cpu;
940
941         /* Get usched cpumask */
942         get_uschedcpus();
943
944         /*
945          * Assume everything are used and are maxed out, before we
946          * start.
947          */
948         CPUMASK_ASSBMASK(cpu_used, NCpus);
949         cpu_pwrdom_used = cpu_pwrdom_mask;
950         global_pcpu_limit = NCpus;
951
952         for (cpu = 0; cpu < NCpus; ++cpu) {
953                 state = &pcpu_state[cpu];
954
955                 state->cpu_uavg = 0.0;
956                 state->cpu_davg = 0.0;
957                 state->cpu_limit = 1;
958                 state->cpu_count = 1;
959                 snprintf(state->cpu_name, sizeof(state->cpu_name), "cpu%d",
960                     cpu);
961         }
962
963         state = &global_cpu_state;
964         state->cpu_uavg = 0.0;
965         state->cpu_davg = 0.0;
966         state->cpu_limit = NCpus;
967         state->cpu_count = NCpus;
968         strlcpy(state->cpu_name, "global", sizeof(state->cpu_name));
969 }
970
971 static int
972 get_nstate(struct cpu_state *state, double srt)
973 {
974         int ustate, dstate, nstate;
975
976         /* speeding up */
977         state->cpu_uavg = (state->cpu_uavg * 2.0 + state->cpu_qavg) / 3.0;
978         /* slowing down */
979         state->cpu_davg = (state->cpu_davg * srt + state->cpu_qavg) / (srt + 1);
980         if (state->cpu_davg < state->cpu_uavg)
981                 state->cpu_davg = state->cpu_uavg;
982
983         ustate = state->cpu_uavg / TriggerUp;
984         if (ustate < state->cpu_limit)
985                 ustate = state->cpu_uavg / TriggerDown;
986         dstate = state->cpu_davg / TriggerUp;
987         if (dstate < state->cpu_limit)
988                 dstate = state->cpu_davg / TriggerDown;
989
990         nstate = (ustate > dstate) ? ustate : dstate;
991         if (nstate > state->cpu_count)
992                 nstate = state->cpu_count;
993
994         if (DebugOpt) {
995                 printf("%s qavg=%5.2f uavg=%5.2f davg=%5.2f "
996                     "%2d ncpus=%d\n", state->cpu_name,
997                     state->cpu_qavg, state->cpu_uavg, state->cpu_davg,
998                     state->cpu_limit, nstate);
999         }
1000         return nstate;
1001 }
1002
1003 static void
1004 mon_perf(double srt)
1005 {
1006         cpumask_t ocpu_used, ocpu_pwrdom_used;
1007         int pnstate = 0, nstate;
1008         int cpu;
1009
1010         /*
1011          * Find cpus requiring performance and their cooresponding power
1012          * domains.  Save the number of cpus requiring performance in
1013          * pnstate.
1014          */
1015         ocpu_used = cpu_used;
1016         ocpu_pwrdom_used = cpu_pwrdom_used;
1017
1018         CPUMASK_ASSZERO(cpu_used);
1019         CPUMASK_ASSZERO(cpu_pwrdom_used);
1020
1021         for (cpu = 0; cpu < NCpus; ++cpu) {
1022                 struct cpu_state *state = &pcpu_state[cpu];
1023                 int s;
1024
1025                 s = get_nstate(state, srt);
1026                 if (s) {
1027                         CPUMASK_ORBIT(cpu_used, cpu);
1028                         CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
1029                 }
1030                 pnstate += s;
1031
1032                 state->cpu_limit = s;
1033         }
1034
1035         /*
1036          * Calculate nstate, the number of cpus we wish to run at max
1037          * performance.
1038          */
1039         nstate = get_nstate(&global_cpu_state, srt);
1040
1041         if (nstate == global_cpu_state.cpu_limit &&
1042             (NFreqChanged & NFREQ_MONPERF) == 0 &&
1043             (pnstate == global_pcpu_limit || nstate > pnstate)) {
1044                 /* Nothing changed; keep the sets */
1045                 cpu_used = ocpu_used;
1046                 cpu_pwrdom_used = ocpu_pwrdom_used;
1047
1048                 global_pcpu_limit = pnstate;
1049                 return;
1050         }
1051         NFreqChanged &= ~NFREQ_MONPERF;
1052         global_pcpu_limit = pnstate;
1053
1054         if (nstate > pnstate) {
1055                 /*
1056                  * Add spare cpus to meet global performance requirement.
1057                  */
1058                 add_spare_cpus(ocpu_used, nstate - pnstate);
1059         }
1060
1061         global_cpu_state.cpu_limit = nstate;
1062
1063         /*
1064          * Adjust cpu and cpu power domain performance
1065          */
1066         adj_perf(ocpu_used, ocpu_pwrdom_used);
1067 }
1068
1069 static void
1070 add_spare_cpus(const cpumask_t ocpu_used, int ncpu)
1071 {
1072         cpumask_t saved_pwrdom, xcpu_used;
1073         int done = 0, cpu;
1074
1075         /*
1076          * Find more cpus in the previous cpu set.
1077          */
1078         xcpu_used = cpu_used;
1079         CPUMASK_XORMASK(xcpu_used, ocpu_used);
1080         while (CPUMASK_TESTNZERO(xcpu_used)) {
1081                 cpu = BSFCPUMASK(xcpu_used);
1082                 CPUMASK_NANDBIT(xcpu_used, cpu);
1083
1084                 if (CPUMASK_TESTBIT(ocpu_used, cpu)) {
1085                         CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
1086                         CPUMASK_ORBIT(cpu_used, cpu);
1087                         --ncpu;
1088                         if (ncpu == 0)
1089                                 return;
1090                 }
1091         }
1092
1093         /*
1094          * Find more cpus in the used cpu power domains.
1095          */
1096         saved_pwrdom = cpu_pwrdom_used;
1097 again:
1098         while (CPUMASK_TESTNZERO(saved_pwrdom)) {
1099                 cpumask_t unused_cpumask;
1100                 int dom;
1101
1102                 dom = BSFCPUMASK(saved_pwrdom);
1103                 CPUMASK_NANDBIT(saved_pwrdom, dom);
1104
1105                 unused_cpumask = cpu_pwrdomain[dom]->dom_cpumask;
1106                 CPUMASK_NANDMASK(unused_cpumask, cpu_used);
1107
1108                 while (CPUMASK_TESTNZERO(unused_cpumask)) {
1109                         cpu = BSFCPUMASK(unused_cpumask);
1110                         CPUMASK_NANDBIT(unused_cpumask, cpu);
1111
1112                         CPUMASK_ORBIT(cpu_pwrdom_used, dom);
1113                         CPUMASK_ORBIT(cpu_used, cpu);
1114                         --ncpu;
1115                         if (ncpu == 0)
1116                                 return;
1117                 }
1118         }
1119         if (!done) {
1120                 done = 1;
1121                 /*
1122                  * Find more cpus in unused cpu power domains
1123                  */
1124                 saved_pwrdom = cpu_pwrdom_mask;
1125                 CPUMASK_NANDMASK(saved_pwrdom, cpu_pwrdom_used);
1126                 goto again;
1127         }
1128         if (DebugOpt)
1129                 printf("%d cpus not found\n", ncpu);
1130 }
1131
1132 static void
1133 acpi_set_cpufreq(int dom, int inc)
1134 {
1135         int lowest, highest, desired;
1136         char sysid[64];
1137
1138         acpi_get_cpufreq(dom, &highest, &lowest);
1139         if (highest == 0 || lowest == 0)
1140                 return;
1141         desired = inc ? highest : lowest;
1142
1143         if (DebugOpt)
1144                 printf("dom%d set frequency %d\n", dom, desired);
1145         snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.select", dom);
1146         sysctlbyname(sysid, NULL, NULL, &desired, sizeof(desired));
1147 }
1148
1149 static void
1150 adj_cpu_pwrdom(int dom, int inc)
1151 {
1152         if (AdjustCpuFreq && (inc == 0 || AdjustCpuFreqOverride == 0))
1153                 acpi_set_cpufreq(dom, inc);
1154 }
1155
1156 static void
1157 adj_cpu_perf(int cpu, int inc)
1158 {
1159         if (DebugOpt) {
1160                 if (inc)
1161                         printf("cpu%d increase perf\n", cpu);
1162                 else
1163                         printf("cpu%d decrease perf\n", cpu);
1164         }
1165
1166         if (HasPerfbias)
1167                 set_perfbias(cpu, inc);
1168         if (AdjustCstate)
1169                 set_cstate(cpu, inc);
1170 }
1171
1172 static void
1173 adj_perf(cpumask_t xcpu_used, cpumask_t xcpu_pwrdom_used)
1174 {
1175         int cpu, inc;
1176
1177         if (AdjustUsched) {
1178                 cpumask_t old_usched_used;
1179
1180                 /*
1181                  * Set cpus requiring performance to the userland process
1182                  * scheduler.  Leave the rest of cpus unmapped.
1183                  */
1184                 old_usched_used = usched_cpu_used;
1185                 usched_cpu_used = cpu_used;
1186                 if (CPUMASK_TESTZERO(usched_cpu_used))
1187                         CPUMASK_ORBIT(usched_cpu_used, 0);
1188                 if (CPUMASK_CMPMASKNEQ(usched_cpu_used, old_usched_used))
1189                         set_uschedcpus();
1190         }
1191
1192         /*
1193          * Adjust per-cpu performance for any cpus which changed.
1194          */
1195         CPUMASK_XORMASK(xcpu_used, cpu_used);
1196         if (NFreqChanged & NFREQ_ADJPERF)
1197                 CPUMASK_ASSBMASK(xcpu_used, NCpus);
1198         while (CPUMASK_TESTNZERO(xcpu_used)) {
1199                 cpu = BSFCPUMASK(xcpu_used);
1200                 CPUMASK_NANDBIT(xcpu_used, cpu);
1201
1202                 if (CPUMASK_TESTBIT(cpu_used, cpu)) {
1203                         /* Increase cpu performance */
1204                         inc = 1;
1205                 } else {
1206                         /* Decrease cpu performance */
1207                         inc = 0;
1208                 }
1209                 adj_cpu_perf(cpu, inc);
1210         }
1211
1212         /*
1213          * Adjust cpu power domain performance.  This could affect
1214          * a set of cpus.
1215          */
1216         CPUMASK_XORMASK(xcpu_pwrdom_used, cpu_pwrdom_used);
1217         if (NFreqChanged & NFREQ_ADJPERF)
1218                 CPUMASK_ASSBMASK(xcpu_pwrdom_used, NCpus);
1219         while (CPUMASK_TESTNZERO(xcpu_pwrdom_used)) {
1220                 int dom;
1221
1222                 dom = BSFCPUMASK(xcpu_pwrdom_used);
1223                 CPUMASK_NANDBIT(xcpu_pwrdom_used, dom);
1224
1225                 if (CPUMASK_TESTBIT(cpu_pwrdom_used, dom)) {
1226                         /* Increase cpu power domain performance */
1227                         inc = 1;
1228                 } else {
1229                         /* Decrease cpu power domain performance */
1230                         inc = 0;
1231                 }
1232                 adj_cpu_pwrdom(dom, inc);
1233         }
1234         NFreqChanged &= ~NFREQ_ADJPERF;
1235 }
1236
1237 static void
1238 restore_perf(void)
1239 {
1240         cpumask_t ocpu_used, ocpu_pwrdom_used;
1241
1242         /* Remove highest cpu frequency limitation */
1243         HighestCpuFreq = 0;
1244
1245         ocpu_used = cpu_used;
1246         ocpu_pwrdom_used = cpu_pwrdom_used;
1247
1248         /* Max out all cpus and cpu power domains performance */
1249         CPUMASK_ASSBMASK(cpu_used, NCpus);
1250         cpu_pwrdom_used = cpu_pwrdom_mask;
1251
1252         adj_perf(ocpu_used, ocpu_pwrdom_used);
1253
1254         if (AdjustCstate) {
1255                 /*
1256                  * Restore the original mwait C-state
1257                  */
1258                 if (DebugOpt)
1259                         printf("global set cstate %s\n", orig_global_cx);
1260                 sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1261                     orig_global_cx, strlen(orig_global_cx) + 1);
1262         }
1263 }
1264
1265 static int
1266 probe_cstate(void)
1267 {
1268         char cx_supported[1024];
1269         const char *target;
1270         char *ptr;
1271         int idle_hlt, deep = 1;
1272         size_t len;
1273
1274         len = sizeof(idle_hlt);
1275         if (sysctlbyname("machdep.cpu_idle_hlt", &idle_hlt, &len, NULL, 0) < 0)
1276                 return 0;
1277         if (idle_hlt != 1)
1278                 return 0;
1279
1280         len = sizeof(cx_supported);
1281         if (sysctlbyname("machdep.mwait.CX.supported", cx_supported, &len,
1282             NULL, 0) < 0)
1283                 return 0;
1284
1285         len = sizeof(orig_global_cx);
1286         if (sysctlbyname("machdep.mwait.CX.idle", orig_global_cx, &len,
1287             NULL, 0) < 0)
1288                 return 0;
1289
1290         strlcpy(cpu_perf_cx, "AUTODEEP", sizeof(cpu_perf_cx));
1291         cpu_perf_cxlen = strlen(cpu_perf_cx) + 1;
1292         if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1293             cpu_perf_cx, cpu_perf_cxlen) < 0) {
1294                 /* AUTODEEP is not supported; try AUTO */
1295                 deep = 0;
1296                 strlcpy(cpu_perf_cx, "AUTO", sizeof(cpu_perf_cx));
1297                 cpu_perf_cxlen = strlen(cpu_perf_cx) + 1;
1298                 if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1299                     cpu_perf_cx, cpu_perf_cxlen) < 0)
1300                         return 0;
1301         }
1302
1303         if (!deep)
1304                 target = "C2/0";
1305         else
1306                 target = NULL;
1307         for (ptr = strtok(cx_supported, " "); ptr != NULL;
1308              ptr = strtok(NULL, " ")) {
1309                 if (target == NULL ||
1310                     (target != NULL && strcmp(ptr, target) == 0)) {
1311                         strlcpy(cpu_idle_cx, ptr, sizeof(cpu_idle_cx));
1312                         cpu_idle_cxlen = strlen(cpu_idle_cx) + 1;
1313                         if (target != NULL)
1314                                 break;
1315                 }
1316         }
1317         if (cpu_idle_cxlen == 0)
1318                 return 0;
1319
1320         if (DebugOpt) {
1321                 printf("cstate orig %s, perf %s, idle %s\n",
1322                     orig_global_cx, cpu_perf_cx, cpu_idle_cx);
1323         }
1324         return 1;
1325 }
1326
1327 static void
1328 set_cstate(int cpu, int inc)
1329 {
1330         const char *cst;
1331         char sysid[64];
1332         size_t len;
1333
1334         if (inc) {
1335                 cst = cpu_perf_cx;
1336                 len = cpu_perf_cxlen;
1337         } else {
1338                 cst = cpu_idle_cx;
1339                 len = cpu_idle_cxlen;
1340         }
1341
1342         if (DebugOpt)
1343                 printf("cpu%d set cstate %s\n", cpu, cst);
1344         snprintf(sysid, sizeof(sysid), "machdep.mwait.CX.idle%d", cpu);
1345         sysctlbyname(sysid, NULL, NULL, cst, len);
1346 }
1347
1348 static void
1349 restore_backlight(void)
1350 {
1351         if (BackLightDown) {
1352                 BackLightDown = 0;
1353                 sysctlbyname("hw.backlight_level", NULL, NULL,
1354                     &OldBackLightLevel, sizeof(OldBackLightLevel));
1355         }
1356 }
1357
1358 /*
1359  * get_cputemp() / mon_cputemp()
1360  *
1361  * This enforces the maximum cpu frequency based on temperature
1362  * verses MinTemp and MaxTemp.
1363  */
1364 static int
1365 get_cputemp(void)
1366 {
1367         char sysid[64];
1368         struct sensor sensor;
1369         size_t sensor_size;
1370         int t;
1371         int mt = -1;
1372         int n;
1373
1374         for (n = 0; ; ++n) {
1375                 t = 0;
1376                 snprintf(sysid, sizeof(sysid),
1377                          "hw.sensors.cpu_node%d.temp0", n);
1378                 sensor_size = sizeof(sensor);
1379                 if (sysctlbyname(sysid, &sensor, &sensor_size, NULL, 0) < 0)
1380                         break;
1381                 t = -1;
1382                 if ((sensor.flags & (SENSOR_FINVALID | SENSOR_FUNKNOWN)) == 0) {
1383                         t = (int)((sensor.value - 273150000) / 1000000);
1384                         if (mt < t)
1385                                 mt = t;
1386                 }
1387         }
1388         if (n)
1389                 return mt;
1390
1391         /*
1392          * Missing nodeN for some reason, try cpuN.
1393          */
1394         for (n = 0; ; ++n) {
1395                 t = 0;
1396                 snprintf(sysid, sizeof(sysid),
1397                          "hw.sensors.cpu%d.temp0", n);
1398                 sensor_size = sizeof(sensor);
1399                 if (sysctlbyname(sysid, &sensor, &sensor_size, NULL, 0) < 0)
1400                         break;
1401                 t = -1;
1402                 if ((sensor.flags & (SENSOR_FINVALID | SENSOR_FUNKNOWN)) == 0) {
1403                         t = (int)((sensor.value - 273150000) / 1000000);
1404                         if (mt < t)
1405                                 mt = t;
1406                 }
1407         }
1408         return mt;
1409 }
1410
/*
 * Write freq to hw.acpi.cpu.px_global.
 *
 * Values <= 0 are ignored.  The sysctl result is not checked (best
 * effort).
 */
static void
set_global_freq(int freq)
{
	if (freq <= 0)
		return;

	sysctlbyname("hw.acpi.cpu.px_global", NULL, NULL,
	    &freq, sizeof(freq));
}
1418
/*
 * Read hw.acpi.cpu.px_global.
 *
 * The sysctl result is not checked.  The return value starts out as -1
 * and is whatever the call left in the buffer afterwards.
 */
static int
get_global_freq(void)
{
	int freq = -1;
	size_t len = sizeof(freq);

	sysctlbyname("hw.acpi.cpu.px_global", &freq, &len, NULL, 0);
	return freq;
}
1431
1432 static void
1433 mon_cputemp(void)
1434 {
1435         static int last_temp = -1;
1436         static int last_idx = -1;
1437         int temp = get_cputemp();
1438         int idx;
1439         int lowest;
1440         int highest;
1441         static int CurPXGlobal __unused;
1442
1443         /*
1444          * Reseed FreqAry, it can change w/AC power state
1445          */
1446         acpi_get_cpufreq(0, &lowest, &highest);
1447
1448         /*
1449          * Some cpu frequency steps can cause large shifts in cpu temperature,
1450          * creating an oscillation that min-maxes the temperature in a way
1451          * that is not desireable.  To deal with this, we impose an exponential
1452          * average for any temperature change.
1453          *
1454          * We have to do this in both directions, otherwise (in particular)
1455          * laptop fan responsiveness and temperature sensor response times
1456          * can create major frequency oscillations.
1457          */
1458         if (last_temp < 0 || (NFreqChanged & NFREQ_CPUTEMP)) {
1459                 NFreqChanged &= ~NFREQ_CPUTEMP;
1460                 last_temp = temp << 8;
1461         } else if (temp < last_temp) {
1462                 last_temp = (last_temp * 15 + (temp << 8)) / 16;
1463                 if (DebugOpt) {
1464                         printf("Falling temp %d (use %d)\n",
1465                                 temp, (last_temp >> 8));
1466                 }
1467         } else {
1468                 last_temp = (last_temp * 15 + (temp << 8)) / 16;
1469                 if (DebugOpt) {
1470                         printf("Rising temp %d (use %d)\n",
1471                                 temp, (last_temp >> 8));
1472                 }
1473         }
1474         temp = last_temp >> 8;
1475
1476         /*
1477          * CPU Temp not available or available frequencies not yet
1478          * probed.
1479          */
1480         if (DebugOpt)
1481                 printf("Temp %d {%d-%d} NFreq=%d)\n",
1482                        temp, MinTemp, MaxTemp, NFreq);
1483         if (temp <= 0)
1484                 return;
1485         if (NFreq == 0)
1486                 return;
1487
1488         /*
1489          * Return to normal operation if under the minimum
1490          */
1491         if (temp <= MinTemp) {
1492                 if (AdjustCpuFreqOverride) {
1493                         AdjustCpuFreqOverride = 0;
1494                         CurPXGlobal = 0;
1495                         NFreqChanged = NFREQ_ALL;
1496                         last_idx = -1;
1497                         syslog(LOG_ALERT,
1498                                "Temp below %d, returning to normal operation",
1499                                MinTemp);
1500                         if (SavedPXGlobal)
1501                                 set_global_freq(SavedPXGlobal);
1502                 }
1503                 return;
1504         }
1505
1506         /*
1507          * Hysteresis before entering temperature control mode
1508          */
1509         if (AdjustCpuFreqOverride == 0 &&
1510             temp <= MinTemp + (MaxTemp - MinTemp) / 10 + 1) {
1511                 return;
1512         }
1513
1514         /*
1515          * Override frequency controls (except for idle -> lowest)
1516          */
1517         if (AdjustCpuFreqOverride == 0) {
1518                 AdjustCpuFreqOverride = 1;
1519                 SavedPXGlobal = get_global_freq();
1520                 CurPXGlobal = 0;
1521                 NFreqChanged = NFREQ_ALL;
1522                 last_idx = -1;
1523                 syslog(LOG_ALERT,
1524                        "Temp %d {%d-%d}, entering temperature control mode",
1525                        temp, MinTemp, MaxTemp);
1526         }
1527         if (temp > MaxTemp + (MaxTemp - MinTemp) / 10 + 1) {
1528                 syslog(LOG_ALERT,
1529                        "Temp %d {%d-%d}, TOO HOT!!!",
1530                        temp, MinTemp, MaxTemp);
1531         }
1532         idx = (temp - MinTemp) * NFreq / (MaxTemp - MinTemp);
1533         if (idx < 0 || idx >= NFreq)    /* overtemp */
1534                 idx = NFreq - 1;
1535
1536         /*
1537          * Limit frequency shifts to single steps in both directions.
1538          * Some fans react very quickly, this will reduce oscillations.
1539          */
1540         if (DebugOpt)
1541                 printf("Temp index %d (use %d)\n", idx, last_idx);
1542         if (last_idx >= 0 && idx < last_idx)
1543                 idx = last_idx - 1;
1544         else if (last_idx >= 0 && idx > last_idx)
1545                 idx = last_idx + 1;
1546         last_idx = idx;
1547
1548         /*
1549          * One last thing, make sure our frequency adheres to
1550          * HighestCpuFreq.  However, override LowestCpuFreq for
1551          * temperature control purposes.
1552          */
1553         while (HighestCpuFreq > 0 && idx < NFreq &&
1554                FreqAry[idx] > HighestCpuFreq) {
1555                 ++idx;
1556         }
1557 #if 0
1558         /*
1559          * Currently ignore LowestCpuFreq if temp control thinks it
1560          * needs to go lower
1561          */
1562         while (LowestCpuFreq > 0 && idx > 0 &&
1563                FreqAry[idx] < LowestCpuFreq) {
1564                 --idx;
1565         }
1566 #endif
1567
1568         if (FreqAry[idx] != CurPXGlobal) {
1569                 CurPXGlobal = FreqAry[idx];
1570
1571 #if 0
1572                 /* this can get noisy so don't log for now */
1573                 syslog(LOG_ALERT,
1574                        "Temp %d {%d-%d}, set frequency %d",
1575                        temp, MinTemp, MaxTemp, CurPXGlobal);
1576 #endif
1577         }
1578         set_global_freq(CurPXGlobal);
1579 }