powerd - Add temperature-based management
[dragonfly.git] / usr.sbin / powerd / powerd.c
1 /*
2  * Copyright (c) 2010,2016 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35 /*
36  * The powerd daemon:
37  * - Monitors the cpu load and adjusts cpu and cpu power domain
38  *   performance accordingly.
39  * - Monitors battery life.  Raises alerts and shuts down the machine
40  *   if battery life goes low.
41  */
42
43 #define _KERNEL_STRUCTURES
44 #include <sys/types.h>
45 #include <sys/sysctl.h>
46 #include <sys/kinfo.h>
47 #include <sys/file.h>
48 #include <sys/queue.h>
49 #include <sys/soundcard.h>
50 #include <sys/sensors.h>
51 #include <sys/time.h>
52 #include <machine/cpufunc.h>
53 #include <machine/cpumask.h>
54 #include <err.h>
55 #include <signal.h>
56 #include <stdio.h>
57 #include <stdlib.h>
58 #include <unistd.h>
59 #include <string.h>
60 #include <syslog.h>
61
62 #include "alert1.h"
63
64 #define MAXDOM          MAXCPU  /* worst case, 1 cpu per domain */
65
66 #define MAXFREQ         64
67 #define CST_STRLEN      16
68
69 struct cpu_pwrdom {     /* one cpu power domain found via hw.acpi.cpu.px_dom%d */
70         TAILQ_ENTRY(cpu_pwrdom) dom_link;       /* tail queue linkage */
71         int                     dom_id;         /* index used in the px_dom%d sysctl name */
72         int                     dom_ncpus;      /* # of cpus listed in the members sysctl */
73         cpumask_t               dom_cpumask;    /* one bit per member cpu */
74 };
75
76 struct cpu_state {      /* load tracking, one per cpu plus one global instance */
77         double                  cpu_qavg;       /* latest busy-time delta / (pollrate * 1e6) */
78         double                  cpu_uavg;       /* used for speeding up */
79         double                  cpu_davg;       /* used for slowing down */
80         int                     cpu_limit;      /* starts at 1 per cpu, NCpus for the global state */
81         int                     cpu_count;      /* starts at 1 per cpu, NCpus for the global state */
82         char                    cpu_name[8];    /* "cpu%d" or "global" */
83 };
84
85 static void usage(void);
86 static void get_ncpus(void);
87 static void mon_cputemp(void);
88
89 /* usched cpumask */
90 static void get_uschedcpus(void);
91 static void set_uschedcpus(void);
92
93 /* perfbias(4) */
94 static int has_perfbias(void);
95 static void set_perfbias(int, int);
96
97 /* acpi(4) P-state */
98 static void acpi_getcpufreq_str(int, int *, int *);
99 static int acpi_getcpufreq_bin(int, int *, int *);
100 static void acpi_get_cpufreq(int, int *, int *);
101 static void acpi_set_cpufreq(int, int);
102 static int acpi_get_cpupwrdom(void);
103
104 /* mwait C-state hint */
105 static int probe_cstate(void);
106 static void set_cstate(int, int);
107
108 /* Performance monitoring */
109 static void init_perf(void);
110 static void mon_perf(double);
111 static void adj_perf(cpumask_t, cpumask_t);
112 static void adj_cpu_pwrdom(int, int);
113 static void adj_cpu_perf(int, int);
114 static void get_cputime(double);
115 static int get_nstate(struct cpu_state *, double);
116 static void add_spare_cpus(const cpumask_t, int);
117 static void restore_perf(void);
118 static void set_global_freq(int freq);
119
120 /* Battery monitoring */
121 static int has_battery(void);
122 static int mon_battery(void);
123 static void low_battery_alert(int);
124
125 /* Backlight */
126 static void restore_backlight(void);
127
128 /* Runtime states for performance monitoring */
129 static int global_pcpu_limit;
130 static struct cpu_state pcpu_state[MAXCPU];
131 static struct cpu_state global_cpu_state;
132 static cpumask_t cpu_used;              /* cpus w/ high perf */
133 static cpumask_t cpu_pwrdom_used;       /* cpu power domains w/ high perf */
134 static cpumask_t usched_cpu_used;       /* cpus for usched */
135
136 /* Constants */
137 static cpumask_t cpu_pwrdom_mask;       /* usable cpu power domains */
138 static int cpu2pwrdom[MAXCPU];          /* cpu to cpu power domain map */
139 static struct cpu_pwrdom *cpu_pwrdomain[MAXDOM];
140 static int NCpus;                       /* # of cpus */
141 static char orig_global_cx[CST_STRLEN];
142 static char cpu_perf_cx[CST_STRLEN];
143 static int cpu_perf_cxlen;
144 static char cpu_idle_cx[CST_STRLEN];
145 static int cpu_idle_cxlen;
146 static int FreqAry[MAXFREQ];
147 static int NFreq;
148 static int SavedPXGlobal;
149
150 static int DebugOpt;
151 static int TurboOpt = 1;
152 static int PowerFd;
153 static int Hysteresis = 10;     /* percentage */
154 static double TriggerUp = 0.25; /* single-cpu load to force max freq */
155 static double TriggerDown;      /* load per cpu to force the min freq */
156 static int HasPerfbias = 0;
157 static int AdjustCpuFreq = 1;
158 static int AdjustCstate = 0;
159 static int HighestCpuFreq;
160 static int LowestCpuFreq;
161
162 static int AdjustCpuFreqOverride;
163
164 static volatile int stopped;
165
166 /* Battery life monitoring */
167 static int BatLifeMin = 2;      /* shutdown the box, if low on battery life */
168 static struct timespec BatLifePrevT;
169 static int BatLifePollIntvl = 5; /* unit: sec */
170 static struct timespec BatShutdownStartT;
171 static int BatShutdownLinger = -1;
172 static int BatShutdownLingerSet = 60; /* unit: sec */
173 static int BatShutdownLingerCnt;
174 static int BatShutdownAudioAlert = 1;
175 static int MinTemp = 75;
176 static int MaxTemp = 85;
177 static int BackLightPct = 100;
178 static int OldBackLightLevel;
179 static int BackLightDown;
180
181 static void sigintr(int signo);
182
183 int
184 main(int ac, char **av)
185 {
186         double srt;
187         double pollrate;
188         int ch;
189         int lowest;
190         int highest;
191         char buf[64];
192         int monbat;
193         char *p2;
194
195         srt = 8.0;      /* time for samples - 8 seconds */
196         pollrate = 1.0; /* polling rate in seconds */
197
198         while ((ch = getopt(ac, av, "b:cdefh:l:p:r:tu:B:H:L:P:QT:")) != -1) {
199                 switch(ch) {
200                 case 'b':
201                         BackLightPct = strtol(optarg, NULL, 10);
202                         break;
203                 case 'c':
204                         AdjustCstate = 1;
205                         break;
206                 case 'd':
207                         DebugOpt = 1;
208                         break;
209                 case 'e':
210                         HasPerfbias = 1;
211                         break;
212                 case 'f':
213                         AdjustCpuFreq = 0;
214                         break;
215                 case 'h':
216                         HighestCpuFreq = strtol(optarg, NULL, 10);
217                         break;
218                 case 'l':
219                         LowestCpuFreq = strtol(optarg, NULL, 10);
220                         break;
221                 case 'p':
222                         Hysteresis = (int)strtol(optarg, NULL, 10);
223                         break;
224                 case 'r':
225                         pollrate = strtod(optarg, NULL);
226                         break;
227                 case 't':
228                         TurboOpt = 0;
229                         break;
230                 case 'u':
231                         TriggerUp = (double)strtol(optarg, NULL, 10) / 100;
232                         break;
233                 case 'B':
234                         BatLifeMin = strtol(optarg, NULL, 10);
235                         break;
236                 case 'H':
237                         MaxTemp = strtol(optarg, &p2, 0);
238                         if (*p2 == ':') {
239                                 MinTemp = MaxTemp;
240                                 MaxTemp = strtol(p2 + 1, NULL, 0);
241                         } else {
242                                 MinTemp = MaxTemp * 9 / 10;
243                         }
244                         break;
245                 case 'L':
246                         BatShutdownLingerSet = strtol(optarg, NULL, 10);
247                         if (BatShutdownLingerSet < 0)
248                                 BatShutdownLingerSet = 0;
249                         break;
250                 case 'P':
251                         BatLifePollIntvl = strtol(optarg, NULL, 10);
252                         break;
253                 case 'Q':
254                         BatShutdownAudioAlert = 0;
255                         break;
256                 case 'T':
257                         srt = strtod(optarg, NULL);
258                         break;
259                 default:
260                         usage();
261                         /* NOT REACHED */
262                 }
263         }
264         ac -= optind;
265         av += optind;
266
267         setlinebuf(stdout);
268
269         /* Get number of cpus */
270         get_ncpus();
271
272         /* Seed FreqAry[] */
273         acpi_get_cpufreq(0, &lowest, &highest);
274
275         if (Hysteresis < 0 || Hysteresis > 99) {
276                 fprintf(stderr, "Invalid hysteresis value\n");
277                 exit(1);
278         }
279
280         if (TriggerUp < 0 || TriggerUp > 1) {
281                 fprintf(stderr, "Invalid load limit value\n");
282                 exit(1);
283         }
284
285         if (BackLightPct > 100 || BackLightPct <= 0) {
286                 fprintf(stderr, "Invalid backlight setting, ignore\n");
287                 BackLightPct = 100;
288         }
289
290         TriggerDown = TriggerUp - (TriggerUp * (double) Hysteresis / 100);
291
292         /*
293          * Make sure powerd is not already running.
294          */
295         PowerFd = open("/var/run/powerd.pid", O_CREAT|O_RDWR, 0644);
296         if (PowerFd < 0) {
297                 fprintf(stderr,
298                         "Cannot create /var/run/powerd.pid, "
299                         "continuing anyway\n");
300         } else {
301                 ssize_t r;
302                 pid_t pid = -1;
303
304                 r = read(PowerFd, buf, sizeof(buf) - 1);
305                 if (r > 0) {
306                         buf[r] = 0;
307                         pid = strtol(buf, NULL, 0);
308                 }
309                 if (flock(PowerFd, LOCK_EX|LOCK_NB) < 0) {
310                         if (pid > 0) {
311                                 kill(pid, SIGTERM);
312                                 flock(PowerFd, LOCK_EX);
313                                 fprintf(stderr, "restarting powerd\n");
314                         } else {
315                                 fprintf(stderr,
316                                         "powerd is already running, "
317                                         "unable to kill pid for restart\n");
318                                 exit(1);
319                         }
320                 }
321                 lseek(PowerFd, 0L, 0);
322         }
323
324         /*
325          * Demonize and set pid
326          */
327         if (DebugOpt == 0) {
328                 daemon(0, 0);
329                 openlog("powerd", LOG_CONS | LOG_PID, LOG_DAEMON);
330         }
331
332         if (PowerFd >= 0) {
333                 ftruncate(PowerFd, 0);
334                 snprintf(buf, sizeof(buf), "%d\n", (int)getpid());
335                 write(PowerFd, buf, strlen(buf));
336         }
337
338         /* Do we need to monitor battery life? */
339         if (BatLifePollIntvl <= 0)
340                 monbat = 0;
341         else
342                 monbat = has_battery();
343
344         /* Do we have perfbias(4)? */
345         if (HasPerfbias)
346                 HasPerfbias = has_perfbias();
347
348         /* Could we adjust C-state? */
349         if (AdjustCstate)
350                 AdjustCstate = probe_cstate();
351
352         /*
353          * Wait hw.acpi.cpu.px_dom* sysctl to be created by kernel.
354          *
355          * Since hw.acpi.cpu.px_dom* creation is queued into ACPI
356          * taskqueue and ACPI taskqueue is shared across various
357          * ACPI modules, any delay in other modules may cause
358          * hw.acpi.cpu.px_dom* to be created at quite a later time
359          * (e.g. cmbat module's task could take quite a lot of time).
360          */
361         for (;;) {
362                 /* Prime delta cputime calculation. */
363                 get_cputime(pollrate);
364
365                 /* Wait for all cpus to appear */
366                 if (acpi_get_cpupwrdom())
367                         break;
368                 usleep((int)(pollrate * 1000000.0));
369         }
370
371         /*
372          * Catch some signals so that max performance could be restored.
373          */
374         signal(SIGINT, sigintr);
375         signal(SIGTERM, sigintr);
376
377         /* Initialize performance states */
378         init_perf();
379
380         srt = srt / pollrate;   /* convert to sample count */
381         if (DebugOpt)
382                 printf("samples for downgrading: %5.2f\n", srt);
383
384         /*
385          * Monitoring loop
386          */
387         while (!stopped) {
388                 /*
389                  * Monitor performance
390                  */
391                 get_cputime(pollrate);
392                 mon_cputemp();
393                 mon_perf(srt);
394
395                 /*
396                  * Monitor battery
397                  */
398                 if (monbat)
399                         monbat = mon_battery();
400
401                 usleep((int)(pollrate * 1000000.0));
402         }
403
404         /*
405          * Set to maximum performance if killed.
406          */
407         syslog(LOG_INFO, "killed, setting max and exiting");
408         if (SavedPXGlobal)
409                 set_global_freq(SavedPXGlobal);
410         restore_perf();
411         restore_backlight();
412
413         exit(0);
414 }
415
416 static void
417 sigintr(int signo __unused)
418 {
419         stopped = 1;
420 }
421
422 /*
423  * Figure out the cpu power domains.
424  */
425 static int
426 acpi_get_cpupwrdom(void)
427 {
428         struct cpu_pwrdom *dom;
429         cpumask_t pwrdom_mask;
430         char buf[64];
431         char members[1024];
432         char *str;
433         size_t msize;
434         int n, i, ncpu = 0, dom_id;
435
436         memset(cpu2pwrdom, 0, sizeof(cpu2pwrdom));
437         memset(cpu_pwrdomain, 0, sizeof(cpu_pwrdomain));
438         CPUMASK_ASSZERO(cpu_pwrdom_mask);
439
440         for (i = 0; i < MAXDOM; ++i) {
441                 snprintf(buf, sizeof(buf),
442                          "hw.acpi.cpu.px_dom%d.available", i);
443                 if (sysctlbyname(buf, NULL, NULL, NULL, 0) < 0)
444                         continue;
445
446                 dom = calloc(1, sizeof(*dom));
447                 dom->dom_id = i;
448
449                 if (cpu_pwrdomain[i] != NULL) {
450                         fprintf(stderr, "cpu power domain %d exists\n", i);
451                         exit(1);
452                 }
453                 cpu_pwrdomain[i] = dom;
454                 CPUMASK_ORBIT(cpu_pwrdom_mask, i);
455         }
456         pwrdom_mask = cpu_pwrdom_mask;
457
458         while (CPUMASK_TESTNZERO(pwrdom_mask)) {
459                 dom_id = BSFCPUMASK(pwrdom_mask);
460                 CPUMASK_NANDBIT(pwrdom_mask, dom_id);
461                 dom = cpu_pwrdomain[dom_id];
462
463                 CPUMASK_ASSZERO(dom->dom_cpumask);
464
465                 snprintf(buf, sizeof(buf),
466                          "hw.acpi.cpu.px_dom%d.members", dom->dom_id);
467                 msize = sizeof(members);
468                 if (sysctlbyname(buf, members, &msize, NULL, 0) < 0) {
469                         cpu_pwrdomain[dom_id] = NULL;
470                         free(dom);
471                         continue;
472                 }
473
474                 members[msize] = 0;
475                 for (str = strtok(members, " "); str; str = strtok(NULL, " ")) {
476                         n = -1;
477                         sscanf(str, "cpu%d", &n);
478                         if (n >= 0) {
479                                 ++ncpu;
480                                 ++dom->dom_ncpus;
481                                 CPUMASK_ORBIT(dom->dom_cpumask, n);
482                                 cpu2pwrdom[n] = dom->dom_id;
483                         }
484                 }
485                 if (dom->dom_ncpus == 0) {
486                         cpu_pwrdomain[dom_id] = NULL;
487                         free(dom);
488                         continue;
489                 }
490                 if (DebugOpt) {
491                         printf("dom%d cpumask: ", dom->dom_id);
492                         for (i = 0; i < (int)NELEM(dom->dom_cpumask.ary); ++i) {
493                                 printf("%jx ",
494                                     (uintmax_t)dom->dom_cpumask.ary[i]);
495                         }
496                         printf("\n");
497                 }
498         }
499
500         if (ncpu != NCpus) {
501                 if (DebugOpt)
502                         printf("Found %d cpus, expecting %d\n", ncpu, NCpus);
503
504                 pwrdom_mask = cpu_pwrdom_mask;
505                 while (CPUMASK_TESTNZERO(pwrdom_mask)) {
506                         dom_id = BSFCPUMASK(pwrdom_mask);
507                         CPUMASK_NANDBIT(pwrdom_mask, dom_id);
508                         dom = cpu_pwrdomain[dom_id];
509                         if (dom != NULL)
510                                 free(dom);
511                 }
512                 return 0;
513         }
514         return 1;
515 }
516
517 /*
518  * Save per-cpu load and sum of per-cpu load.
519  */
520 static void
521 get_cputime(double pollrate)
522 {
523         static struct kinfo_cputime ocpu_time[MAXCPU];
524         static struct kinfo_cputime ncpu_time[MAXCPU];
525         size_t slen;
526         int ncpu;
527         int cpu;
528         uint64_t delta;
529
530         bcopy(ncpu_time, ocpu_time, sizeof(struct kinfo_cputime) * NCpus);
531
532         slen = sizeof(ncpu_time);
533         if (sysctlbyname("kern.cputime", &ncpu_time, &slen, NULL, 0) < 0) {
534                 fprintf(stderr, "kern.cputime sysctl not available\n");
535                 exit(1);
536         }
537         ncpu = slen / sizeof(ncpu_time[0]);
538
539         delta = 0;
540         for (cpu = 0; cpu < ncpu; ++cpu) {
541                 uint64_t d;
542
543                 d = (ncpu_time[cpu].cp_user + ncpu_time[cpu].cp_sys +
544                      ncpu_time[cpu].cp_nice + ncpu_time[cpu].cp_intr) -
545                     (ocpu_time[cpu].cp_user + ocpu_time[cpu].cp_sys +
546                      ocpu_time[cpu].cp_nice + ocpu_time[cpu].cp_intr);
547                 pcpu_state[cpu].cpu_qavg = (double)d / (pollrate * 1000000.0);
548
549                 delta += d;
550         }
551         global_cpu_state.cpu_qavg = (double)delta / (pollrate * 1000000.0);
552 }
553
554 static void
555 acpi_getcpufreq_str(int dom_id, int *highest0, int *lowest0)
556 {
557         char buf[256], sysid[64];
558         size_t buflen;
559         char *ptr;
560         int v, highest, lowest;
561         int freqidx;
562
563         /*
564          * Retrieve availability list
565          */
566         snprintf(sysid, sizeof(sysid),
567                  "hw.acpi.cpu.px_dom%d.available", dom_id);
568         buflen = sizeof(buf) - 1;
569         if (sysctlbyname(sysid, buf, &buflen, NULL, 0) < 0)
570                 return;
571         buf[buflen] = 0;
572
573         /*
574          * Parse out the highest and lowest cpu frequencies
575          */
576         ptr = buf;
577         highest = lowest = 0;
578         freqidx = 0;
579         while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) {
580                 if ((lowest == 0 || lowest > v) &&
581                     (LowestCpuFreq <= 0 || v >= LowestCpuFreq))
582                         lowest = v;
583                 if ((highest == 0 || highest < v) &&
584                     (HighestCpuFreq <= 0 || v <= HighestCpuFreq))
585                         highest = v;
586                 /* 
587                  * Detect turbo mode
588                  */
589                 if (!TurboOpt && highest - v == 1)
590                         highest = v;
591                 ++freqidx;
592         }
593
594         /*
595          * Frequency array
596          */
597         NFreq = freqidx;
598         if (NFreq > MAXFREQ)
599                 NFreq = MAXFREQ;
600         freqidx = freqidx;
601         ptr = buf;
602         while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) {
603                 if (freqidx == 0)
604                         break;
605                 FreqAry[--freqidx] = v;
606         }
607
608         *highest0 = highest;
609         *lowest0 = lowest;
610 }
611
612 static int
613 acpi_getcpufreq_bin(int dom_id, int *highest0, int *lowest0)
614 {
615         char sysid[64];
616         size_t freqlen;
617         int freqcnt, i;
618
619         /*
620          * Retrieve availability list
621          */
622         snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.avail", dom_id);
623         freqlen = sizeof(FreqAry);
624         if (sysctlbyname(sysid, FreqAry, &freqlen, NULL, 0) < 0)
625                 return 0;
626
627         NFreq = freqcnt = freqlen / sizeof(FreqAry[0]);
628         if (freqcnt == 0)
629                 return 0;
630
631         for (i = freqcnt - 1; i >= 0; --i) {
632                 *lowest0 = FreqAry[i];
633                 if (LowestCpuFreq <= 0 || *lowest0 >= LowestCpuFreq)
634                         break;
635         }
636
637         i = 0;
638         *highest0 = FreqAry[0];
639         if (!TurboOpt && freqcnt > 1 && FreqAry[0] - FreqAry[1] == 1) {
640                 i = 1;
641                 *highest0 = FreqAry[1];
642         }
643         for (; i < freqcnt; ++i) {
644                 if (HighestCpuFreq <= 0 || *highest0 <= HighestCpuFreq)
645                         break;
646                 *highest0 = FreqAry[i];
647         }
648         return 1;
649 }
650
/*
 * Fetch the highest and lowest usable frequency of power domain dom_id.
 * The binary sysctl reader is tried first and the text one is used as
 * the fallback.  Both outputs are zeroed before either reader runs.
 */
static void
acpi_get_cpufreq(int dom_id, int *highest, int *lowest)
{
        *lowest = 0;
        *highest = 0;

        if (!acpi_getcpufreq_bin(dom_id, highest, lowest))
                acpi_getcpufreq_str(dom_id, highest, lowest);
}
661
/*
 * Print the command line synopsis to stderr and exit with status 1.
 * Every option accepted by the getopt() string in main() is listed,
 * including -H, which takes either "max_temp" or "min_temp:max_temp".
 */
static
void
usage(void)
{
        fprintf(stderr, "usage: powerd [-cdeftQ] [-p hysteresis] "
            "[-h highest_freq] [-l lowest_freq] "
            "[-r poll_interval] [-u trigger_up] "
            "[-B min_battery_life] [-H [min_temp:]max_temp] "
            "[-L low_battery_linger] "
            "[-P battery_poll_interval] [-T sample_interval] "
            "[-b backlight]\n");
        exit(1);
}
674
#ifndef timespecsub
/*
 * In-place timespec subtraction: *vvp -= *uvp.  A negative nanosecond
 * remainder is fixed up by borrowing one second.  Both arguments are
 * evaluated more than once.
 */
#define timespecsub(vvp, uvp)                                           \
        do {                                                            \
                (vvp)->tv_nsec -= (uvp)->tv_nsec;                       \
                (vvp)->tv_sec -= (uvp)->tv_sec;                         \
                if ((vvp)->tv_nsec < 0) {                               \
                        (vvp)->tv_nsec += 1000000000;                   \
                        (vvp)->tv_sec--;                                \
                }                                                       \
        } while (0)
#endif
686
687 #define BAT_SYSCTL_TIME_MAX     50000000 /* unit: nanosecond */
688
689 static int
690 has_battery(void)
691 {
692         struct timespec s, e;
693         size_t len;
694         int val;
695
696         clock_gettime(CLOCK_MONOTONIC_FAST, &s);
697         BatLifePrevT = s;
698
699         len = sizeof(val);
700         if (sysctlbyname("hw.acpi.acline", &val, &len, NULL, 0) < 0) {
701                 /* No AC line information */
702                 return 0;
703         }
704         clock_gettime(CLOCK_MONOTONIC_FAST, &e);
705
706         timespecsub(&e, &s);
707         if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
708                 /* hw.acpi.acline takes to long to be useful */
709                 syslog(LOG_NOTICE, "hw.acpi.acline takes too long");
710                 return 0;
711         }
712
713         clock_gettime(CLOCK_MONOTONIC_FAST, &s);
714         len = sizeof(val);
715         if (sysctlbyname("hw.acpi.battery.life", &val, &len, NULL, 0) < 0) {
716                 /* No battery life */
717                 return 0;
718         }
719         clock_gettime(CLOCK_MONOTONIC_FAST, &e);
720
721         timespecsub(&e, &s);
722         if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
723                 /* hw.acpi.battery.life takes to long to be useful */
724                 syslog(LOG_NOTICE, "hw.acpi.battery.life takes too long");
725                 return 0;
726         }
727         return 1;
728 }
729
730 static void
731 low_battery_alert(int life)
732 {
733         int fmt, stereo, freq;
734         int fd;
735
736         syslog(LOG_ALERT, "low battery life %d%%, please plugin AC line, #%d",
737             life, BatShutdownLingerCnt);
738         ++BatShutdownLingerCnt;
739
740         if (!BatShutdownAudioAlert)
741                 return;
742
743         fd = open("/dev/dsp", O_WRONLY);
744         if (fd < 0)
745                 return;
746
747         fmt = AFMT_S16_LE;
748         if (ioctl(fd, SNDCTL_DSP_SETFMT, &fmt, sizeof(fmt)) < 0)
749                 goto done;
750
751         stereo = 0;
752         if (ioctl(fd, SNDCTL_DSP_STEREO, &stereo, sizeof(stereo)) < 0)
753                 goto done;
754
755         freq = 44100;
756         if (ioctl(fd, SNDCTL_DSP_SPEED, &freq, sizeof(freq)) < 0)
757                 goto done;
758
759         write(fd, alert1, sizeof(alert1));
760         write(fd, alert1, sizeof(alert1));
761
762 done:
763         close(fd);
764 }
765
766 static int
767 mon_battery(void)
768 {
769         struct timespec cur, ts;
770         int acline, life;
771         size_t len;
772
773         clock_gettime(CLOCK_MONOTONIC_FAST, &cur);
774         ts = cur;
775         timespecsub(&ts, &BatLifePrevT);
776         if (ts.tv_sec < BatLifePollIntvl)
777                 return 1;
778         BatLifePrevT = cur;
779
780         len = sizeof(acline);
781         if (sysctlbyname("hw.acpi.acline", &acline, &len, NULL, 0) < 0)
782                 return 1;
783         if (acline) {
784                 BatShutdownLinger = -1;
785                 BatShutdownLingerCnt = 0;
786                 restore_backlight();
787                 return 1;
788         }
789
790         if (!BackLightDown && BackLightPct != 100) {
791                 int backlight_max, backlight;
792
793                 len = sizeof(backlight_max);
794                 if (sysctlbyname("hw.backlight_max", &backlight_max, &len,
795                     NULL, 0) < 0) {
796                         /* No more backlight adjustment */
797                         BackLightPct = 100;
798                         goto after_backlight;
799                 }
800
801                 len = sizeof(OldBackLightLevel);
802                 if (sysctlbyname("hw.backlight_level", &OldBackLightLevel, &len,
803                     NULL, 0) < 0) {
804                         /* No more backlight adjustment */
805                         BackLightPct = 100;
806                         goto after_backlight;
807                 }
808
809                 backlight = (backlight_max * BackLightPct) / 100;
810                 if (backlight >= OldBackLightLevel) {
811                         /* No more backlight adjustment */
812                         BackLightPct = 100;
813                         goto after_backlight;
814                 }
815
816                 if (sysctlbyname("hw.backlight_level", NULL, NULL,
817                     &backlight, sizeof(backlight)) < 0) {
818                         /* No more backlight adjustment */
819                         BackLightPct = 100;
820                         goto after_backlight;
821                 }
822                 BackLightDown = 1;
823         }
824 after_backlight:
825
826         len = sizeof(life);
827         if (sysctlbyname("hw.acpi.battery.life", &life, &len, NULL, 0) < 0)
828                 return 1;
829
830         if (BatShutdownLinger > 0) {
831                 ts = cur;
832                 timespecsub(&ts, &BatShutdownStartT);
833                 if (ts.tv_sec > BatShutdownLinger)
834                         BatShutdownLinger = 0;
835         }
836
837         if (life <= BatLifeMin) {
838                 if (BatShutdownLinger == 0 || BatShutdownLingerSet == 0) {
839                         syslog(LOG_ALERT, "low battery life %d%%, "
840                             "shutting down", life);
841                         if (vfork() == 0)
842                                 execlp("poweroff", "poweroff", NULL);
843                         return 0;
844                 } else if (BatShutdownLinger < 0) {
845                         BatShutdownLinger = BatShutdownLingerSet;
846                         BatShutdownStartT = cur;
847                 }
848                 low_battery_alert(life);
849         }
850         return 1;
851 }
852
853 static void
854 get_ncpus(void)
855 {
856         size_t slen;
857
858         slen = sizeof(NCpus);
859         if (sysctlbyname("hw.ncpu", &NCpus, &slen, NULL, 0) < 0)
860                 err(1, "sysctlbyname hw.ncpu failed");
861         if (DebugOpt)
862                 printf("hw.ncpu %d\n", NCpus);
863 }
864
865 static void
866 get_uschedcpus(void)
867 {
868         size_t slen;
869
870         slen = sizeof(usched_cpu_used);
871         if (sysctlbyname("kern.usched_global_cpumask", &usched_cpu_used, &slen,
872             NULL, 0) < 0)
873                 err(1, "sysctlbyname kern.usched_global_cpumask failed");
874         if (DebugOpt) {
875                 int i;
876
877                 printf("usched cpumask was: ");
878                 for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i)
879                         printf("%jx ", (uintmax_t)usched_cpu_used.ary[i]);
880                 printf("\n");
881         }
882 }
883
884 static void
885 set_uschedcpus(void)
886 {
887         if (DebugOpt) {
888                 int i;
889
890                 printf("usched cpumask: ");
891                 for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i) {
892                         printf("%jx ",
893                             (uintmax_t)usched_cpu_used.ary[i]);
894                 }
895                 printf("\n");
896         }
897         sysctlbyname("kern.usched_global_cpumask", NULL, 0,
898             &usched_cpu_used, sizeof(usched_cpu_used));
899 }
900
/*
 * Report whether the machdep.perfbias0.hint sysctl can be read.
 *
 * Returns 1 when the read succeeds, 0 otherwise.  The value that is
 * read back is discarded.
 */
static int
has_perfbias(void)
{
	int hint;
	size_t len = sizeof(hint);

	return (sysctlbyname("machdep.perfbias0.hint", &hint, &len,
	    NULL, 0) < 0) ? 0 : 1;
}
912
913 static void
914 set_perfbias(int cpu, int inc)
915 {
916         int hint = inc ? 0 : 15;
917         char sysid[64];
918
919         if (DebugOpt)
920                 printf("cpu%d set perfbias hint %d\n", cpu, hint);
921         snprintf(sysid, sizeof(sysid), "machdep.perfbias%d.hint", cpu);
922         sysctlbyname(sysid, NULL, NULL, &hint, sizeof(hint));
923 }
924
925 static void
926 init_perf(void)
927 {
928         struct cpu_state *state;
929         int cpu;
930
931         /* Get usched cpumask */
932         get_uschedcpus();
933
934         /*
935          * Assume everything are used and are maxed out, before we
936          * start.
937          */
938
939         CPUMASK_ASSBMASK(cpu_used, NCpus);
940         cpu_pwrdom_used = cpu_pwrdom_mask;
941         global_pcpu_limit = NCpus;
942
943         for (cpu = 0; cpu < NCpus; ++cpu) {
944                 state = &pcpu_state[cpu];
945
946                 state->cpu_uavg = 0.0;
947                 state->cpu_davg = 0.0;
948                 state->cpu_limit = 1;
949                 state->cpu_count = 1;
950                 snprintf(state->cpu_name, sizeof(state->cpu_name), "cpu%d",
951                     cpu);
952         }
953
954         state = &global_cpu_state;
955         state->cpu_uavg = 0.0;
956         state->cpu_davg = 0.0;
957         state->cpu_limit = NCpus;
958         state->cpu_count = NCpus;
959         strlcpy(state->cpu_name, "global", sizeof(state->cpu_name));
960 }
961
962 static int
963 get_nstate(struct cpu_state *state, double srt)
964 {
965         int ustate, dstate, nstate;
966
967         /* speeding up */
968         state->cpu_uavg = (state->cpu_uavg * 2.0 + state->cpu_qavg) / 3.0;
969         /* slowing down */
970         state->cpu_davg = (state->cpu_davg * srt + state->cpu_qavg) / (srt + 1);
971         if (state->cpu_davg < state->cpu_uavg)
972                 state->cpu_davg = state->cpu_uavg;
973
974         ustate = state->cpu_uavg / TriggerUp;
975         if (ustate < state->cpu_limit)
976                 ustate = state->cpu_uavg / TriggerDown;
977         dstate = state->cpu_davg / TriggerUp;
978         if (dstate < state->cpu_limit)
979                 dstate = state->cpu_davg / TriggerDown;
980
981         nstate = (ustate > dstate) ? ustate : dstate;
982         if (nstate > state->cpu_count)
983                 nstate = state->cpu_count;
984
985         if (DebugOpt) {
986                 printf("%s qavg=%5.2f uavg=%5.2f davg=%5.2f "
987                     "%2d ncpus=%d\n", state->cpu_name,
988                     state->cpu_qavg, state->cpu_uavg, state->cpu_davg,
989                     state->cpu_limit, nstate);
990         }
991         return nstate;
992 }
993
994 static void
995 mon_perf(double srt)
996 {
997         cpumask_t ocpu_used, ocpu_pwrdom_used;
998         int pnstate = 0, nstate;
999         int cpu;
1000
1001         /*
1002          * Find cpus requiring performance and their cooresponding power
1003          * domains.  Save the number of cpus requiring performance in
1004          * pnstate.
1005          */
1006         ocpu_used = cpu_used;
1007         ocpu_pwrdom_used = cpu_pwrdom_used;
1008
1009         CPUMASK_ASSZERO(cpu_used);
1010         CPUMASK_ASSZERO(cpu_pwrdom_used);
1011
1012         for (cpu = 0; cpu < NCpus; ++cpu) {
1013                 struct cpu_state *state = &pcpu_state[cpu];
1014                 int s;
1015
1016                 s = get_nstate(state, srt);
1017                 if (s) {
1018                         CPUMASK_ORBIT(cpu_used, cpu);
1019                         CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
1020                 }
1021                 pnstate += s;
1022
1023                 state->cpu_limit = s;
1024         }
1025
1026         /*
1027          * Calculate nstate, the number of cpus we wish to run at max
1028          * performance.
1029          */
1030         nstate = get_nstate(&global_cpu_state, srt);
1031
1032         if (nstate == global_cpu_state.cpu_limit &&
1033             (pnstate == global_pcpu_limit || nstate > pnstate)) {
1034                 /* Nothing changed; keep the sets */
1035                 cpu_used = ocpu_used;
1036                 cpu_pwrdom_used = ocpu_pwrdom_used;
1037
1038                 global_pcpu_limit = pnstate;
1039                 return;
1040         }
1041         global_pcpu_limit = pnstate;
1042
1043         if (nstate > pnstate) {
1044                 /*
1045                  * Add spare cpus to meet global performance requirement.
1046                  */
1047                 add_spare_cpus(ocpu_used, nstate - pnstate);
1048         }
1049
1050         global_cpu_state.cpu_limit = nstate;
1051
1052         /*
1053          * Adjust cpu and cpu power domain performance
1054          */
1055         adj_perf(ocpu_used, ocpu_pwrdom_used);
1056 }
1057
1058 static void
1059 add_spare_cpus(const cpumask_t ocpu_used, int ncpu)
1060 {
1061         cpumask_t saved_pwrdom, xcpu_used;
1062         int done = 0, cpu;
1063
1064         /*
1065          * Find more cpus in the previous cpu set.
1066          */
1067         xcpu_used = cpu_used;
1068         CPUMASK_XORMASK(xcpu_used, ocpu_used);
1069         while (CPUMASK_TESTNZERO(xcpu_used)) {
1070                 cpu = BSFCPUMASK(xcpu_used);
1071                 CPUMASK_NANDBIT(xcpu_used, cpu);
1072
1073                 if (CPUMASK_TESTBIT(ocpu_used, cpu)) {
1074                         CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
1075                         CPUMASK_ORBIT(cpu_used, cpu);
1076                         --ncpu;
1077                         if (ncpu == 0)
1078                                 return;
1079                 }
1080         }
1081
1082         /*
1083          * Find more cpus in the used cpu power domains.
1084          */
1085         saved_pwrdom = cpu_pwrdom_used;
1086 again:
1087         while (CPUMASK_TESTNZERO(saved_pwrdom)) {
1088                 cpumask_t unused_cpumask;
1089                 int dom;
1090
1091                 dom = BSFCPUMASK(saved_pwrdom);
1092                 CPUMASK_NANDBIT(saved_pwrdom, dom);
1093
1094                 unused_cpumask = cpu_pwrdomain[dom]->dom_cpumask;
1095                 CPUMASK_NANDMASK(unused_cpumask, cpu_used);
1096
1097                 while (CPUMASK_TESTNZERO(unused_cpumask)) {
1098                         cpu = BSFCPUMASK(unused_cpumask);
1099                         CPUMASK_NANDBIT(unused_cpumask, cpu);
1100
1101                         CPUMASK_ORBIT(cpu_pwrdom_used, dom);
1102                         CPUMASK_ORBIT(cpu_used, cpu);
1103                         --ncpu;
1104                         if (ncpu == 0)
1105                                 return;
1106                 }
1107         }
1108         if (!done) {
1109                 done = 1;
1110                 /*
1111                  * Find more cpus in unused cpu power domains
1112                  */
1113                 saved_pwrdom = cpu_pwrdom_mask;
1114                 CPUMASK_NANDMASK(saved_pwrdom, cpu_pwrdom_used);
1115                 goto again;
1116         }
1117         if (DebugOpt)
1118                 printf("%d cpus not found\n", ncpu);
1119 }
1120
1121 static void
1122 acpi_set_cpufreq(int dom, int inc)
1123 {
1124         int lowest, highest, desired;
1125         char sysid[64];
1126
1127         acpi_get_cpufreq(dom, &highest, &lowest);
1128         if (highest == 0 || lowest == 0)
1129                 return;
1130         desired = inc ? highest : lowest;
1131
1132         if (DebugOpt)
1133                 printf("dom%d set frequency %d\n", dom, desired);
1134         snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.select", dom);
1135         sysctlbyname(sysid, NULL, NULL, &desired, sizeof(desired));
1136 }
1137
1138 static void
1139 adj_cpu_pwrdom(int dom, int inc)
1140 {
1141         if (AdjustCpuFreq && (inc == 0 || AdjustCpuFreqOverride == 0))
1142                 acpi_set_cpufreq(dom, inc);
1143 }
1144
1145 static void
1146 adj_cpu_perf(int cpu, int inc)
1147 {
1148         if (DebugOpt) {
1149                 if (inc)
1150                         printf("cpu%d increase perf\n", cpu);
1151                 else
1152                         printf("cpu%d decrease perf\n", cpu);
1153         }
1154
1155         if (HasPerfbias)
1156                 set_perfbias(cpu, inc);
1157         if (AdjustCstate)
1158                 set_cstate(cpu, inc);
1159 }
1160
1161 static void
1162 adj_perf(cpumask_t xcpu_used, cpumask_t xcpu_pwrdom_used)
1163 {
1164         cpumask_t old_usched_used;
1165         int cpu, inc;
1166
1167         /*
1168          * Set cpus requiring performance to the userland process
1169          * scheduler.  Leave the rest of cpus unmapped.
1170          */
1171         old_usched_used = usched_cpu_used;
1172         usched_cpu_used = cpu_used;
1173         if (CPUMASK_TESTZERO(usched_cpu_used))
1174                 CPUMASK_ORBIT(usched_cpu_used, 0);
1175         if (CPUMASK_CMPMASKNEQ(usched_cpu_used, old_usched_used))
1176                 set_uschedcpus();
1177
1178         /*
1179          * Adjust per-cpu performance.
1180          */
1181         CPUMASK_XORMASK(xcpu_used, cpu_used);
1182         while (CPUMASK_TESTNZERO(xcpu_used)) {
1183                 cpu = BSFCPUMASK(xcpu_used);
1184                 CPUMASK_NANDBIT(xcpu_used, cpu);
1185
1186                 if (CPUMASK_TESTBIT(cpu_used, cpu)) {
1187                         /* Increase cpu performance */
1188                         inc = 1;
1189                 } else {
1190                         /* Decrease cpu performance */
1191                         inc = 0;
1192                 }
1193                 adj_cpu_perf(cpu, inc);
1194         }
1195
1196         /*
1197          * Adjust cpu power domain performance.  This could affect
1198          * a set of cpus.
1199          */
1200         CPUMASK_XORMASK(xcpu_pwrdom_used, cpu_pwrdom_used);
1201         while (CPUMASK_TESTNZERO(xcpu_pwrdom_used)) {
1202                 int dom;
1203
1204                 dom = BSFCPUMASK(xcpu_pwrdom_used);
1205                 CPUMASK_NANDBIT(xcpu_pwrdom_used, dom);
1206
1207                 if (CPUMASK_TESTBIT(cpu_pwrdom_used, dom)) {
1208                         /* Increase cpu power domain performance */
1209                         inc = 1;
1210                 } else {
1211                         /* Decrease cpu power domain performance */
1212                         inc = 0;
1213                 }
1214                 adj_cpu_pwrdom(dom, inc);
1215         }
1216 }
1217
1218 static void
1219 restore_perf(void)
1220 {
1221         cpumask_t ocpu_used, ocpu_pwrdom_used;
1222
1223         /* Remove highest cpu frequency limitation */
1224         HighestCpuFreq = 0;
1225
1226         ocpu_used = cpu_used;
1227         ocpu_pwrdom_used = cpu_pwrdom_used;
1228
1229         /* Max out all cpus and cpu power domains performance */
1230         CPUMASK_ASSBMASK(cpu_used, NCpus);
1231         cpu_pwrdom_used = cpu_pwrdom_mask;
1232
1233         adj_perf(ocpu_used, ocpu_pwrdom_used);
1234
1235         if (AdjustCstate) {
1236                 /*
1237                  * Restore the original mwait C-state
1238                  */
1239                 if (DebugOpt)
1240                         printf("global set cstate %s\n", orig_global_cx);
1241                 sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1242                     orig_global_cx, strlen(orig_global_cx) + 1);
1243         }
1244 }
1245
1246 static int
1247 probe_cstate(void)
1248 {
1249         char cx_supported[1024];
1250         const char *target;
1251         char *ptr;
1252         int idle_hlt, deep = 1;
1253         size_t len;
1254
1255         len = sizeof(idle_hlt);
1256         if (sysctlbyname("machdep.cpu_idle_hlt", &idle_hlt, &len, NULL, 0) < 0)
1257                 return 0;
1258         if (idle_hlt != 1)
1259                 return 0;
1260
1261         len = sizeof(cx_supported);
1262         if (sysctlbyname("machdep.mwait.CX.supported", cx_supported, &len,
1263             NULL, 0) < 0)
1264                 return 0;
1265
1266         len = sizeof(orig_global_cx);
1267         if (sysctlbyname("machdep.mwait.CX.idle", orig_global_cx, &len,
1268             NULL, 0) < 0)
1269                 return 0;
1270
1271         strlcpy(cpu_perf_cx, "AUTODEEP", sizeof(cpu_perf_cx));
1272         cpu_perf_cxlen = strlen(cpu_perf_cx) + 1;
1273         if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1274             cpu_perf_cx, cpu_perf_cxlen) < 0) {
1275                 /* AUTODEEP is not supported; try AUTO */
1276                 deep = 0;
1277                 strlcpy(cpu_perf_cx, "AUTO", sizeof(cpu_perf_cx));
1278                 cpu_perf_cxlen = strlen(cpu_perf_cx) + 1;
1279                 if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1280                     cpu_perf_cx, cpu_perf_cxlen) < 0)
1281                         return 0;
1282         }
1283
1284         if (!deep)
1285                 target = "C2/0";
1286         else
1287                 target = NULL;
1288         for (ptr = strtok(cx_supported, " "); ptr != NULL;
1289              ptr = strtok(NULL, " ")) {
1290                 if (target == NULL ||
1291                     (target != NULL && strcmp(ptr, target) == 0)) {
1292                         strlcpy(cpu_idle_cx, ptr, sizeof(cpu_idle_cx));
1293                         cpu_idle_cxlen = strlen(cpu_idle_cx) + 1;
1294                         if (target != NULL)
1295                                 break;
1296                 }
1297         }
1298         if (cpu_idle_cxlen == 0)
1299                 return 0;
1300
1301         if (DebugOpt) {
1302                 printf("cstate orig %s, perf %s, idle %s\n",
1303                     orig_global_cx, cpu_perf_cx, cpu_idle_cx);
1304         }
1305         return 1;
1306 }
1307
1308 static void
1309 set_cstate(int cpu, int inc)
1310 {
1311         const char *cst;
1312         char sysid[64];
1313         size_t len;
1314
1315         if (inc) {
1316                 cst = cpu_perf_cx;
1317                 len = cpu_perf_cxlen;
1318         } else {
1319                 cst = cpu_idle_cx;
1320                 len = cpu_idle_cxlen;
1321         }
1322
1323         if (DebugOpt)
1324                 printf("cpu%d set cstate %s\n", cpu, cst);
1325         snprintf(sysid, sizeof(sysid), "machdep.mwait.CX.idle%d", cpu);
1326         sysctlbyname(sysid, NULL, NULL, cst, len);
1327 }
1328
1329 static void
1330 restore_backlight(void)
1331 {
1332         if (BackLightDown) {
1333                 BackLightDown = 0;
1334                 sysctlbyname("hw.backlight_level", NULL, NULL,
1335                     &OldBackLightLevel, sizeof(OldBackLightLevel));
1336         }
1337 }
1338
1339 /*
1340  * get_cputemp() / mon_cputemp()
1341  *
1342  * This enforces the maximum cpu frequency based on temperature
1343  * verses MinTemp and MaxTemp.
1344  */
1345 static int
1346 get_cputemp(void)
1347 {
1348         char sysid[64];
1349         struct sensor sensor;
1350         size_t sensor_size;
1351         int t;
1352         int mt = -1;
1353         int n;
1354
1355         for (n = 0; ; ++n) {
1356                 t = 0;
1357                 snprintf(sysid, sizeof(sysid),
1358                          "hw.sensors.cpu_node%d.temp0", n);
1359                 sensor_size = sizeof(sensor);
1360                 if (sysctlbyname(sysid, &sensor, &sensor_size, NULL, 0) < 0)
1361                         break;
1362                 t = -1;
1363                 if ((sensor.flags & (SENSOR_FINVALID | SENSOR_FUNKNOWN)) == 0) {
1364                         t = (int)((sensor.value - 273150000) / 1000000);
1365                         if (mt < t)
1366                                 mt = t;
1367                 }
1368         }
1369         if (n)
1370                 return mt;
1371
1372         /*
1373          * Missing nodeN for some reason, try cpuN.
1374          */
1375         for (n = 0; ; ++n) {
1376                 t = 0;
1377                 snprintf(sysid, sizeof(sysid),
1378                          "hw.sensors.cpu%d.temp0", n);
1379                 sensor_size = sizeof(sensor);
1380                 if (sysctlbyname(sysid, &sensor, &sensor_size, NULL, 0) < 0)
1381                         break;
1382                 t = -1;
1383                 if ((sensor.flags & (SENSOR_FINVALID | SENSOR_FUNKNOWN)) == 0) {
1384                         t = (int)((sensor.value - 273150000) / 1000000);
1385                         if (mt < t)
1386                                 mt = t;
1387                 }
1388         }
1389         return mt;
1390 }
1391
/*
 * Write freq to hw.acpi.cpu.px_global.  Values <= 0 are ignored.  The
 * write is best-effort; the sysctl result is not examined.
 */
static void
set_global_freq(int freq)
{
	if (freq <= 0)
		return;

	sysctlbyname("hw.acpi.cpu.px_global",
		     NULL, NULL, &freq, sizeof(freq));
}
1399
/*
 * Read hw.acpi.cpu.px_global.  The sysctl result is not examined; the
 * variable is preset to -1 so that value is what gets returned when
 * the sysctl leaves it untouched.
 */
static int
get_global_freq(void)
{
	int freq = -1;
	size_t freq_size = sizeof(freq);

	sysctlbyname("hw.acpi.cpu.px_global", &freq, &freq_size, NULL, 0);
	return freq;
}
1412
1413 static void
1414 mon_cputemp(void)
1415 {
1416         static int last_temp = -1;
1417         static int last_idx = -1;
1418         int temp = get_cputemp();
1419         int idx;
1420         int lowest;
1421         int highest;
1422         static int CurPXGlobal __unused;
1423
1424         /*
1425          * Reseed FreqAry, it can change w/AC power state
1426          */
1427         acpi_get_cpufreq(0, &lowest, &highest);
1428
1429         /*
1430          * Some cpu frequency steps can cause large shifts in cpu temperature,
1431          * creating an oscillation that min-maxes the temperature in a way
1432          * that is not desireable.  To deal with this, we impose an exponential
1433          * average for any temperature change.
1434          *
1435          * We have to do this in both directions, otherwise (in particular)
1436          * laptop fan responsiveness and temperature sensor response times
1437          * can create major frequency oscillations.
1438          */
1439         if (last_temp < 0) {
1440                 last_temp = temp << 8;
1441         } else if (temp < last_temp) {
1442                 last_temp = (last_temp * 15 + (temp << 8)) / 16;
1443                 if (DebugOpt) {
1444                         printf("Falling temp %d (use %d)\n",
1445                                 temp, (last_temp >> 8));
1446                 }
1447         } else {
1448                 last_temp = (last_temp * 15 + (temp << 8)) / 16;
1449                 if (DebugOpt) {
1450                         printf("Rising temp %d (use %d)\n",
1451                                 temp, (last_temp >> 8));
1452                 }
1453         }
1454         temp = last_temp >> 8;
1455
1456         /*
1457          * CPU Temp not available or available frequencies not yet
1458          * probed.
1459          */
1460         if (DebugOpt)
1461                 printf("Temp %d {%d-%d} NFreq=%d)\n",
1462                        temp, MinTemp, MaxTemp, NFreq);
1463         if (temp <= 0)
1464                 return;
1465         if (NFreq == 0)
1466                 return;
1467
1468         /*
1469          * Return to normal operation if under the minimum
1470          */
1471         if (temp <= MinTemp) {
1472                 if (AdjustCpuFreqOverride) {
1473                         AdjustCpuFreqOverride = 0;
1474                         CurPXGlobal = 0;
1475                         last_idx = -1;
1476                         syslog(LOG_ALERT,
1477                                "Temp below %d, returning to normal operation",
1478                                MinTemp);
1479                         if (SavedPXGlobal)
1480                                 set_global_freq(SavedPXGlobal);
1481                 }
1482                 return;
1483         }
1484
1485         /*
1486          * Hysteresis before entering temperature control mode
1487          */
1488         if (AdjustCpuFreqOverride == 0 &&
1489             temp <= MinTemp + (MaxTemp - MinTemp) / 10 + 1) {
1490                 return;
1491         }
1492
1493         /*
1494          * Override frequency controls (except for idle -> lowest)
1495          */
1496         if (AdjustCpuFreqOverride == 0) {
1497                 AdjustCpuFreqOverride = 1;
1498                 SavedPXGlobal = get_global_freq();
1499                 CurPXGlobal = 0;
1500                 last_idx = -1;
1501                 syslog(LOG_ALERT,
1502                        "Temp %d {%d-%d}, entering temperature control mode",
1503                        temp, MinTemp, MaxTemp);
1504         }
1505         if (temp > MaxTemp + (MaxTemp - MinTemp) / 10 + 1) {
1506                 syslog(LOG_ALERT,
1507                        "Temp %d {%d-%d}, TOO HOT!!!",
1508                        temp, MinTemp, MaxTemp);
1509         }
1510         idx = (temp - MinTemp) * NFreq / (MaxTemp - MinTemp);
1511         if (idx < 0 || idx >= NFreq)    /* overtemp */
1512                 idx = NFreq - 1;
1513
1514         /*
1515          * Limit frequency shifts to single steps in both directions.
1516          * Some fans react very quickly, this will reduce oscillations.
1517          */
1518         if (DebugOpt)
1519                 printf("Temp index %d (use %d)\n", idx, last_idx);
1520         if (last_idx >= 0 && idx < last_idx)
1521                 idx = last_idx - 1;
1522         else if (last_idx >= 0 && idx > last_idx)
1523                 idx = last_idx + 1;
1524         last_idx = idx;
1525
1526         /*
1527          * One last thing, make sure our frequency adheres to
1528          * HighestCpuFreq.  However, override LowestCpuFreq for
1529          * temperature control purposes.
1530          */
1531         while (HighestCpuFreq > 0 && idx < NFreq &&
1532                FreqAry[idx] > HighestCpuFreq) {
1533                 ++idx;
1534         }
1535 #if 0
1536         /*
1537          * Currently ignore LowestCpuFreq if temp control thinks it
1538          * needs to go lower
1539          */
1540         while (LowestCpuFreq > 0 && idx > 0 &&
1541                FreqAry[idx] < LowestCpuFreq) {
1542                 --idx;
1543         }
1544 #endif
1545
1546         if (FreqAry[idx] != CurPXGlobal) {
1547                 CurPXGlobal = FreqAry[idx];
1548
1549 #if 0
1550                 /* this can get noisy so don't log for now */
1551                 syslog(LOG_ALERT,
1552                        "Temp %d {%d-%d}, set frequency %d",
1553                        temp, MinTemp, MaxTemp, CurPXGlobal);
1554 #endif
1555         }
1556         set_global_freq(CurPXGlobal);
1557 }