corepower(4): Sensor for Intel CPUs' power usage via the RAPL MSRs
[dragonfly.git] / sys / dev / powermng / corepower / corepower.c
1 /*
2  * Copyright (c) 2015 Imre Vadász <imre@vdsz.com>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
18  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
23  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24  * POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 /*
28  * Device driver for Intel's On Die power usage estimation via MSR.
29  * Supported by Sandy Bridge and later CPUs, and also by Atom CPUs
30  * of the Silvermont and later architectures.
31  */
32
33 #include <sys/param.h>
34 #include <sys/bus.h>
35 #include <sys/systm.h>
36 #include <sys/module.h>
37 #include <sys/conf.h>
38 #include <sys/cpu_topology.h>
39 #include <sys/kernel.h>
40 #include <sys/sensors.h>
41 #include <sys/bitops.h>
42
43 #include <machine/specialreg.h>
44 #include <machine/cpufunc.h>
45 #include <machine/cputypes.h>
46 #include <machine/md_var.h>
47
48 #include "cpu_if.h"
49
/*
 * Bit fields extracted from the value read from MSR_RAPL_POWER_UNIT.
 * corepower_attach() uses each field as an exponent: the corresponding
 * divisor is 2 raised to the field value.
 */
#define MSR_RAPL_POWER_UNIT_POWER	__BITS64(0, 3)		/* -> sc_watt_divisor */
#define MSR_RAPL_POWER_UNIT_ENERGY	__BITS64(8, 12)		/* -> sc_joule_divisor */
#define MSR_RAPL_POWER_UNIT_TIME	__BITS64(16, 19)	/* -> sc_second_divisor */
53
54 struct corepower_sensor {
55         uint64_t        energy;
56         u_int           msr;
57         struct ksensor  sensor;
58 };
59
60 struct corepower_softc {
61         device_t                sc_dev;
62
63         uint32_t                sc_watt_divisor;
64         uint32_t                sc_joule_divisor;
65         uint32_t                sc_second_divisor;
66
67         int                     sc_have_sens;
68
69         struct corepower_sensor sc_pkg_sens;
70         struct corepower_sensor sc_dram_sens;
71         struct corepower_sensor sc_pp0_sens;
72         struct corepower_sensor sc_pp1_sens;
73
74         struct ksensordev       sc_sensordev;
75         struct sensor_task      *sc_senstask;
76 };
77
78 /*
79  * Device methods.
80  */
81 static void     corepower_identify(driver_t *driver, device_t parent);
82 static int      corepower_probe(device_t dev);
83 static int      corepower_attach(device_t dev);
84 static int      corepower_detach(device_t dev);
85 static uint32_t corepower_energy_to_uwatts(struct corepower_softc *sc,
86                                            uint32_t units, uint32_t secs);
87 static void     corepower_refresh(void *arg);
88 static void     corepower_sens_init(struct corepower_sensor *sens,
89                                     char *desc, u_int msr, int cpu);
90 static void     corepower_sens_update(struct corepower_softc *sc,
91                                       struct corepower_sensor *sens);
92
93 static device_method_t corepower_methods[] = {
94         /* Device interface */
95         DEVMETHOD(device_identify,      corepower_identify),
96         DEVMETHOD(device_probe,         corepower_probe),
97         DEVMETHOD(device_attach,        corepower_attach),
98         DEVMETHOD(device_detach,        corepower_detach),
99
100         DEVMETHOD_END
101 };
102
103 static driver_t corepower_driver = {
104         "corepower",
105         corepower_methods,
106         sizeof(struct corepower_softc),
107 };
108
109 static devclass_t corepower_devclass;
110 DRIVER_MODULE(corepower, cpu, corepower_driver, corepower_devclass, NULL, NULL);
111 MODULE_VERSION(corepower, 1);
112
113 static void
114 corepower_identify(driver_t *driver, device_t parent)
115 {
116         device_t child;
117         const struct cpu_node *node;
118         int cpu, master_cpu;
119
120         /* Make sure we're not being doubly invoked. */
121         if (device_find_child(parent, "corepower", -1) != NULL)
122                 return;
123
124         /* Check that the vendor is Intel. */
125         if (cpu_vendor_id != CPU_VENDOR_INTEL)
126                 return;
127
128         /* We only want one child per CPU package */
129         cpu = device_get_unit(parent);
130         node = get_cpu_node_by_cpuid(cpu);
131         while (node != NULL) {
132                 if (node->type == PACKAGE_LEVEL) {
133                         if (node->child_no == 0)
134                                 node = NULL;
135                         break;
136                 }
137                 node = node->parent_node;
138         }
139         if (node == NULL)
140                 return;
141
142         master_cpu = BSRCPUMASK(node->members);
143         if (cpu != master_cpu)
144                 return;
145
146         child = device_add_child(parent, "corepower", -1);
147         if (child == NULL)
148                 device_printf(parent, "add corepower child failed\n");
149 }
150
151 static int
152 corepower_probe(device_t dev)
153 {
154         int cpu_family, cpu_model;
155
156         if (resource_disabled("corepower", 0))
157                 return (ENXIO);
158
159         cpu_model = CPUID_TO_MODEL(cpu_id);
160         cpu_family = CPUID_TO_FAMILY(cpu_id);
161
162         if (cpu_family == 0x06) {
163                 switch (cpu_model) {
164                 /* Core CPUs */
165                 case 0x2a:
166                 case 0x3a:
167                 /* Xeon CPUs */
168                 case 0x2d:
169                 case 0x3e:
170                 case 0x3f:
171                 case 0x4f:
172                 case 0x56:
173                 /* Haswell, Broadwell, Skylake */
174                 case 0x3c:
175                 case 0x3d:
176                 case 0x45:
177                 case 0x46:
178                 case 0x47:
179                 case 0x4e:
180                 case 0x5e:
181                 /* Atom CPUs */
182                 case 0x37:
183                 case 0x4a:
184                 case 0x4c:
185                 case 0x5a:
186                 case 0x5d:
187                         break;
188                 default:
189                         return (ENXIO);
190                 }
191         }
192
193         device_set_desc(dev, "CPU On-Die Power Usage Estimation");
194
195         return (BUS_PROBE_GENERIC);
196 }
197
198 static int
199 corepower_attach(device_t dev)
200 {
201         struct corepower_softc *sc = device_get_softc(dev);
202         uint64_t val;
203         uint32_t power_units;
204         uint32_t energy_units;
205         uint32_t time_units;
206         int cpu_family, cpu_model;
207         int cpu;
208
209         sc->sc_dev = dev;
210         sc->sc_have_sens = 0;
211
212         cpu_family = CPUID_TO_FAMILY(cpu_id);
213         cpu_model = CPUID_TO_MODEL(cpu_id);
214
215         /* XXX Check CPU version */
216         if (cpu_family == 0x06) {
217                 switch (cpu_model) {
218                 /* Core CPUs */
219                 case 0x2a:
220                 case 0x3a:
221                         sc->sc_have_sens = 0xd;
222                         break;
223                 /* Xeon CPUs */
224                 case 0x2d: /* Only Xeon branded, Core i version should probably be 0x5 */
225                 case 0x3e:
226                 case 0x3f:
227                 case 0x4f:
228                 case 0x56:
229                         sc->sc_have_sens = 0x7;
230                         break;
231                 /* Haswell, Broadwell, Skylake */
232                 case 0x3c:
233                 case 0x3d:
234                 case 0x45:
235                 case 0x46:
236                 case 0x47:
237                 case 0x4e:
238                 case 0x5e:
239                         /* Check if Core or Xeon (Xeon CPUs might be 0x7) */
240                         sc->sc_have_sens = 0xf;
241                         break;
242                 /* Atom CPUs */
243                 case 0x37:
244                 case 0x4a:
245                 case 0x4c:
246                 case 0x5a:
247                 case 0x5d:
248                         sc->sc_have_sens = 0x5;
249                         break;
250                 default:
251                         return (ENXIO);
252                 }
253         }
254
255         val = rdmsr(MSR_RAPL_POWER_UNIT);
256
257         power_units = __SHIFTOUT(val, MSR_RAPL_POWER_UNIT_POWER);
258         energy_units = __SHIFTOUT(val, MSR_RAPL_POWER_UNIT_ENERGY);
259         time_units = __SHIFTOUT(val, MSR_RAPL_POWER_UNIT_TIME);
260
261         sc->sc_watt_divisor = (1 << power_units);
262         sc->sc_joule_divisor = (1 << energy_units);
263         sc->sc_second_divisor = (1 << time_units);
264
265         /*
266          * Add hw.sensors.cpu_nodeN MIB.
267          */
268         cpu = device_get_unit(device_get_parent(dev));
269         ksnprintf(sc->sc_sensordev.xname, sizeof(sc->sc_sensordev.xname),
270             "cpu_node%d", get_chip_ID(cpu));
271         if (sc->sc_have_sens & 1) {
272                 corepower_sens_init(&sc->sc_pkg_sens, "Package Power",
273                     MSR_PKG_ENERGY_STATUS, cpu);
274                 sensor_attach(&sc->sc_sensordev, &sc->sc_pkg_sens.sensor);
275         }
276         if (sc->sc_have_sens & 2) {
277                 corepower_sens_init(&sc->sc_dram_sens, "DRAM Power",
278                     MSR_DRAM_ENERGY_STATUS, cpu);
279                 sensor_attach(&sc->sc_sensordev, &sc->sc_dram_sens.sensor);
280         }
281         if (sc->sc_have_sens & 4) {
282                 corepower_sens_init(&sc->sc_pp0_sens, "Cores Power",
283                     MSR_PP0_ENERGY_STATUS, cpu);
284                 sensor_attach(&sc->sc_sensordev, &sc->sc_pp0_sens.sensor);
285         }
286         if (sc->sc_have_sens & 8) {
287                 corepower_sens_init(&sc->sc_pp1_sens, "Graphics Power",
288                     MSR_PP1_ENERGY_STATUS, cpu);
289                 sensor_attach(&sc->sc_sensordev, &sc->sc_pp1_sens.sensor);
290         }
291
292         sc->sc_senstask = sensor_task_register2(sc, corepower_refresh, 1, cpu);
293
294         sensordev_install(&sc->sc_sensordev);
295
296         return (0);
297 }
298
299 static int
300 corepower_detach(device_t dev)
301 {
302         struct corepower_softc *sc = device_get_softc(dev);
303
304         sensordev_deinstall(&sc->sc_sensordev);
305         sensor_task_unregister2(sc->sc_senstask);
306
307         return (0);
308 }
309
310 static uint32_t
311 corepower_energy_to_uwatts(struct corepower_softc *sc, uint32_t units,
312     uint32_t secs)
313 {
314         uint64_t val;
315
316         val = ((uint64_t)units) * 1000ULL * 1000ULL;
317         val /= sc->sc_joule_divisor;
318
319         return val / secs;
320 }
321
322 static void
323 corepower_refresh(void *arg)
324 {
325         struct corepower_softc *sc = (struct corepower_softc *)arg;
326
327         if (sc->sc_have_sens & 1)
328                 corepower_sens_update(sc, &sc->sc_pkg_sens);
329         if (sc->sc_have_sens & 2)
330                 corepower_sens_update(sc, &sc->sc_dram_sens);
331         if (sc->sc_have_sens & 4)
332                 corepower_sens_update(sc, &sc->sc_pp0_sens);
333         if (sc->sc_have_sens & 8)
334                 corepower_sens_update(sc, &sc->sc_pp1_sens);
335 }
336
337 static void
338 corepower_sens_init(struct corepower_sensor *sens, char *desc, u_int msr,
339     int cpu)
340 {
341         ksnprintf(sens->sensor.desc, sizeof(sens->sensor.desc), "node%d %s",
342             get_chip_ID(cpu), desc);
343         sens->sensor.type = SENSOR_WATTS;
344         sens->msr = msr;
345         sens->energy = rdmsr(sens->msr) & 0xffffffffU;
346 }
347
348 static void
349 corepower_sens_update(struct corepower_softc *sc,
350     struct corepower_sensor *sens)
351 {
352         uint64_t a, res;
353
354         a = rdmsr(sens->msr) & 0xffffffffU;
355         if (sens->energy > a) {
356                 res = (0x100000000ULL - sens->energy) + a;
357         } else {
358                 res = a - sens->energy;
359         }
360         sens->energy = a;
361         sens->sensor.value = corepower_energy_to_uwatts(sc, res, 1);
362 }