2 * Copyright (c) 2007, 2008 Rui Paulo <rpaulo@FreeBSD.org>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
23 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
26 * $FreeBSD: src/sys/dev/coretemp/coretemp.c,v 1.14 2011/05/05 19:15:15 delphij Exp $
30 * Device driver for Intel's On Die thermal sensor via MSR.
31 * First introduced in Intel's Core line of processors.
34 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/module.h>
39 #include <sys/cpu_topology.h>
40 #include <sys/kernel.h>
41 #include <sys/sensors.h>
42 #include <sys/proc.h> /* for curthread */
43 #include <sys/sched.h>
44 #include <sys/thread2.h>
45 #include <sys/bitops.h>
47 #include <machine/specialreg.h>
48 #include <machine/cpufunc.h>
49 #include <machine/cputypes.h>
50 #include <machine/md_var.h>
/*
 * Bit fields of the per-core thermal status value that
 * coretemp_msr_fetch() reads from MSR_THERM_STATUS and that
 * coretemp_msr_temp() decodes.  READ is the field that gets subtracted
 * from Tj(max); it is only consulted when READ_VALID is set.
 * NOTE(review): presumably mirrors the IA32_THERM_STATUS layout in the
 * Intel SDM -- confirm the bit positions against the manual.
 */
54 #define MSR_THERM_STATUS_TM_STATUS __BIT64(0)
55 #define MSR_THERM_STATUS_TM_STATUS_LOG __BIT64(1)
56 #define MSR_THERM_STATUS_PROCHOT __BIT64(2)
57 #define MSR_THERM_STATUS_PROCHOT_LOG __BIT64(3)
58 #define MSR_THERM_STATUS_CRIT __BIT64(4)
59 #define MSR_THERM_STATUS_CRIT_LOG __BIT64(5)
60 #define MSR_THERM_STATUS_THRESH1 __BIT64(6)
61 #define MSR_THERM_STATUS_THRESH1_LOG __BIT64(7)
62 #define MSR_THERM_STATUS_THRESH2 __BIT64(8)
63 #define MSR_THERM_STATUS_THRESH2_LOG __BIT64(9)
64 #define MSR_THERM_STATUS_PWRLIM __BIT64(10)
65 #define MSR_THERM_STATUS_PWRLIM_LOG __BIT64(11)
66 #define MSR_THERM_STATUS_READ __BITS64(16, 22)
67 #define MSR_THERM_STATUS_RES __BITS64(27, 30)
68 #define MSR_THERM_STATUS_READ_VALID __BIT64(31)
/*
 * True only when BOTH the thermal status bit and its sticky log bit are
 * set in the per-core status value `msr` (a single set bit is not enough).
 * `msr` is evaluated once.
 */
70 #define MSR_THERM_STATUS_HAS_STATUS(msr) \
71 (((msr) & (MSR_THERM_STATUS_TM_STATUS | MSR_THERM_STATUS_TM_STATUS_LOG)) ==\
72 (MSR_THERM_STATUS_TM_STATUS | MSR_THERM_STATUS_TM_STATUS_LOG))
/*
 * True only when BOTH the critical temperature bit and its log bit are
 * set in the per-core status value `msr`.  `msr` is evaluated once.
 */
74 #define MSR_THERM_STATUS_IS_CRITICAL(msr) \
75 (((msr) & (MSR_THERM_STATUS_CRIT | MSR_THERM_STATUS_CRIT_LOG)) == \
76 (MSR_THERM_STATUS_CRIT | MSR_THERM_STATUS_CRIT_LOG))
/*
 * Bit fields of the package thermal status value that
 * coretemp_msr_fetch() reads from MSR_PKG_THERM_STATUS and that
 * coretemp_pkg_msr_temp() decodes.  The positions match the per-core
 * MSR_THERM_STATUS_* fields, but no READ_VALID or RES field is defined
 * for the package value here.
 * NOTE(review): presumably mirrors IA32_PACKAGE_THERM_STATUS in the
 * Intel SDM -- confirm.
 */
78 #define MSR_PKGTM_STATUS_TM_STATUS __BIT64(0)
79 #define MSR_PKGTM_STATUS_TM_STATUS_LOG __BIT64(1)
80 #define MSR_PKGTM_STATUS_PROCHOT __BIT64(2)
81 #define MSR_PKGTM_STATUS_PROCHOT_LOG __BIT64(3)
82 #define MSR_PKGTM_STATUS_CRIT __BIT64(4)
83 #define MSR_PKGTM_STATUS_CRIT_LOG __BIT64(5)
84 #define MSR_PKGTM_STATUS_THRESH1 __BIT64(6)
85 #define MSR_PKGTM_STATUS_THRESH1_LOG __BIT64(7)
86 #define MSR_PKGTM_STATUS_THRESH2 __BIT64(8)
87 #define MSR_PKGTM_STATUS_THRESH2_LOG __BIT64(9)
88 #define MSR_PKGTM_STATUS_PWRLIM __BIT64(10)
89 #define MSR_PKGTM_STATUS_PWRLIM_LOG __BIT64(11)
90 #define MSR_PKGTM_STATUS_READ __BITS64(16, 22)
/*
 * Package counterpart of MSR_THERM_STATUS_HAS_STATUS: true only when
 * BOTH the thermal status bit and its log bit are set in the package
 * status value `msr`.  `msr` is evaluated once.
 */
92 #define MSR_PKGTM_STATUS_HAS_STATUS(msr) \
93 (((msr) & (MSR_PKGTM_STATUS_TM_STATUS | MSR_PKGTM_STATUS_TM_STATUS_LOG)) ==\
94 (MSR_PKGTM_STATUS_TM_STATUS | MSR_PKGTM_STATUS_TM_STATUS_LOG))
/*
 * Package counterpart of MSR_THERM_STATUS_IS_CRITICAL: true only when
 * BOTH the critical temperature bit and its log bit are set in the
 * package status value `msr`.  `msr` is evaluated once.
 */
96 #define MSR_PKGTM_STATUS_IS_CRITICAL(msr) \
97 (((msr) & (MSR_PKGTM_STATUS_CRIT | MSR_PKGTM_STATUS_CRIT_LOG)) == \
98 (MSR_PKGTM_STATUS_CRIT | MSR_PKGTM_STATUS_CRIT_LOG))
/*
 * Sentinel temperature: coretemp_msr_temp() assigns it when the readout
 * is not flagged valid, and the sensor update routines translate it into
 * sensor_set_invalid() instead of publishing a value.
 */
100 #define CORETEMP_TEMP_INVALID -1
/*
 * One exported temperature sensor: the ksensor itself (c_sens) plus the
 * ksensordev it is attached to (c_sensdev).  For per-core sensors
 * c_sensdev is obtained from the cpu device via CPU_GET_SENSDEV(); for
 * the package sensor it is allocated by coretemp_attach().
 * NOTE(review): the closing line of this struct is not present in this
 * listing.
 */
102 struct coretemp_sensor {
103 struct ksensordev *c_sensdev;
104 struct ksensor c_sens;
/*
 * Per-device driver state.
 *   sc_sens      array of per-core sensors allocated in coretemp_attach()
 *                (sc_nsens entries); NULL when this device owns none.
 *   sc_pkg_sens  package sensor, or NULL (see CORETEMP_HAS_PKGSENSOR).
 *   sc_senstask  handle returned by sensor_task_register2().
 *   sc_flags     CORETEMP_FLAG_* bits tracking the critical state.
 *   sc_msr, sc_pkg_msr  not referenced by any code visible in this file.
 * NOTE(review): the code also uses sc_dev, sc_tjmax, sc_nsens and sc_cpu;
 * their declarations and the closing line of the struct are not present
 * in this listing (the embedded line numbers skip) -- confirm against
 * the real file.
 */
107 struct coretemp_softc {
112 struct coretemp_sensor *sc_sens;
113 struct coretemp_sensor *sc_pkg_sens;
115 struct sensor_task *sc_senstask;
117 volatile uint32_t sc_flags; /* CORETEMP_FLAG_ */
118 volatile uint64_t sc_msr;
119 volatile uint64_t sc_pkg_msr;
/*
 * sc_flags bits.  CORETEMP_FLAG_CRIT is set and cleared by
 * coretemp_msr_temp() and CORETEMP_FLAG_PKGCRIT by
 * coretemp_pkg_msr_temp(); each remembers that the critical condition was
 * already reported so the notification is sent once per episode.
 * CORETEMP_HAS_PKGSENSOR(sc) is true when a package sensor was allocated.
 */
122 #define CORETEMP_FLAG_CRIT 0x4
123 #define CORETEMP_FLAG_PKGCRIT 0x8
125 #define CORETEMP_HAS_PKGSENSOR(sc) ((sc)->sc_pkg_sens != NULL)
/*
 * Forward declarations: the four device interface entry points, followed
 * by the MSR fetch/decode helpers and the periodic sensor tasks for the
 * per-core and package sensors.
 * NOTE(review): the continuation line of the coretemp_msr_fetch()
 * prototype is not present in this listing; the definition takes a third
 * parameter, uint64_t *pkg_msr.
 */
130 static void coretemp_identify(driver_t *driver, device_t parent);
131 static int coretemp_probe(device_t dev);
132 static int coretemp_attach(device_t dev);
133 static int coretemp_detach(device_t dev);
135 static void coretemp_msr_fetch(struct coretemp_softc *sc, uint64_t *msr,
137 static int coretemp_msr_temp(struct coretemp_softc *sc, uint64_t msr);
138 static void coretemp_sensor_update(struct coretemp_softc *sc, int temp);
139 static void coretemp_sensor_task(void *arg);
141 static void coretemp_pkg_sensor_task(void *arg);
142 static void coretemp_pkg_sensor_update(struct coretemp_softc *sc, int temp);
143 static int coretemp_pkg_msr_temp(struct coretemp_softc *sc, uint64_t msr);
/*
 * Device method table: binds the identify, probe, attach and detach
 * entry points of this driver.
 * NOTE(review): the table terminator and closing line are not present in
 * this listing.
 */
145 static device_method_t coretemp_methods[] = {
146 /* Device interface */
147 DEVMETHOD(device_identify, coretemp_identify),
148 DEVMETHOD(device_probe, coretemp_probe),
149 DEVMETHOD(device_attach, coretemp_attach),
150 DEVMETHOD(device_detach, coretemp_detach),
/*
 * Driver description; the softc size tells the bus code how much
 * per-device state (struct coretemp_softc) to provide.
 * NOTE(review): the remaining initializer lines (presumably the driver
 * name and the method table) and the closing line are not present in
 * this listing -- confirm.
 */
155 static driver_t coretemp_driver = {
158 sizeof(struct coretemp_softc),
/*
 * Register the driver as module coretemp, version 1, with cpu given as
 * the parent bus name and no module event handler.
 */
161 static devclass_t coretemp_devclass;
162 DRIVER_MODULE(coretemp, cpu, coretemp_driver, coretemp_devclass, NULL, NULL);
163 MODULE_VERSION(coretemp, 1);
/*
 * Publish a temperature on one sensor.
 *
 * sens       sensor to update.
 * sc         driver state; only sc_flags is read.
 * crit_flag  CORETEMP_FLAG_* bit to test in sc_flags.
 * temp       value handed to sensor_set_temp_degc().
 *
 * The status is SENSOR_S_CRIT when crit_flag is set in sc_flags.
 * NOTE(review): the SENSOR_S_OK assignment is presumably the else branch;
 * the line between the two assignments, the return type and the braces
 * are not present in this listing (the embedded line numbers skip) --
 * confirm against the real file.
 */
166 coretemp_sensor_set(struct ksensor *sens, const struct coretemp_softc *sc,
167 uint32_t crit_flag, int temp)
169 enum sensor_status status;
171 if (sc->sc_flags & crit_flag)
172 status = SENSOR_S_CRIT;
174 status = SENSOR_S_OK;
175 sensor_set_temp_degc(sens, temp, status);
/*
 * Identify routine: adds one coretemp child under `parent` via
 * device_add_child() with a wildcard unit (-1).
 *
 * The visible guards run in this order before the child is added:
 *   1. a coretemp child already exists under parent,
 *   2. the CPU vendor is not Intel,
 *   3. the CPUID model is below 0xe (see the PIII note in the body),
 *   4. CPUID_THERMAL_SENSOR is not set in cpu_thermal_feature.
 * A message is printed on the parent when adding the child failed.
 *
 * NOTE(review): the statements executed by each guard (presumably early
 * returns), the declaration of `child`, the NULL check before the
 * message, the braces and the comment delimiters are not present in this
 * listing (the embedded line numbers skip) -- confirm against the real
 * file.
 */
179 coretemp_identify(driver_t *driver, device_t parent)
183 /* Make sure we're not being doubly invoked. */
184 if (device_find_child(parent, "coretemp", -1) != NULL)
187 /* Check that the vendor is Intel. */
188 if (cpu_vendor_id != CPU_VENDOR_INTEL)
192 * Some Intel CPUs, namely the PIII, don't have thermal sensors,
193 * but report them in cpu_thermal_feature. This leads to a later
194 * GPF when the sensor is queried via a MSR, so we stop here.
196 if (CPUID_TO_MODEL(cpu_id) < 0xe)
199 if ((cpu_thermal_feature & CPUID_THERMAL_SENSOR) == 0)
203 * We add a child for each CPU since settings must be performed
204 * on each CPU in the SMP case.
206 child = device_add_child(parent, "coretemp", -1);
208 device_printf(parent, "add coretemp child failed\n");
/*
 * Probe routine: checks whether the coretemp resource for unit 0 is
 * disabled, sets the device description and returns BUS_PROBE_GENERIC.
 * NOTE(review): the statement executed when the resource is disabled
 * (presumably an error return), the return type and the braces are not
 * present in this listing -- confirm against the real file.
 */
212 coretemp_probe(device_t dev)
214 if (resource_disabled("coretemp", 0))
217 device_set_desc(dev, "CPU On-Die Thermal Sensors");
219 return (BUS_PROBE_GENERIC);
/*
 * Attach routine.  Reading the visible lines top to bottom:
 *
 * 1. Errata AE18 check: for CPUID model 0xe with stepping below 0xc the
 *    code reads MSR_BIOS_SIGN and can report the CPU as not supported.
 *    NOTE(review): the comparison on the MSR value and the resulting
 *    return are not visible, and the XXXrpaulo remark suggests the check
 *    may be compiled out -- confirm.
 *
 * 2. Tj(max) selection: the comments state an initial value of 100C.
 *    Models 0xf (stepping 2 and up) and 0xe read MSR_IA32_EXT_CONFIG;
 *    models 0x17 and 0x1c switch on the stepping.  Otherwise
 *    MSR_IA32_TEMPERATURE_TARGET is read with rdmsr_safe() and bits
 *    16..23 are accepted as Tj(max) when they fall in [70, 110];
 *    out-of-range or unreadable values only produce a message.
 *    NOTE(review): the actual sc_tjmax assignments in the model-specific
 *    branches are not visible.
 *
 * 3. Per-core sensors: sc_cpu is the unit number of the parent device.
 *    Starting from the topology node of sc_cpu the code walks parent
 *    links looking for a CORE_LEVEL node; the master cpu is derived from
 *    the node members with BSRCPUMASK().  sc_nsens is taken from
 *    node->child_no and the cpu mask from node->members; an sc_sens array
 *    of that size is allocated zeroed, and for every cpu in the mask the
 *    sensor device from CPU_GET_SENSDEV() gets a SENSOR_TEMP sensor
 *    described as node%d core%d temp, initially marked unknown.
 *    NOTE(review): what happens when sc_cpu is not the master, when no
 *    core node is found (see the CPUMASK_ASSBIT call), and under which
 *    condition sc_sens is freed again are not visible.
 *
 * 4. Package sensor: only when CPUID_THERMAL_PTM is set.  The walk is
 *    repeated for a CHIP_LEVEL node; a coretemp_sensor and its own
 *    ksensordev are allocated, the device is named cpu_node%d, the sensor
 *    is described as node%d temp, attached and installed with
 *    sensordev_install().
 *    NOTE(review): the handling of a non-master cpu is not visible.
 *
 * 5. Task registration: sensor_task_register2() is called with sc, an
 *    interval argument of 2 and sc_cpu; the package task is used when a
 *    package sensor exists, otherwise the per-core task.
 *    NOTE(review): the units of the interval and the condition guarding
 *    the per-core registration are not visible.
 *
 * All kmalloc() calls visible here use M_DEVBUF; the per-core array and
 * the package ksensordev are requested with M_WAITOK | M_ZERO.
 *
 * NOTE(review): many short lines of this function (declarations, braces,
 * else/break/return statements, comment delimiters) are not present in
 * this listing, so the return values and exact branch structure must be
 * confirmed against the real file.
 */
223 coretemp_attach(device_t dev)
225 struct coretemp_softc *sc = device_get_softc(dev);
226 const struct cpu_node *node, *start_node;
230 int cpu_model, cpu_stepping;
231 int ret, tjtarget, cpu, sens_idx;
233 struct coretemp_sensor *csens;
234 boolean_t sens_task = FALSE;
237 pdev = device_get_parent(dev);
238 cpu_model = CPUID_TO_MODEL(cpu_id);
239 cpu_stepping = cpu_id & CPUID_STEPPING;
243 * XXXrpaulo: I have this CPU model and when it returns from C3
244 * coretemp continues to function properly.
248 * Check for errata AE18.
249 * "Processor Digital Thermal Sensor (DTS) Readout stops
250 * updating upon returning from C3/C4 state."
252 * Adapted from the Linux coretemp driver.
254 if (cpu_model == 0xe && cpu_stepping < 0xc) {
255 msr = rdmsr(MSR_BIOS_SIGN);
258 device_printf(dev, "not supported (Intel errata "
259 "AE18), try updating your BIOS\n");
266 * Use 100C as the initial value.
270 if ((cpu_model == 0xf && cpu_stepping >= 2) || cpu_model == 0xe) {
272 * On some Core 2 CPUs, there's an undocumented MSR that
273 * can tell us if Tj(max) is 100 or 85.
275 * The if-clause for CPUs having the MSR_IA32_EXT_CONFIG
276 * was adapted from the Linux coretemp driver.
278 msr = rdmsr(MSR_IA32_EXT_CONFIG);
281 } else if (cpu_model == 0x17) {
282 switch (cpu_stepping) {
283 case 0x6: /* Mobile Core 2 Duo */
286 default: /* Unknown stepping */
289 } else if (cpu_model == 0x1c) {
290 switch (cpu_stepping) {
291 case 0xa: /* 45nm Atom D400, N400 and D500 series */
300 * Attempt to get Tj(max) from MSR IA32_TEMPERATURE_TARGET.
302 * This method is described in Intel white paper "CPU
303 * Monitoring With DTS/PECI". (#322683)
305 ret = rdmsr_safe(MSR_IA32_TEMPERATURE_TARGET, &msr);
307 tjtarget = (msr >> 16) & 0xff;
310 * On earlier generation of processors, the value
311 * obtained from IA32_TEMPERATURE_TARGET register is
312 * an offset that needs to be summed with a model
313 * specific base. It is however not clear what
314 * these numbers are, with the publicly available
315 * documents from Intel.
317 * For now, we consider [70, 110]C range, as
318 * described in #322683, as "reasonable" and accept
319 * these values whenever the MSR is available for
320 * read, regardless the CPU model.
322 if (tjtarget >= 70 && tjtarget <= 110)
323 sc->sc_tjmax = tjtarget;
325 device_printf(dev, "Tj(target) value %d "
326 "does not seem right.\n", tjtarget);
328 device_printf(dev, "Can not get Tj(target) "
329 "from your CPU, using 100C.\n");
333 device_printf(dev, "Setting TjMax=%d\n", sc->sc_tjmax);
335 sc->sc_cpu = device_get_unit(device_get_parent(dev));
337 start_node = get_cpu_node_by_cpuid(sc->sc_cpu);
340 while (node != NULL) {
341 if (node->type == CORE_LEVEL) {
342 if (node->child_no == 0)
346 node = node->parent_node;
349 master_cpu = BSRCPUMASK(node->members);
351 device_printf(dev, "master cpu%d, count %u\n",
352 master_cpu, node->child_no);
354 if (sc->sc_cpu != master_cpu)
357 KKASSERT(node->child_no > 0);
358 sc->sc_nsens = node->child_no;
359 cpu_mask = node->members;
362 CPUMASK_ASSBIT(cpu_mask, sc->sc_cpu);
364 sc->sc_sens = kmalloc(sizeof(struct coretemp_sensor) * sc->sc_nsens,
365 M_DEVBUF, M_WAITOK | M_ZERO);
368 CPUSET_FOREACH(cpu, cpu_mask) {
371 cpu_dev = devclass_find_unit("cpu", cpu);
375 KKASSERT(sens_idx < sc->sc_nsens);
376 csens = &sc->sc_sens[sens_idx];
378 csens->c_sensdev = CPU_GET_SENSDEV(cpu_dev);
379 if (csens->c_sensdev == NULL)
383 * Add hw.sensors.cpuN.temp0 MIB.
385 ksnprintf(csens->c_sens.desc, sizeof(csens->c_sens.desc),
386 "node%d core%d temp", get_chip_ID(cpu),
387 get_core_number_within_chip(cpu));
388 csens->c_sens.type = SENSOR_TEMP;
389 sensor_set_unknown(&csens->c_sens);
390 sensor_attach(csens->c_sensdev, &csens->c_sens);
396 kfree(sc->sc_sens, M_DEVBUF);
403 if (cpu_thermal_feature & CPUID_THERMAL_PTM) {
404 boolean_t pkg_sens = TRUE;
407 * Package thermal sensor
411 while (node != NULL) {
412 if (node->type == CHIP_LEVEL) {
413 if (node->child_no == 0)
417 node = node->parent_node;
420 master_cpu = BSRCPUMASK(node->members);
422 device_printf(dev, "pkg master cpu%d\n",
425 if (sc->sc_cpu != master_cpu)
430 csens = sc->sc_pkg_sens =
431 kmalloc(sizeof(struct coretemp_sensor), M_DEVBUF,
433 csens->c_sensdev = kmalloc(sizeof(struct ksensordev),
434 M_DEVBUF, M_WAITOK | M_ZERO);
437 * Add hw.sensors.cpu_nodeN.temp0 MIB.
439 ksnprintf(csens->c_sensdev->xname,
440 sizeof(csens->c_sensdev->xname), "cpu_node%d",
441 get_chip_ID(sc->sc_cpu));
442 ksnprintf(csens->c_sens.desc,
443 sizeof(csens->c_sens.desc), "node%d temp",
444 get_chip_ID(sc->sc_cpu));
445 csens->c_sens.type = SENSOR_TEMP;
446 sensor_set_unknown(&csens->c_sens);
447 sensor_attach(csens->c_sensdev, &csens->c_sens);
448 sensordev_install(csens->c_sensdev);
455 if (CORETEMP_HAS_PKGSENSOR(sc)) {
456 sc->sc_senstask = sensor_task_register2(sc,
457 coretemp_pkg_sensor_task, 2, sc->sc_cpu);
459 KASSERT(sc->sc_sens != NULL, ("no sensors"));
460 sc->sc_senstask = sensor_task_register2(sc,
461 coretemp_sensor_task, 2, sc->sc_cpu);
/*
 * Detach routine: undoes what coretemp_attach() set up.
 *
 *  - Unregisters the periodic sensor task if one was registered.
 *  - When sc_nsens is positive, detaches every per-core sensor that has
 *    a sensor device and frees the sc_sens array.
 *  - When a package sensor exists, deinstalls its sensor device and frees
 *    both the sensor device and the sensor.
 *
 * NOTE(review): no sensor_detach() call for the package sensor is visible
 * before its sensor device is deinstalled and freed -- confirm that this
 * is intended.  The declaration of `i`, the statement under the NULL
 * check (presumably continue), the braces and the return value are not
 * present in this listing.
 */
469 coretemp_detach(device_t dev)
471 struct coretemp_softc *sc = device_get_softc(dev);
472 struct coretemp_sensor *csens;
474 if (sc->sc_senstask != NULL)
475 sensor_task_unregister2(sc->sc_senstask);
477 if (sc->sc_nsens > 0) {
480 for (i = 0; i < sc->sc_nsens; ++i) {
481 csens = &sc->sc_sens[i];
482 if (csens->c_sensdev == NULL)
484 sensor_detach(csens->c_sensdev, &csens->c_sens);
486 kfree(sc->sc_sens, M_DEVBUF);
489 if (sc->sc_pkg_sens != NULL) {
490 csens = sc->sc_pkg_sens;
491 sensordev_deinstall(csens->c_sensdev);
492 kfree(csens->c_sensdev, M_DEVBUF);
493 kfree(csens, M_DEVBUF);
/*
 * Decode a per-core thermal status value.
 *
 * sc   driver state; sc_tjmax, sc_flags, sc_cpu and sc_dev are used.
 * msr  value previously read from MSR_THERM_STATUS.
 *
 * Steps visible here:
 *  - Logs a PROCHOT message when both thermal status bits are set.
 *  - When READ_VALID is set, the temperature is sc_tjmax minus the READ
 *    field; CORETEMP_TEMP_INVALID is assigned for the other case.
 *  - When both critical bits are set and CORETEMP_FLAG_CRIT is not yet
 *    set, prints a warning, sends a devctl notification (system coretemp,
 *    subsystem Thermal, the temperature as type, node and core in the
 *    data string) and sets CORETEMP_FLAG_CRIT, so the event is reported
 *    once.  When the condition is gone the flag is cleared again.
 *
 * NOTE(review): the temperature string passed to devctl is built even
 * when the readout was invalid, in which case it carries the sentinel.
 * The declaration of `temp`, the else between the two temp assignments,
 * the braces and the return statement (presumably returning temp) are not
 * present in this listing -- confirm against the real file.
 */
499 coretemp_msr_temp(struct coretemp_softc *sc, uint64_t msr)
504 * Check for Thermal Status and Thermal Status Log.
506 if (MSR_THERM_STATUS_HAS_STATUS(msr))
507 device_printf(sc->sc_dev, "PROCHOT asserted\n");
509 if (msr & MSR_THERM_STATUS_READ_VALID)
510 temp = sc->sc_tjmax - __SHIFTOUT(msr, MSR_THERM_STATUS_READ);
512 temp = CORETEMP_TEMP_INVALID;
515 * Check for Critical Temperature Status and Critical
517 * It doesn't really matter if the current temperature is
518 * invalid because the "Critical Temperature Log" bit will
519 * tell us if the Critical Temperature has been reached in
520 * past. It's not directly related to the current temperature.
522 * If we reach a critical level, allow devctl(4) to catch this
523 * and shutdown the system.
525 if (MSR_THERM_STATUS_IS_CRITICAL(msr)) {
526 if ((sc->sc_flags & CORETEMP_FLAG_CRIT) == 0) {
527 char stemp[16], data[64];
529 device_printf(sc->sc_dev,
530 "critical temperature detected, "
531 "suggest system shutdown\n");
532 ksnprintf(stemp, sizeof(stemp), "%d", temp);
533 ksnprintf(data, sizeof(data),
534 "notify=0xcc node=%d core=%d",
535 get_chip_ID(sc->sc_cpu),
536 get_core_number_within_chip(sc->sc_cpu));
537 devctl_notify("coretemp", "Thermal", stemp, data);
538 sc->sc_flags |= CORETEMP_FLAG_CRIT;
540 } else if (sc->sc_flags & CORETEMP_FLAG_CRIT) {
541 sc->sc_flags &= ~CORETEMP_FLAG_CRIT;
/*
 * Decode a package thermal status value.
 *
 * sc   driver state; sc_tjmax, sc_flags, sc_cpu and sc_dev are used.
 * msr  value previously read from MSR_PKG_THERM_STATUS.
 *
 * Same flow as coretemp_msr_temp() with these visible differences:
 *  - the temperature is always computed as sc_tjmax minus the READ field;
 *    no validity bit is tested,
 *  - the critical state is tracked in CORETEMP_FLAG_PKGCRIT,
 *  - the devctl data string carries the node only, not a core number.
 *
 * NOTE(review): the critical warning text is identical to the per-core
 * one, so the two cannot be told apart in the log.  The declaration of
 * `temp`, the braces and the return statement (presumably returning temp)
 * are not present in this listing -- confirm against the real file.
 */
548 coretemp_pkg_msr_temp(struct coretemp_softc *sc, uint64_t msr)
553 * Check for Thermal Status and Thermal Status Log.
555 if (MSR_PKGTM_STATUS_HAS_STATUS(msr))
556 device_printf(sc->sc_dev, "package PROCHOT asserted\n");
558 temp = sc->sc_tjmax - __SHIFTOUT(msr, MSR_PKGTM_STATUS_READ);
561 * Check for Critical Temperature Status and Critical
563 * It doesn't really matter if the current temperature is
564 * invalid because the "Critical Temperature Log" bit will
565 * tell us if the Critical Temperature has been reached in
566 * past. It's not directly related to the current temperature.
568 * If we reach a critical level, allow devctl(4) to catch this
569 * and shutdown the system.
571 if (MSR_PKGTM_STATUS_IS_CRITICAL(msr)) {
572 if ((sc->sc_flags & CORETEMP_FLAG_PKGCRIT) == 0) {
573 char stemp[16], data[64];
575 device_printf(sc->sc_dev,
576 "critical temperature detected, "
577 "suggest system shutdown\n");
578 ksnprintf(stemp, sizeof(stemp), "%d", temp);
579 ksnprintf(data, sizeof(data), "notify=0xcc node=%d",
580 get_chip_ID(sc->sc_cpu));
581 devctl_notify("coretemp", "Thermal", stemp, data);
582 sc->sc_flags |= CORETEMP_FLAG_PKGCRIT;
584 } else if (sc->sc_flags & CORETEMP_FLAG_PKGCRIT) {
585 sc->sc_flags &= ~CORETEMP_FLAG_PKGCRIT;
/*
 * Read the raw thermal status values on the current CPU.
 *
 * sc       driver state; sc_cpu must be the CPU this code runs on
 *          (asserted against mycpuid).
 * msr      out: value of MSR_THERM_STATUS.
 * pkg_msr  out: value of MSR_PKG_THERM_STATUS.
 *
 * NOTE(review): coretemp_sensor_task() passes NULL for pkg_msr, so the
 * package read is presumably guarded by a NULL check on the line that is
 * missing between the two reads in this listing (the embedded line
 * numbers skip) -- confirm against the real file.  The return type and
 * braces are not present either.
 */
592 coretemp_msr_fetch(struct coretemp_softc *sc, uint64_t *msr, uint64_t *pkg_msr)
594 KASSERT(sc->sc_cpu == mycpuid,
595 ("%s not on the target cpu%d, but on %d",
596 device_get_name(sc->sc_dev), sc->sc_cpu, mycpuid));
598 *msr = rdmsr(MSR_THERM_STATUS);
600 *pkg_msr = rdmsr(MSR_PKG_THERM_STATUS);
/*
 * Push one decoded core temperature to every per-core sensor owned by
 * this device.
 *
 * sc    driver state; sc_sens, sc_nsens and sc_flags are used.
 * temp  decoded temperature or CORETEMP_TEMP_INVALID.
 *
 * Every entry of sc_sens receives the same value: with the sentinel the
 * sensors are marked invalid, otherwise coretemp_sensor_set() publishes
 * the value with the status derived from CORETEMP_FLAG_CRIT.  Entries
 * without a sensor device are tested for and, presumably, skipped.
 *
 * NOTE(review): the statement under the sc_sens NULL check (presumably a
 * return), the statements under the c_sensdev checks (presumably
 * continue), the else joining the two loops, the declaration of `i` and
 * the braces are not present in this listing -- confirm against the real
 * file.
 */
604 coretemp_sensor_update(struct coretemp_softc *sc, int temp)
606 struct coretemp_sensor *csens;
609 if (sc->sc_sens == NULL)
612 if (temp == CORETEMP_TEMP_INVALID) {
613 for (i = 0; i < sc->sc_nsens; ++i) {
614 csens = &sc->sc_sens[i];
615 if (csens->c_sensdev == NULL)
617 sensor_set_invalid(&csens->c_sens);
620 for (i = 0; i < sc->sc_nsens; ++i) {
621 csens = &sc->sc_sens[i];
622 if (csens->c_sensdev == NULL)
624 coretemp_sensor_set(&csens->c_sens, sc,
625 CORETEMP_FLAG_CRIT, temp);
/*
 * Push one decoded package temperature to the package sensor.
 *
 * sc    driver state; sc_pkg_sens must be non-NULL (asserted).
 * temp  decoded temperature or CORETEMP_TEMP_INVALID.
 *
 * With the sentinel the sensor is marked invalid; the other visible call
 * publishes the value through coretemp_sensor_set() with the status
 * derived from CORETEMP_FLAG_PKGCRIT.
 * NOTE(review): the else joining the two calls, the return type and the
 * braces are not present in this listing -- confirm against the real
 * file.
 */
631 coretemp_pkg_sensor_update(struct coretemp_softc *sc, int temp)
633 KKASSERT(sc->sc_pkg_sens != NULL);
634 if (temp == CORETEMP_TEMP_INVALID) {
635 sensor_set_invalid(&sc->sc_pkg_sens->c_sens);
637 coretemp_sensor_set(&sc->sc_pkg_sens->c_sens, sc,
638 CORETEMP_FLAG_PKGCRIT, temp);
/*
 * Sensor task registered by coretemp_attach() when the device has no
 * package sensor.  Fetches only the per-core status value (NULL is passed
 * for the package output), decodes it and updates the per-core sensors.
 *
 * arg  the struct coretemp_softc registered with the task.
 *
 * NOTE(review): the declarations of `msr` and `temp`, the return type and
 * the braces are not present in this listing.
 */
643 coretemp_sensor_task(void *arg)
645 struct coretemp_softc *sc = arg;
649 coretemp_msr_fetch(sc, &msr, NULL);
650 temp = coretemp_msr_temp(sc, msr);
652 coretemp_sensor_update(sc, temp);
/*
 * Sensor task registered by coretemp_attach() when the device owns a
 * package sensor.  Fetches both the per-core and the package status
 * values, decodes each and updates the per-core sensors followed by the
 * package sensor.
 *
 * arg  the struct coretemp_softc registered with the task.
 *
 * NOTE(review): the declarations of `temp` and `pkg_temp`, the return
 * type and the braces are not present in this listing.
 */
656 coretemp_pkg_sensor_task(void *arg)
658 struct coretemp_softc *sc = arg;
659 uint64_t msr, pkg_msr;
662 coretemp_msr_fetch(sc, &msr, &pkg_msr);
663 temp = coretemp_msr_temp(sc, msr);
664 pkg_temp = coretemp_pkg_msr_temp(sc, pkg_msr);
666 coretemp_sensor_update(sc, temp);
667 coretemp_pkg_sensor_update(sc, pkg_temp);