clockmod: Properly implement Intel software controlled clock modulation
[dragonfly.git] / sys / dev / powermng / clockmod / clockmod.c
1 /*
2  * Copyright (c) 2014 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Sepherosa Ziehau <sepherosa@gmail.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35 #include <sys/param.h>
36 #include <sys/conf.h>
37 #include <sys/kernel.h>
38 #include <sys/bus.h>
39 #include <sys/cpu_topology.h>
40 #include <sys/module.h>
41 #include <sys/queue.h>
42 #include <sys/serialize.h>
43 #include <sys/sysctl.h>
44 #include <sys/systm.h>
45
46 #include <net/netmsg2.h>
47 #include <net/netisr2.h>
48
49 #include <machine/specialreg.h>
50 #include <machine/cpufunc.h>
51 #include <machine/cputypes.h>
52 #include <machine/md_var.h>
53
/* Forward declaration: clockmod_softc points at its domain before the full definition. */
54 struct clockmod_dom;
55
/*
 * Message carrying one clock modulation setting.  clockmod_select()
 * fills it in and sends it to the target cpu's netisr port;
 * clockmod_select_handler() writes ctl_value to MSR_THERM_CONTROL and
 * replies.
 */
56 struct netmsg_clockmod {
57         struct netmsg_base      base;
/* Raw value handed to wrmsr() by the handler. */
58         uint64_t                ctl_value;
59 };
60
/*
 * Per-device state; one instance per attached clockmod device.
 */
61 struct clockmod_softc {
/* Linkage on the owning domain's dom_list. */
62         TAILQ_ENTRY(clockmod_softc) sc_link;
/* Domain this cpu was attached to; set on attach, cleared to NULL on detach. */
63         struct clockmod_dom     *sc_dom;
/* Device unit number, used throughout this file as the cpu id. */
64         int                     sc_cpuid;
65 };
66
/*
 * A clock modulation domain: the set of cpus that are always switched
 * to the same duty cycle together.  Domains are keyed by cpu mask and
 * live on clockmod_dom_list.
 */
67 struct clockmod_dom {
/* Linkage on the global clockmod_dom_list. */
68         TAILQ_ENTRY(clockmod_dom) dom_link;
/* Softcs of the member cpus attached so far. */
69         TAILQ_HEAD(, clockmod_softc) dom_list;
/* Sysctl context and root node (machdep.<dom_name>) owned by this domain. */
70         struct sysctl_ctx_list  dom_sysctl_ctx;
71         struct sysctl_oid       *dom_sysctl_tree;
/* Mask of every cpu expected to join this domain. */
72         cpumask_t               dom_cpumask;
/* "clockmod_dom<id>"; also used as the sysctl node name. */
73         char                    dom_name[16];
/* Index into clockmod_dom_controls of the currently selected duty. */
74         int                     dom_select;
/* CLOCKMOD_DOM_FLAG_* bits. */
75         uint32_t                dom_flags;
76 };
77
/*
 * Set in dom_flags once every cpu in dom_cpumask has attached; cleared
 * again as soon as any member detaches.  Duty selection is refused
 * while it is clear.
 */
78 #define CLOCKMOD_DOM_FLAG_ACTIVE        0x1
79
/*
 * One selectable duty cycle setting.
 */
80 struct clockmod_dom_ctrl {
/* Duty as text, formatted "<int>.<2 digits>%"; matched against sysctl input. */
81         char                    ctl_name[8];
/* Value written to MSR_THERM_CONTROL to apply this duty. */
82         uint64_t                ctl_value;
83 };
84
/* Domain management; callers hold or take clockmod_dom_slize as noted at each definition. */
85 static int      clockmod_dom_attach(struct clockmod_softc *);
86 static void     clockmod_dom_detach(struct clockmod_softc *);
87 static struct clockmod_dom *clockmod_dom_find(cpumask_t);
88 static struct clockmod_dom *clockmod_dom_create(cpumask_t);
89 static void     clockmod_dom_destroy(struct clockmod_dom *);
90
/* Per-domain sysctl handlers (machdep.clockmod_dom<N>.*). */
91 static int      clockmod_dom_sysctl_select(SYSCTL_HANDLER_ARGS);
92 static int      clockmod_dom_sysctl_members(SYSCTL_HANDLER_ARGS);
93 static int      clockmod_dom_sysctl_available(SYSCTL_HANDLER_ARGS);
94
/* Device interface methods. */
95 static void     clockmod_identify(driver_t *, device_t);
96 static int      clockmod_probe(device_t);
97 static int      clockmod_attach(device_t);
98 static int      clockmod_detach(device_t);
99
/* Cross-cpu application of a duty setting. */
100 static void     clockmod_select_handler(netmsg_t);
101 static int      clockmod_select(const struct clockmod_softc *,
102                     const struct clockmod_dom_ctrl *);
103
/* Filter for duty values that must not be offered on certain cpus. */
104 static boolean_t clockmod_errata_duty(int);
105
/* Serializer taken around all accesses to the domain list, domains and control table. */
106 static struct lwkt_serialize clockmod_dom_slize = LWKT_SERIALIZE_INITIALIZER;
/* Next domain id; consumed (and never reused) by clockmod_dom_create(). */
107 static int      clockmod_dom_id;
/* All existing domains. */
108 static TAILQ_HEAD(, clockmod_dom) clockmod_dom_list =
109     TAILQ_HEAD_INITIALIZER(clockmod_dom_list);
/* Number of valid entries in clockmod_dom_controls. */
110 static int      clockmod_dom_nctrl;
/*
 * Table of selectable duty settings, shared by all domains.  Allocated
 * when the first domain is created, freed when the last one is
 * destroyed.  Entry 0 is the 100% setting.
 */
111 static struct clockmod_dom_ctrl *clockmod_dom_controls;
112
/* Method table binding the device interface to this driver's handlers. */
113 static device_method_t clockmod_methods[] = {
114         /* Device interface */
115         DEVMETHOD(device_identify,      clockmod_identify),
116         DEVMETHOD(device_probe,         clockmod_probe),
117         DEVMETHOD(device_attach,        clockmod_attach),
118         DEVMETHOD(device_detach,        clockmod_detach),
119
120         DEVMETHOD_END
121 };
122
/* Driver description: name, method table and per-device softc size. */
123 static driver_t clockmod_driver = {
124         "clockmod",
125         clockmod_methods,
126         sizeof(struct clockmod_softc),
127 };
128
/* Register the driver on the "cpu" bus; no module event handler is supplied. */
129 static devclass_t clockmod_devclass;
130 DRIVER_MODULE(clockmod, cpu, clockmod_driver, clockmod_devclass, NULL, NULL);
131
132 static void
133 clockmod_identify(driver_t *driver, device_t parent)
134 {
135         device_t child;
136
137         if (device_find_child(parent, "clockmod", -1) != NULL)
138                 return;
139
140         if (cpu_vendor_id != CPU_VENDOR_INTEL)
141                 return;
142
143         if ((cpu_feature & (CPUID_ACPI | CPUID_TM)) != (CPUID_ACPI | CPUID_TM))
144                 return;
145
146         child = device_add_child(parent, "clockmod", device_get_unit(parent));
147         if (child == NULL)
148                 device_printf(parent, "add clockmod failed\n");
149 }
150
151 static int
152 clockmod_probe(device_t dev)
153 {
154         device_set_desc(dev, "CPU clock modulation");
155         return 0;
156 }
157
158 static int
159 clockmod_attach(device_t dev)
160 {
161         struct clockmod_softc *sc = device_get_softc(dev);
162         int error;
163
164         sc->sc_cpuid = device_get_unit(dev);
165
166         error = clockmod_dom_attach(sc);
167         if (error) {
168                 device_printf(dev, "domain attach failed\n");
169                 return error;
170         }
171
172         return 0;
173 }
174
175 static int
176 clockmod_detach(device_t dev)
177 {
178         clockmod_dom_detach(device_get_softc(dev));
179         return 0;
180 }
181
182 static int
183 clockmod_dom_attach(struct clockmod_softc *sc)
184 {
185         struct clockmod_softc *sc1;
186         struct clockmod_dom *dom;
187         cpumask_t mask, found_mask = 0;
188         int error = 0;
189
190         mask = get_cpumask_from_level(sc->sc_cpuid, CORE_LEVEL);
191         if (mask == 0)
192                 mask = CPUMASK(sc->sc_cpuid);
193
194         lwkt_serialize_enter(&clockmod_dom_slize);
195
196         dom = clockmod_dom_find(mask);
197         if (dom == NULL) {
198                 dom = clockmod_dom_create(mask);
199                 if (dom == NULL) {
200                         error = ENOMEM;
201                         goto back;
202                 }
203         }
204
205         sc->sc_dom = dom;
206         TAILQ_INSERT_TAIL(&dom->dom_list, sc, sc_link);
207
208         TAILQ_FOREACH(sc1, &dom->dom_list, sc_link)
209                 found_mask |= CPUMASK(sc1->sc_cpuid);
210
211         if (found_mask == dom->dom_cpumask) {
212                 /* All cpus in this domain is found */
213                 dom->dom_flags |= CLOCKMOD_DOM_FLAG_ACTIVE;
214         }
215 back:
216         lwkt_serialize_exit(&clockmod_dom_slize);
217         return error;
218 }
219
220 static void
221 clockmod_dom_detach(struct clockmod_softc *sc)
222 {
223         struct clockmod_dom *dom;
224
225         lwkt_serialize_enter(&clockmod_dom_slize);
226
227         dom = sc->sc_dom;
228         sc->sc_dom = NULL;
229
230         if (dom->dom_flags & CLOCKMOD_DOM_FLAG_ACTIVE) {
231                 struct clockmod_softc *sc1;
232
233                 /* Raise to 100% */
234                 TAILQ_FOREACH(sc1, &dom->dom_list, sc_link)
235                         clockmod_select(sc1, &clockmod_dom_controls[0]);
236         }
237
238         /* One cpu is leaving; domain is no longer active */
239         dom->dom_flags &= ~CLOCKMOD_DOM_FLAG_ACTIVE;
240
241         TAILQ_REMOVE(&dom->dom_list, sc, sc_link);
242         if (TAILQ_EMPTY(&dom->dom_list))
243                 clockmod_dom_destroy(dom);
244
245         lwkt_serialize_exit(&clockmod_dom_slize);
246 }
247
248 static struct clockmod_dom *
249 clockmod_dom_find(cpumask_t mask)
250 {
251         struct clockmod_dom *dom;
252
253         TAILQ_FOREACH(dom, &clockmod_dom_list, dom_link) {
254                 if (dom->dom_cpumask == mask)
255                         return dom;
256         }
257         return NULL;
258 }
259
260 static struct clockmod_dom *
261 clockmod_dom_create(cpumask_t mask)
262 {
263         struct clockmod_dom *dom;
264         int id;
265
266         id = clockmod_dom_id++;
267         dom = kmalloc(sizeof(*dom), M_DEVBUF, M_WAITOK | M_ZERO);
268
269         TAILQ_INIT(&dom->dom_list);
270         dom->dom_cpumask = mask;
271         ksnprintf(dom->dom_name, sizeof(dom->dom_name), "clockmod_dom%d", id);
272
273         sysctl_ctx_init(&dom->dom_sysctl_ctx);
274         dom->dom_sysctl_tree = SYSCTL_ADD_NODE(&dom->dom_sysctl_ctx,
275             SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, dom->dom_name,
276             CTLFLAG_RD, 0, "");
277         if (dom->dom_sysctl_tree == NULL) {
278                 kprintf("%s: can't add sysctl node\n", dom->dom_name);
279                 kfree(dom, M_DEVBUF);
280                 return NULL;
281         }
282
283         SYSCTL_ADD_PROC(&dom->dom_sysctl_ctx,
284             SYSCTL_CHILDREN(dom->dom_sysctl_tree),
285             OID_AUTO, "members", CTLTYPE_STRING | CTLFLAG_RD,
286             dom, 0, clockmod_dom_sysctl_members, "A", "member cpus");
287
288         SYSCTL_ADD_PROC(&dom->dom_sysctl_ctx,
289             SYSCTL_CHILDREN(dom->dom_sysctl_tree),
290             OID_AUTO, "available", CTLTYPE_STRING | CTLFLAG_RD,
291             dom, 0, clockmod_dom_sysctl_available, "A",
292             "available duty percent");
293
294         SYSCTL_ADD_PROC(&dom->dom_sysctl_ctx,
295             SYSCTL_CHILDREN(dom->dom_sysctl_tree),
296             OID_AUTO, "select", CTLTYPE_STRING | CTLFLAG_RW,
297             dom, 0, clockmod_dom_sysctl_select, "A", "select duty");
298
299         TAILQ_INSERT_TAIL(&clockmod_dom_list, dom, dom_link);
300
301         if (clockmod_dom_controls == NULL) {
302                 int nctrl, step, i, shift, cnt;
303
304 #ifdef __x86_64__
305                 if (cpu_thermal_feature & CPUID_THERMAL_ECMD)
306                         shift = 0;
307                 else
308 #endif
309                         shift = 1;
310
311                 nctrl = 8 << (1 - shift);
312                 step = 10000 / nctrl;
313
314                 clockmod_dom_controls =
315                     kmalloc(sizeof(struct clockmod_dom_ctrl) * nctrl, M_DEVBUF,
316                     M_WAITOK | M_ZERO);
317
318                 if (bootverbose)
319                         kprintf("clock modulation:\n");
320
321                 cnt = 0;
322                 for (i = 0; i < nctrl; ++i) {
323                         struct clockmod_dom_ctrl *ctrl =
324                             &clockmod_dom_controls[cnt];
325                         int duty;
326
327                         duty = 10000 - (i * step);
328                         if (clockmod_errata_duty(duty))
329                                 continue;
330                         ++cnt;
331
332                         ksnprintf(ctrl->ctl_name, sizeof(ctrl->ctl_name),
333                             "%d.%02d%%", duty / 100, duty % 100);
334                         ctrl->ctl_value = (((nctrl - i) << shift) & 0xf);
335                         if (i != 0)
336                                 ctrl->ctl_value |= 1 << 4;
337
338                         if (bootverbose) {
339                                 kprintf("  0x%04jx %s\n", 
340                                     (uintmax_t)ctrl->ctl_value,
341                                     ctrl->ctl_name);
342                         }
343                 }
344                 clockmod_dom_nctrl = cnt;
345         }
346         return dom;
347 }
348
349 static void
350 clockmod_dom_destroy(struct clockmod_dom *dom)
351 {
352         KASSERT(TAILQ_EMPTY(&dom->dom_list),
353             ("%s: still has member cpus", dom->dom_name));
354         TAILQ_REMOVE(&clockmod_dom_list, dom, dom_link);
355
356         sysctl_ctx_free(&dom->dom_sysctl_ctx);
357         kfree(dom, M_DEVBUF);
358
359         if (TAILQ_EMPTY(&clockmod_dom_list)) {
360                 clockmod_dom_nctrl = 0;
361                 kfree(clockmod_dom_controls, M_DEVBUF);
362                 clockmod_dom_controls = NULL;
363         }
364 }
365
366 static int
367 clockmod_dom_sysctl_members(SYSCTL_HANDLER_ARGS)
368 {
369         struct clockmod_dom *dom = arg1;
370         struct clockmod_softc *sc;
371         int loop, error;
372
373         lwkt_serialize_enter(&clockmod_dom_slize);
374
375         loop = error = 0;
376         TAILQ_FOREACH(sc, &dom->dom_list, sc_link) {
377                 char buf[16];
378
379                 if (error == 0 && loop)
380                         error = SYSCTL_OUT(req, " ", 1);
381                 if (error == 0) {
382                         ksnprintf(buf, sizeof(buf), "cpu%d", sc->sc_cpuid);
383                         error = SYSCTL_OUT(req, buf, strlen(buf));
384                 }
385                 ++loop;
386         }
387
388         lwkt_serialize_exit(&clockmod_dom_slize);
389         return error;
390 }
391
392 static int
393 clockmod_dom_sysctl_available(SYSCTL_HANDLER_ARGS)
394 {
395         struct clockmod_dom *dom = arg1;
396         int loop, error, i;
397
398         lwkt_serialize_enter(&clockmod_dom_slize);
399
400         if ((dom->dom_flags & CLOCKMOD_DOM_FLAG_ACTIVE) == 0) {
401                 error = SYSCTL_OUT(req, " ", 1);
402                 goto done;
403         }
404
405         loop = error = 0;
406         for (i = 0; i < clockmod_dom_nctrl; ++i) {
407                 if (error == 0 && loop)
408                         error = SYSCTL_OUT(req, " ", 1);
409                 if (error == 0) {
410                         error = SYSCTL_OUT(req,
411                             clockmod_dom_controls[i].ctl_name,
412                             strlen(clockmod_dom_controls[i].ctl_name));
413                 }
414                 ++loop;
415         }
416 done:
417         lwkt_serialize_exit(&clockmod_dom_slize);
418         return error;
419 }
420
421 static int
422 clockmod_dom_sysctl_select(SYSCTL_HANDLER_ARGS)
423 {
424         struct clockmod_dom *dom = arg1;
425         struct clockmod_softc *sc;
426         const struct clockmod_dom_ctrl *ctrl = NULL;
427         char duty[16];
428         int error, i;
429
430         lwkt_serialize_enter(&clockmod_dom_slize);
431         KKASSERT(dom->dom_select >= 0 && dom->dom_select < clockmod_dom_nctrl);
432         ksnprintf(duty, sizeof(duty), "%s",
433             clockmod_dom_controls[dom->dom_select].ctl_name);
434         lwkt_serialize_exit(&clockmod_dom_slize);
435
436         error = sysctl_handle_string(oidp, duty, sizeof(duty), req);
437         if (error != 0 || req->newptr == NULL)
438                 return error;
439
440         lwkt_serialize_enter(&clockmod_dom_slize);
441
442         if ((dom->dom_flags & CLOCKMOD_DOM_FLAG_ACTIVE) == 0) {
443                 error = EOPNOTSUPP;
444                 goto back;
445         }
446
447         for (i = 0; i < clockmod_dom_nctrl; ++i) {
448                 ctrl = &clockmod_dom_controls[i];
449                 if (strcmp(duty, ctrl->ctl_name) == 0)
450                         break;
451         }
452         if (i == clockmod_dom_nctrl) {
453                 error = EINVAL;
454                 goto back;
455         }
456         dom->dom_select = i;
457
458         TAILQ_FOREACH(sc, &dom->dom_list, sc_link)
459                 clockmod_select(sc, ctrl);
460 back:
461         lwkt_serialize_exit(&clockmod_dom_slize);
462         return error;
463 }
464
465 static void
466 clockmod_select_handler(netmsg_t msg)
467 {
468         struct netmsg_clockmod *cmsg = (struct netmsg_clockmod *)msg;
469
470 #if 0
471         if (bootverbose) {
472                 kprintf("cpu%d: clockmod 0x%04jx\n", mycpuid,
473                     (uintmax_t)cmsg->ctl_value);
474         }
475 #endif
476
477         wrmsr(MSR_THERM_CONTROL, cmsg->ctl_value);
478         lwkt_replymsg(&cmsg->base.lmsg, 0);
479 }
480
481 static int 
482 clockmod_select(const struct clockmod_softc *sc,
483     const struct clockmod_dom_ctrl *ctrl)
484 {
485         struct netmsg_clockmod msg;
486
487         netmsg_init(&msg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
488             clockmod_select_handler);
489         msg.ctl_value = ctrl->ctl_value;
490         return lwkt_domsg(netisr_cpuport(sc->sc_cpuid), &msg.base.lmsg, 0);
491 }
492
493 static boolean_t
494 clockmod_errata_duty(int duty)
495 {
496         uint32_t model, stepping;
497
498         /*
499          * This is obtained from the original p4tcc code.
500          *
501          * The original errata checking code in p4tcc is obviously wrong.
502          * However, I am no longer being able to find the errata mentioned
503          * in the code.  The guess is that the errata only affects family
504          * 0x0f CPUs, since:
505          * - The errata applies to only to model 0x00, 0x01 and 0x02 in
506          *   the original p4tcc code.
507          * - Software controlled clock modulation has been supported since
508          *   0f_00 and the model of the oldest family 0x06 CPUs supporting
509          *   this feature is 0x09.
510          */
511         if (CPUID_TO_FAMILY(cpu_id) != 0xf)
512                 return FALSE;
513
514         model = CPUID_TO_MODEL(cpu_id);
515         stepping = cpu_id & 0xf;
516
517         if (model == 0x2) {
518                 switch (stepping) {
519                 case 0x2:
520                 case 0x4:
521                 case 0x5:
522                 case 0x7:
523                 case 0x9:
524                         /* Hang w/ 12.50% */
525                         if (duty == 1250)
526                                 return TRUE;
527                         break;
528                 }
529         } else if (model == 0x1) {
530                 switch (stepping) {
531                 case 0x2:
532                 case 0x3:
533                         /* Hang w/ 12.50% and 25.00% */
534                         if (duty == 1250 || duty == 2500)
535                                 return TRUE;
536                         break;
537                 }
538         } else if (model == 0x0) {
539                 switch (stepping) {
540                 case 0x7:
541                 case 0xa:
542                         /* Hang w/ 12.50% and 25.00% */
543                         if (duty == 1250 || duty == 2500)
544                                 return TRUE;
545                         break;
546                 }
547         }
548         return FALSE;
549 }