kernel: Make SMP support default (and non-optional).
[dragonfly.git] / sys / kern / subr_cpu_topology.c
1 /*
2  * Copyright (c) 2012 The DragonFly Project.  All rights reserved.
3  * 
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in
12  *    the documentation and/or other materials provided with the
13  *    distribution.
14  * 3. Neither the name of The DragonFly Project nor the names of its
15  *    contributors may be used to endorse or promote products derived
16  *    from this software without specific, prior written permission.
17  * 
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
22  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  * 
31  */
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/sysctl.h>
37 #include <sys/sbuf.h>
38 #include <sys/cpu_topology.h>
39
40 #include <machine/smp.h>
41
/* Fallback upper bound for APIC IDs when no header has defined one. */
#ifndef NAPICID
#define NAPICID 256
#endif

/*
 * Size of the indentation scratch buffer used by the tree printer.
 * Parenthesized so the macro behaves as a single operand in any expression.
 */
#define INDENT_BUF_SIZE (LEVEL_NO * 3)
/* Stored in physical_id / core_id when no matching topology level exists. */
#define INVALID_ID (-1)
48
49 /* Per-cpu sysctl nodes and info */
50 struct per_cpu_sysctl_info {
51         struct sysctl_ctx_list sysctl_ctx;
52         struct sysctl_oid *sysctl_tree;
53         char cpu_name[32];
54         int physical_id;
55         int core_id;
56         char physical_siblings[8*MAXCPU];
57         char core_siblings[8*MAXCPU];
58 };
59 typedef struct per_cpu_sysctl_info per_cpu_sysctl_info_t;
60
61 static cpu_node_t cpu_topology_nodes[MAXCPU];   /* Memory for topology */
62 static cpu_node_t *cpu_root_node;               /* Root node pointer */
63
64 static struct sysctl_ctx_list cpu_topology_sysctl_ctx;
65 static struct sysctl_oid *cpu_topology_sysctl_tree;
66 static char cpu_topology_members[8*MAXCPU];
67 static per_cpu_sysctl_info_t pcpu_sysctl[MAXCPU];
68
69 int cpu_topology_levels_number = 1;
70 cpu_node_t *root_cpu_node;
71
72 /* Get the next valid apicid starting
73  * from current apicid (curr_apicid
74  */
75 static int
76 get_next_valid_apicid(int curr_apicid)
77 {
78         int next_apicid = curr_apicid;
79         do {
80                 next_apicid++;
81         }
82         while(get_cpuid_from_apicid(next_apicid) == -1 &&
83            next_apicid < NAPICID);
84         if (next_apicid == NAPICID) {
85                 kprintf("Warning: No next valid APICID found. Returning -1\n");
86                 return -1;
87         }
88         return next_apicid;
89 }
90
91 /* Generic topology tree. The parameters have the following meaning:
92  * - children_no_per_level : the number of children on each level
93  * - level_types : the type of the level (THREAD, CORE, CHIP, etc)
94  * - cur_level : the current level of the tree
95  * - node : the current node
96  * - last_free_node : the last free node in the global array.
97  * - cpuid : basicly this are the ids of the leafs
98  */ 
99 static void
100 build_topology_tree(int *children_no_per_level,
101    uint8_t *level_types,
102    int cur_level, 
103    cpu_node_t *node,
104    cpu_node_t **last_free_node,
105    int *apicid)
106 {
107         int i;
108
109         node->child_no = children_no_per_level[cur_level];
110         node->type = level_types[cur_level];
111         node->members = 0;
112
113         if (node->child_no == 0) {
114                 node->child_node = NULL;
115                 *apicid = get_next_valid_apicid(*apicid);
116                 node->members = CPUMASK(get_cpuid_from_apicid(*apicid));
117                 return;
118         }
119
120         node->child_node = *last_free_node;
121         (*last_free_node) += node->child_no;
122         if (node->parent_node == NULL)
123                 root_cpu_node = node;
124         
125         for (i = 0; i < node->child_no; i++) {
126                 node->child_node[i].parent_node = node;
127
128                 build_topology_tree(children_no_per_level,
129                     level_types,
130                     cur_level + 1,
131                     &(node->child_node[i]),
132                     last_free_node,
133                     apicid);
134
135                 node->members |= node->child_node[i].members;
136         }
137 }
138
139 /* Build CPU topology. The detection is made by comparing the
140  * chip, core and logical IDs of each CPU with the IDs of the 
141  * BSP. When we found a match, at that level the CPUs are siblings.
142  */
143 static cpu_node_t *
144 build_cpu_topology(void)
145 {
146         detect_cpu_topology();
147         int i;
148         int BSPID = 0;
149         int threads_per_core = 0;
150         int cores_per_chip = 0;
151         int chips_per_package = 0;
152         int children_no_per_level[LEVEL_NO];
153         uint8_t level_types[LEVEL_NO];
154         int apicid = -1;
155
156         cpu_node_t *root = &cpu_topology_nodes[0];
157         cpu_node_t *last_free_node = root + 1;
158
159         /* Assume that the topology is uniform.
160          * Find the number of siblings within chip
161          * and witin core to build up the topology
162          */
163         for (i = 0; i < ncpus; i++) {
164
165                 cpumask_t mask = CPUMASK(i);
166
167                 if ((mask & smp_active_mask) == 0)
168                         continue;
169
170                 if (get_chip_ID(BSPID) == get_chip_ID(i))
171                         cores_per_chip++;
172                 else
173                         continue;
174
175                 if (get_core_number_within_chip(BSPID) ==
176                     get_core_number_within_chip(i))
177                         threads_per_core++;
178         }
179
180         cores_per_chip /= threads_per_core;
181         chips_per_package = ncpus / (cores_per_chip * threads_per_core);
182         
183         if (bootverbose)
184                 kprintf("CPU Topology: cores_per_chip: %d; threads_per_core: %d; chips_per_package: %d;\n",
185                     cores_per_chip, threads_per_core, chips_per_package);
186
187         if (threads_per_core > 1) { /* HT available - 4 levels */
188
189                 children_no_per_level[0] = chips_per_package;
190                 children_no_per_level[1] = cores_per_chip;
191                 children_no_per_level[2] = threads_per_core;
192                 children_no_per_level[3] = 0;
193
194                 level_types[0] = PACKAGE_LEVEL;
195                 level_types[1] = CHIP_LEVEL;
196                 level_types[2] = CORE_LEVEL;
197                 level_types[3] = THREAD_LEVEL;
198         
199                 build_topology_tree(children_no_per_level,
200                     level_types,
201                     0,
202                     root,
203                     &last_free_node,
204                     &apicid);
205
206                 cpu_topology_levels_number = 4;
207
208         } else if (cores_per_chip > 1) { /* No HT available - 3 levels */
209
210                 children_no_per_level[0] = chips_per_package;
211                 children_no_per_level[1] = cores_per_chip;
212                 children_no_per_level[2] = 0;
213
214                 level_types[0] = PACKAGE_LEVEL;
215                 level_types[1] = CHIP_LEVEL;
216                 level_types[2] = CORE_LEVEL;
217         
218                 build_topology_tree(children_no_per_level,
219                     level_types,
220                     0,
221                     root,
222                     &last_free_node,
223                     &apicid);
224
225                 cpu_topology_levels_number = 3;
226
227         } else { /* No HT and no Multi-Core - 2 levels */
228
229                 children_no_per_level[0] = chips_per_package;
230                 children_no_per_level[1] = 0;
231
232                 level_types[0] = PACKAGE_LEVEL;
233                 level_types[1] = CHIP_LEVEL;
234         
235                 build_topology_tree(children_no_per_level,
236                     level_types,
237                     0,
238                     root,
239                     &last_free_node,
240                     &apicid);
241
242                 cpu_topology_levels_number = 2;
243
244         }
245
246         return root;
247 }
248
249 /* Recursive function helper to print the CPU topology tree */
250 static void
251 print_cpu_topology_tree_sysctl_helper(cpu_node_t *node,
252     struct sbuf *sb,
253     char * buf,
254     int buf_len,
255     int last)
256 {
257         int i;
258         int bsr_member;
259
260         sbuf_bcat(sb, buf, buf_len);
261         if (last) {
262                 sbuf_printf(sb, "\\-");
263                 buf[buf_len] = ' ';buf_len++;
264                 buf[buf_len] = ' ';buf_len++;
265         } else {
266                 sbuf_printf(sb, "|-");
267                 buf[buf_len] = '|';buf_len++;
268                 buf[buf_len] = ' ';buf_len++;
269         }
270         
271         bsr_member = BSRCPUMASK(node->members);
272
273         if (node->type == PACKAGE_LEVEL) {
274                 sbuf_printf(sb,"PACKAGE MEMBERS: ");
275         } else if (node->type == CHIP_LEVEL) {
276                 sbuf_printf(sb,"CHIP ID %d: ",
277                         get_chip_ID(bsr_member));
278         } else if (node->type == CORE_LEVEL) {
279                 sbuf_printf(sb,"CORE ID %d: ",
280                         get_core_number_within_chip(bsr_member));
281         } else if (node->type == THREAD_LEVEL) {
282                 sbuf_printf(sb,"THREAD ID %d: ",
283                         get_logical_CPU_number_within_core(bsr_member));
284         } else {
285                 sbuf_printf(sb,"UNKNOWN: ");
286         }
287         CPUSET_FOREACH(i, node->members) {
288                 sbuf_printf(sb,"cpu%d ", i);
289         }       
290         
291         sbuf_printf(sb,"\n");
292
293         for (i = 0; i < node->child_no; i++) {
294                 print_cpu_topology_tree_sysctl_helper(&(node->child_node[i]),
295                     sb, buf, buf_len, i == (node->child_no -1));
296         }
297 }
298
299 /* SYSCTL PROCEDURE for printing the CPU Topology tree */
300 static int
301 print_cpu_topology_tree_sysctl(SYSCTL_HANDLER_ARGS)
302 {
303         struct sbuf *sb;
304         int ret;
305         char buf[INDENT_BUF_SIZE];
306
307         KASSERT(cpu_root_node != NULL, ("cpu_root_node isn't initialized"));
308
309         sb = sbuf_new(NULL, NULL, 500, SBUF_AUTOEXTEND);
310         if (sb == NULL) {
311                 return (ENOMEM);
312         }
313         sbuf_printf(sb,"\n");
314         print_cpu_topology_tree_sysctl_helper(cpu_root_node, sb, buf, 0, 1);
315
316         sbuf_finish(sb);
317
318         ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
319
320         sbuf_delete(sb);
321
322         return ret;
323 }
324
325 /* SYSCTL PROCEDURE for printing the CPU Topology level description */
326 static int
327 print_cpu_topology_level_description_sysctl(SYSCTL_HANDLER_ARGS)
328 {
329         struct sbuf *sb;
330         int ret;
331
332         sb = sbuf_new(NULL, NULL, 500, SBUF_AUTOEXTEND);
333         if (sb == NULL)
334                 return (ENOMEM);
335
336         if (cpu_topology_levels_number == 4) /* HT available */
337                 sbuf_printf(sb, "0 - thread; 1 - core; 2 - socket; 3 - anything");
338         else if (cpu_topology_levels_number == 3) /* No HT available */
339                 sbuf_printf(sb, "0 - core; 1 - socket; 2 - anything");
340         else if (cpu_topology_levels_number == 2) /* No HT and no Multi-Core */
341                 sbuf_printf(sb, "0 - socket; 1 - anything");
342         else
343                 sbuf_printf(sb, "Unknown");
344
345         sbuf_finish(sb);
346
347         ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
348
349         sbuf_delete(sb);
350
351         return ret;     
352 }
353
354 /* Find a cpu_node_t by a mask */
355 static cpu_node_t *
356 get_cpu_node_by_cpumask(cpu_node_t * node,
357                         cpumask_t mask) {
358
359         cpu_node_t * found = NULL;
360         int i;
361
362         if (node->members == mask) {
363                 return node;
364         }
365
366         for (i = 0; i < node->child_no; i++) {
367                 found = get_cpu_node_by_cpumask(&(node->child_node[i]), mask);
368                 if (found != NULL) {
369                         return found;
370                 }
371         }
372         return NULL;
373 }
374
375 cpu_node_t *
376 get_cpu_node_by_cpuid(int cpuid) {
377         cpumask_t mask = CPUMASK(cpuid);
378
379         KASSERT(cpu_root_node != NULL, ("cpu_root_node isn't initialized"));
380
381         return get_cpu_node_by_cpumask(cpu_root_node, mask);
382 }
383
384 /* Get the mask of siblings for level_type of a cpuid */
385 cpumask_t
386 get_cpumask_from_level(int cpuid,
387                         uint8_t level_type)
388 {
389         cpu_node_t * node;
390         cpumask_t mask = CPUMASK(cpuid);
391
392         KASSERT(cpu_root_node != NULL, ("cpu_root_node isn't initialized"));
393
394         node = get_cpu_node_by_cpumask(cpu_root_node, mask);
395
396         if (node == NULL) {
397                 return 0;
398         }
399
400         while (node != NULL) {
401                 if (node->type == level_type) {
402                         return node->members;
403                 }
404                 node = node->parent_node;
405         }
406
407         return 0;
408 }
409
410 /* init pcpu_sysctl structure info */
411 static void
412 init_pcpu_topology_sysctl(void)
413 {
414         int cpu;
415         int i;
416         cpumask_t mask;
417         struct sbuf sb;
418
419         for (i = 0; i < ncpus; i++) {
420
421                 sbuf_new(&sb, pcpu_sysctl[i].cpu_name,
422                     sizeof(pcpu_sysctl[i].cpu_name), SBUF_FIXEDLEN);
423                 sbuf_printf(&sb,"cpu%d", i);
424                 sbuf_finish(&sb);
425
426
427                 /* Get physical siblings */
428                 mask = get_cpumask_from_level(i, CHIP_LEVEL);
429                 if (mask == 0) {
430                         pcpu_sysctl[i].physical_id = INVALID_ID;
431                         continue;
432                 }
433
434                 sbuf_new(&sb, pcpu_sysctl[i].physical_siblings,
435                     sizeof(pcpu_sysctl[i].physical_siblings), SBUF_FIXEDLEN);
436                 CPUSET_FOREACH(cpu, mask) {
437                         sbuf_printf(&sb,"cpu%d ", cpu);
438                 }
439                 sbuf_trim(&sb);
440                 sbuf_finish(&sb);
441
442                 pcpu_sysctl[i].physical_id = get_chip_ID(i); 
443
444                 /* Get core siblings */
445                 mask = get_cpumask_from_level(i, CORE_LEVEL);
446                 if (mask == 0) {
447                         pcpu_sysctl[i].core_id = INVALID_ID;
448                         continue;
449                 }
450
451                 sbuf_new(&sb, pcpu_sysctl[i].core_siblings,
452                     sizeof(pcpu_sysctl[i].core_siblings), SBUF_FIXEDLEN);
453                 CPUSET_FOREACH(cpu, mask) {
454                         sbuf_printf(&sb,"cpu%d ", cpu);
455                 }
456                 sbuf_trim(&sb);
457                 sbuf_finish(&sb);
458
459                 pcpu_sysctl[i].core_id = get_core_number_within_chip(i);
460
461         }
462 }
463
464 /* Build SYSCTL structure for revealing
465  * the CPU Topology to user-space.
466  */
467 static void
468 build_sysctl_cpu_topology(void)
469 {
470         int i;
471         struct sbuf sb;
472         
473         /* SYSCTL new leaf for "cpu_topology" */
474         sysctl_ctx_init(&cpu_topology_sysctl_ctx);
475         cpu_topology_sysctl_tree = SYSCTL_ADD_NODE(&cpu_topology_sysctl_ctx,
476             SYSCTL_STATIC_CHILDREN(_hw),
477             OID_AUTO,
478             "cpu_topology",
479             CTLFLAG_RD, 0, "");
480
481         /* SYSCTL cpu_topology "tree" entry */
482         SYSCTL_ADD_PROC(&cpu_topology_sysctl_ctx,
483             SYSCTL_CHILDREN(cpu_topology_sysctl_tree),
484             OID_AUTO, "tree", CTLTYPE_STRING | CTLFLAG_RD,
485             NULL, 0, print_cpu_topology_tree_sysctl, "A",
486             "Tree print of CPU topology");
487
488         /* SYSCTL cpu_topology "level_description" entry */
489         SYSCTL_ADD_PROC(&cpu_topology_sysctl_ctx,
490             SYSCTL_CHILDREN(cpu_topology_sysctl_tree),
491             OID_AUTO, "level_description", CTLTYPE_STRING | CTLFLAG_RD,
492             NULL, 0, print_cpu_topology_level_description_sysctl, "A",
493             "Level description of CPU topology");
494
495         /* SYSCTL cpu_topology "members" entry */
496         sbuf_new(&sb, cpu_topology_members,
497             sizeof(cpu_topology_members), SBUF_FIXEDLEN);
498         CPUSET_FOREACH(i, cpu_root_node->members) {
499                 sbuf_printf(&sb,"cpu%d ", i);
500         }
501         sbuf_trim(&sb);
502         sbuf_finish(&sb);
503         SYSCTL_ADD_STRING(&cpu_topology_sysctl_ctx,
504             SYSCTL_CHILDREN(cpu_topology_sysctl_tree),
505             OID_AUTO, "members", CTLFLAG_RD,
506             cpu_topology_members, 0,
507             "Members of the CPU Topology");
508
509         /* SYSCTL per_cpu info */
510         for (i = 0; i < ncpus; i++) {
511                 /* New leaf : hw.cpu_topology.cpux */
512                 sysctl_ctx_init(&pcpu_sysctl[i].sysctl_ctx); 
513                 pcpu_sysctl[i].sysctl_tree = SYSCTL_ADD_NODE(&pcpu_sysctl[i].sysctl_ctx,
514                     SYSCTL_CHILDREN(cpu_topology_sysctl_tree),
515                     OID_AUTO,
516                     pcpu_sysctl[i].cpu_name,
517                     CTLFLAG_RD, 0, "");
518
519                 /* Check if the physical_id found is valid */
520                 if (pcpu_sysctl[i].physical_id == INVALID_ID) {
521                         continue;
522                 }
523
524                 /* Add physical id info */
525                 SYSCTL_ADD_INT(&pcpu_sysctl[i].sysctl_ctx,
526                     SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree),
527                     OID_AUTO, "physical_id", CTLFLAG_RD,
528                     &pcpu_sysctl[i].physical_id, 0,
529                     "Physical ID");
530
531                 /* Add physical siblings */
532                 SYSCTL_ADD_STRING(&pcpu_sysctl[i].sysctl_ctx,
533                     SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree),
534                     OID_AUTO, "physical_siblings", CTLFLAG_RD,
535                     pcpu_sysctl[i].physical_siblings, 0,
536                     "Physical siblings");
537
538                 /* Check if the core_id found is valid */
539                 if (pcpu_sysctl[i].core_id == INVALID_ID) {
540                         continue;
541                 }
542
543                 /* Add core id info */
544                 SYSCTL_ADD_INT(&pcpu_sysctl[i].sysctl_ctx,
545                     SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree),
546                     OID_AUTO, "core_id", CTLFLAG_RD,
547                     &pcpu_sysctl[i].core_id, 0,
548                     "Core ID");
549                 
550                 /*Add core siblings */
551                 SYSCTL_ADD_STRING(&pcpu_sysctl[i].sysctl_ctx,
552                     SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree),
553                     OID_AUTO, "core_siblings", CTLFLAG_RD,
554                     pcpu_sysctl[i].core_siblings, 0,
555                     "Core siblings");
556         }
557 }
558
559 /* Build the CPU Topology and SYSCTL Topology tree */
560 static void
561 init_cpu_topology(void)
562 {
563         cpu_root_node = build_cpu_topology();
564
565         init_pcpu_topology_sysctl();
566         build_sysctl_cpu_topology();
567 }
568 SYSINIT(cpu_topology, SI_BOOT2_CPU_TOPOLOGY, SI_ORDER_FIRST,
569     init_cpu_topology, NULL)