kernel - add usched_dfly algorithm, set as default for now
[dragonfly.git] / sys / kern / subr_cpu_topology.c
1 /*
2  * Copyright (c) 2012 The DragonFly Project.  All rights reserved.
3  * 
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in
12  *    the documentation and/or other materials provided with the
13  *    distribution.
14  * 3. Neither the name of The DragonFly Project nor the names of its
15  *    contributors may be used to endorse or promote products derived
16  *    from this software without specific, prior written permission.
17  * 
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
22  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  * 
31  */
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/sysctl.h>
37 #include <sys/sbuf.h>
38 #include <sys/cpu_topology.h>
39
40 #include <machine/smp.h>
41
42 #ifdef SMP
43
/*
 * Exclusive upper bound used when scanning APIC IDs, unless a header
 * already provided one.
 */
#ifndef NAPICID
#define NAPICID 256
#endif

/*
 * Size of the scratch buffer holding the indentation prefix while the
 * topology tree is printed (the print helper appends two characters per
 * tree level).  Parenthesized so the macro is safe inside expressions.
 */
#define INDENT_BUF_SIZE (LEVEL_NO * 3)

/* Marks a physical/core id that could not be determined. */
#define INVALID_ID (-1)
50
51 /* Per-cpu sysctl nodes and info */
52 struct per_cpu_sysctl_info {
53         struct sysctl_ctx_list sysctl_ctx;
54         struct sysctl_oid *sysctl_tree;
55         char cpu_name[32];
56         int physical_id;
57         int core_id;
58         char physical_siblings[8*MAXCPU];
59         char core_siblings[8*MAXCPU];
60 };
61 typedef struct per_cpu_sysctl_info per_cpu_sysctl_info_t;
62
63 static cpu_node_t cpu_topology_nodes[MAXCPU];   /* Memory for topology */
64 static cpu_node_t *cpu_root_node;               /* Root node pointer */
65
66 static struct sysctl_ctx_list cpu_topology_sysctl_ctx;
67 static struct sysctl_oid *cpu_topology_sysctl_tree;
68 static char cpu_topology_members[8*MAXCPU];
69 static per_cpu_sysctl_info_t pcpu_sysctl[MAXCPU];
70
71 int cpu_topology_levels_number = 1;
72 cpu_node_t *root_cpu_node;
73
74 /* Get the next valid apicid starting
75  * from current apicid (curr_apicid
76  */
77 static int
78 get_next_valid_apicid(int curr_apicid)
79 {
80         int next_apicid = curr_apicid;
81         do {
82                 next_apicid++;
83         }
84         while(get_cpuid_from_apicid(next_apicid) == -1 &&
85            next_apicid < NAPICID);
86         if (next_apicid == NAPICID) {
87                 kprintf("Warning: No next valid APICID found. Returning -1\n");
88                 return -1;
89         }
90         return next_apicid;
91 }
92
93 /* Generic topology tree. The parameters have the following meaning:
94  * - children_no_per_level : the number of children on each level
95  * - level_types : the type of the level (THREAD, CORE, CHIP, etc)
96  * - cur_level : the current level of the tree
97  * - node : the current node
98  * - last_free_node : the last free node in the global array.
99  * - cpuid : basicly this are the ids of the leafs
100  */ 
101 static void
102 build_topology_tree(int *children_no_per_level,
103    uint8_t *level_types,
104    int cur_level, 
105    cpu_node_t *node,
106    cpu_node_t **last_free_node,
107    int *apicid)
108 {
109         int i;
110
111         node->child_no = children_no_per_level[cur_level];
112         node->type = level_types[cur_level];
113         node->members = 0;
114
115         if (node->child_no == 0) {
116                 node->child_node = NULL;
117                 *apicid = get_next_valid_apicid(*apicid);
118                 node->members = CPUMASK(get_cpuid_from_apicid(*apicid));
119                 return;
120         }
121
122         node->child_node = *last_free_node;
123         (*last_free_node) += node->child_no;
124         if (node->parent_node == NULL)
125                 root_cpu_node = node;
126         
127         for (i = 0; i < node->child_no; i++) {
128                 node->child_node[i].parent_node = node;
129
130                 build_topology_tree(children_no_per_level,
131                     level_types,
132                     cur_level + 1,
133                     &(node->child_node[i]),
134                     last_free_node,
135                     apicid);
136
137                 node->members |= node->child_node[i].members;
138         }
139 }
140
141 /* Build CPU topology. The detection is made by comparing the
142  * chip, core and logical IDs of each CPU with the IDs of the 
143  * BSP. When we found a match, at that level the CPUs are siblings.
144  */
145 static cpu_node_t *
146 build_cpu_topology(void)
147 {
148         detect_cpu_topology();
149         int i;
150         int BSPID = 0;
151         int threads_per_core = 0;
152         int cores_per_chip = 0;
153         int chips_per_package = 0;
154         int children_no_per_level[LEVEL_NO];
155         uint8_t level_types[LEVEL_NO];
156         int apicid = -1;
157
158         cpu_node_t *root = &cpu_topology_nodes[0];
159         cpu_node_t *last_free_node = root + 1;
160
161         /* Assume that the topology is uniform.
162          * Find the number of siblings within chip
163          * and witin core to build up the topology
164          */
165         for (i = 0; i < ncpus; i++) {
166
167                 cpumask_t mask = CPUMASK(i);
168
169                 if ((mask & smp_active_mask) == 0)
170                         continue;
171
172                 if (get_chip_ID(BSPID) == get_chip_ID(i))
173                         cores_per_chip++;
174                 else
175                         continue;
176
177                 if (get_core_number_within_chip(BSPID) ==
178                     get_core_number_within_chip(i))
179                         threads_per_core++;
180         }
181
182         cores_per_chip /= threads_per_core;
183         chips_per_package = ncpus / (cores_per_chip * threads_per_core);
184         
185         if (bootverbose)
186                 kprintf("CPU Topology: cores_per_chip: %d; threads_per_core: %d; chips_per_package: %d;\n",
187                     cores_per_chip, threads_per_core, chips_per_package);
188
189         if (threads_per_core > 1) { /* HT available - 4 levels */
190
191                 children_no_per_level[0] = chips_per_package;
192                 children_no_per_level[1] = cores_per_chip;
193                 children_no_per_level[2] = threads_per_core;
194                 children_no_per_level[3] = 0;
195
196                 level_types[0] = PACKAGE_LEVEL;
197                 level_types[1] = CHIP_LEVEL;
198                 level_types[2] = CORE_LEVEL;
199                 level_types[3] = THREAD_LEVEL;
200         
201                 build_topology_tree(children_no_per_level,
202                     level_types,
203                     0,
204                     root,
205                     &last_free_node,
206                     &apicid);
207
208                 cpu_topology_levels_number = 4;
209
210         } else if (cores_per_chip > 1) { /* No HT available - 3 levels */
211
212                 children_no_per_level[0] = chips_per_package;
213                 children_no_per_level[1] = cores_per_chip;
214                 children_no_per_level[2] = 0;
215
216                 level_types[0] = PACKAGE_LEVEL;
217                 level_types[1] = CHIP_LEVEL;
218                 level_types[2] = CORE_LEVEL;
219         
220                 build_topology_tree(children_no_per_level,
221                     level_types,
222                     0,
223                     root,
224                     &last_free_node,
225                     &apicid);
226
227                 cpu_topology_levels_number = 3;
228
229         } else { /* No HT and no Multi-Core - 2 levels */
230
231                 children_no_per_level[0] = chips_per_package;
232                 children_no_per_level[1] = 0;
233
234                 level_types[0] = PACKAGE_LEVEL;
235                 level_types[1] = CHIP_LEVEL;
236         
237                 build_topology_tree(children_no_per_level,
238                     level_types,
239                     0,
240                     root,
241                     &last_free_node,
242                     &apicid);
243
244                 cpu_topology_levels_number = 2;
245
246         }
247
248         return root;
249 }
250
251 /* Recursive function helper to print the CPU topology tree */
252 static void
253 print_cpu_topology_tree_sysctl_helper(cpu_node_t *node,
254     struct sbuf *sb,
255     char * buf,
256     int buf_len,
257     int last)
258 {
259         int i;
260         int bsr_member;
261
262         sbuf_bcat(sb, buf, buf_len);
263         if (last) {
264                 sbuf_printf(sb, "\\-");
265                 buf[buf_len] = ' ';buf_len++;
266                 buf[buf_len] = ' ';buf_len++;
267         } else {
268                 sbuf_printf(sb, "|-");
269                 buf[buf_len] = '|';buf_len++;
270                 buf[buf_len] = ' ';buf_len++;
271         }
272         
273         bsr_member = BSRCPUMASK(node->members);
274
275         if (node->type == PACKAGE_LEVEL) {
276                 sbuf_printf(sb,"PACKAGE MEMBERS: ");
277         } else if (node->type == CHIP_LEVEL) {
278                 sbuf_printf(sb,"CHIP ID %d: ",
279                         get_chip_ID(bsr_member));
280         } else if (node->type == CORE_LEVEL) {
281                 sbuf_printf(sb,"CORE ID %d: ",
282                         get_core_number_within_chip(bsr_member));
283         } else if (node->type == THREAD_LEVEL) {
284                 sbuf_printf(sb,"THREAD ID %d: ",
285                         get_logical_CPU_number_within_core(bsr_member));
286         } else {
287                 sbuf_printf(sb,"UNKNOWN: ");
288         }
289         CPUSET_FOREACH(i, node->members) {
290                 sbuf_printf(sb,"cpu%d ", i);
291         }       
292         
293         sbuf_printf(sb,"\n");
294
295         for (i = 0; i < node->child_no; i++) {
296                 print_cpu_topology_tree_sysctl_helper(&(node->child_node[i]),
297                     sb, buf, buf_len, i == (node->child_no -1));
298         }
299 }
300
301 /* SYSCTL PROCEDURE for printing the CPU Topology tree */
302 static int
303 print_cpu_topology_tree_sysctl(SYSCTL_HANDLER_ARGS)
304 {
305         struct sbuf *sb;
306         int ret;
307         char buf[INDENT_BUF_SIZE];
308
309         KASSERT(cpu_root_node != NULL, ("cpu_root_node isn't initialized"));
310
311         sb = sbuf_new(NULL, NULL, 500, SBUF_AUTOEXTEND);
312         if (sb == NULL) {
313                 return (ENOMEM);
314         }
315         sbuf_printf(sb,"\n");
316         print_cpu_topology_tree_sysctl_helper(cpu_root_node, sb, buf, 0, 1);
317
318         sbuf_finish(sb);
319
320         ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
321
322         sbuf_delete(sb);
323
324         return ret;
325 }
326
327 /* SYSCTL PROCEDURE for printing the CPU Topology level description */
328 static int
329 print_cpu_topology_level_description_sysctl(SYSCTL_HANDLER_ARGS)
330 {
331         struct sbuf *sb;
332         int ret;
333
334         sb = sbuf_new(NULL, NULL, 500, SBUF_AUTOEXTEND);
335         if (sb == NULL)
336                 return (ENOMEM);
337
338         if (cpu_topology_levels_number == 4) /* HT available */
339                 sbuf_printf(sb, "0 - thread; 1 - core; 2 - socket; 3 - anything");
340         else if (cpu_topology_levels_number == 3) /* No HT available */
341                 sbuf_printf(sb, "0 - core; 1 - socket; 2 - anything");
342         else if (cpu_topology_levels_number == 2) /* No HT and no Multi-Core */
343                 sbuf_printf(sb, "0 - socket; 1 - anything");
344         else
345                 sbuf_printf(sb, "Unknown");
346
347         sbuf_finish(sb);
348
349         ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
350
351         sbuf_delete(sb);
352
353         return ret;     
354 }
355
356 /* Find a cpu_node_t by a mask */
357 static cpu_node_t *
358 get_cpu_node_by_cpumask(cpu_node_t * node,
359                         cpumask_t mask) {
360
361         cpu_node_t * found = NULL;
362         int i;
363
364         if (node->members == mask) {
365                 return node;
366         }
367
368         for (i = 0; i < node->child_no; i++) {
369                 found = get_cpu_node_by_cpumask(&(node->child_node[i]), mask);
370                 if (found != NULL) {
371                         return found;
372                 }
373         }
374         return NULL;
375 }
376
377 cpu_node_t *
378 get_cpu_node_by_cpuid(int cpuid) {
379         cpumask_t mask = CPUMASK(cpuid);
380
381         KASSERT(cpu_root_node != NULL, ("cpu_root_node isn't initialized"));
382
383         return get_cpu_node_by_cpumask(cpu_root_node, mask);
384 }
385
386 /* Get the mask of siblings for level_type of a cpuid */
387 cpumask_t
388 get_cpumask_from_level(int cpuid,
389                         uint8_t level_type)
390 {
391         cpu_node_t * node;
392         cpumask_t mask = CPUMASK(cpuid);
393
394         KASSERT(cpu_root_node != NULL, ("cpu_root_node isn't initialized"));
395
396         node = get_cpu_node_by_cpumask(cpu_root_node, mask);
397
398         if (node == NULL) {
399                 return 0;
400         }
401
402         while (node != NULL) {
403                 if (node->type == level_type) {
404                         return node->members;
405                 }
406                 node = node->parent_node;
407         }
408
409         return 0;
410 }
411
412 /* init pcpu_sysctl structure info */
413 static void
414 init_pcpu_topology_sysctl(void)
415 {
416         int cpu;
417         int i;
418         cpumask_t mask;
419         struct sbuf sb;
420
421         for (i = 0; i < ncpus; i++) {
422
423                 sbuf_new(&sb, pcpu_sysctl[i].cpu_name,
424                     sizeof(pcpu_sysctl[i].cpu_name), SBUF_FIXEDLEN);
425                 sbuf_printf(&sb,"cpu%d", i);
426                 sbuf_finish(&sb);
427
428
429                 /* Get physical siblings */
430                 mask = get_cpumask_from_level(i, CHIP_LEVEL);
431                 if (mask == 0) {
432                         pcpu_sysctl[i].physical_id = INVALID_ID;
433                         continue;
434                 }
435
436                 sbuf_new(&sb, pcpu_sysctl[i].physical_siblings,
437                     sizeof(pcpu_sysctl[i].physical_siblings), SBUF_FIXEDLEN);
438                 CPUSET_FOREACH(cpu, mask) {
439                         sbuf_printf(&sb,"cpu%d ", cpu);
440                 }
441                 sbuf_trim(&sb);
442                 sbuf_finish(&sb);
443
444                 pcpu_sysctl[i].physical_id = get_chip_ID(i); 
445
446                 /* Get core siblings */
447                 mask = get_cpumask_from_level(i, CORE_LEVEL);
448                 if (mask == 0) {
449                         pcpu_sysctl[i].core_id = INVALID_ID;
450                         continue;
451                 }
452
453                 sbuf_new(&sb, pcpu_sysctl[i].core_siblings,
454                     sizeof(pcpu_sysctl[i].core_siblings), SBUF_FIXEDLEN);
455                 CPUSET_FOREACH(cpu, mask) {
456                         sbuf_printf(&sb,"cpu%d ", cpu);
457                 }
458                 sbuf_trim(&sb);
459                 sbuf_finish(&sb);
460
461                 pcpu_sysctl[i].core_id = get_core_number_within_chip(i);
462
463         }
464 }
465
466 /* Build SYSCTL structure for revealing
467  * the CPU Topology to user-space.
468  */
469 static void
470 build_sysctl_cpu_topology(void)
471 {
472         int i;
473         struct sbuf sb;
474         
475         /* SYSCTL new leaf for "cpu_topology" */
476         sysctl_ctx_init(&cpu_topology_sysctl_ctx);
477         cpu_topology_sysctl_tree = SYSCTL_ADD_NODE(&cpu_topology_sysctl_ctx,
478             SYSCTL_STATIC_CHILDREN(_hw),
479             OID_AUTO,
480             "cpu_topology",
481             CTLFLAG_RD, 0, "");
482
483         /* SYSCTL cpu_topology "tree" entry */
484         SYSCTL_ADD_PROC(&cpu_topology_sysctl_ctx,
485             SYSCTL_CHILDREN(cpu_topology_sysctl_tree),
486             OID_AUTO, "tree", CTLTYPE_STRING | CTLFLAG_RD,
487             NULL, 0, print_cpu_topology_tree_sysctl, "A",
488             "Tree print of CPU topology");
489
490         /* SYSCTL cpu_topology "level_description" entry */
491         SYSCTL_ADD_PROC(&cpu_topology_sysctl_ctx,
492             SYSCTL_CHILDREN(cpu_topology_sysctl_tree),
493             OID_AUTO, "level_description", CTLTYPE_STRING | CTLFLAG_RD,
494             NULL, 0, print_cpu_topology_level_description_sysctl, "A",
495             "Level description of CPU topology");
496
497         /* SYSCTL cpu_topology "members" entry */
498         sbuf_new(&sb, cpu_topology_members,
499             sizeof(cpu_topology_members), SBUF_FIXEDLEN);
500         CPUSET_FOREACH(i, cpu_root_node->members) {
501                 sbuf_printf(&sb,"cpu%d ", i);
502         }
503         sbuf_trim(&sb);
504         sbuf_finish(&sb);
505         SYSCTL_ADD_STRING(&cpu_topology_sysctl_ctx,
506             SYSCTL_CHILDREN(cpu_topology_sysctl_tree),
507             OID_AUTO, "members", CTLFLAG_RD,
508             cpu_topology_members, 0,
509             "Members of the CPU Topology");
510
511         /* SYSCTL per_cpu info */
512         for (i = 0; i < ncpus; i++) {
513                 /* New leaf : hw.cpu_topology.cpux */
514                 sysctl_ctx_init(&pcpu_sysctl[i].sysctl_ctx); 
515                 pcpu_sysctl[i].sysctl_tree = SYSCTL_ADD_NODE(&pcpu_sysctl[i].sysctl_ctx,
516                     SYSCTL_CHILDREN(cpu_topology_sysctl_tree),
517                     OID_AUTO,
518                     pcpu_sysctl[i].cpu_name,
519                     CTLFLAG_RD, 0, "");
520
521                 /* Check if the physical_id found is valid */
522                 if (pcpu_sysctl[i].physical_id == INVALID_ID) {
523                         continue;
524                 }
525
526                 /* Add physical id info */
527                 SYSCTL_ADD_INT(&pcpu_sysctl[i].sysctl_ctx,
528                     SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree),
529                     OID_AUTO, "physical_id", CTLFLAG_RD,
530                     &pcpu_sysctl[i].physical_id, 0,
531                     "Physical ID");
532
533                 /* Add physical siblings */
534                 SYSCTL_ADD_STRING(&pcpu_sysctl[i].sysctl_ctx,
535                     SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree),
536                     OID_AUTO, "physical_siblings", CTLFLAG_RD,
537                     pcpu_sysctl[i].physical_siblings, 0,
538                     "Physical siblings");
539
540                 /* Check if the core_id found is valid */
541                 if (pcpu_sysctl[i].core_id == INVALID_ID) {
542                         continue;
543                 }
544
545                 /* Add core id info */
546                 SYSCTL_ADD_INT(&pcpu_sysctl[i].sysctl_ctx,
547                     SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree),
548                     OID_AUTO, "core_id", CTLFLAG_RD,
549                     &pcpu_sysctl[i].core_id, 0,
550                     "Core ID");
551                 
552                 /*Add core siblings */
553                 SYSCTL_ADD_STRING(&pcpu_sysctl[i].sysctl_ctx,
554                     SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree),
555                     OID_AUTO, "core_siblings", CTLFLAG_RD,
556                     pcpu_sysctl[i].core_siblings, 0,
557                     "Core siblings");
558         }
559 }
560
561 /* Build the CPU Topology and SYSCTL Topology tree */
562 static void
563 init_cpu_topology(void)
564 {
565         cpu_root_node = build_cpu_topology();
566
567         init_pcpu_topology_sysctl();
568         build_sysctl_cpu_topology();
569 }
570 SYSINIT(cpu_topology, SI_BOOT2_CPU_TOPOLOGY, SI_ORDER_FIRST,
571     init_cpu_topology, NULL)
572 #endif