/* cpumap.c: used for optimizing CPU assignment
 *
 * Copyright (C) 2009 Hong H. Pham <hong.pham@windriver.com>
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/cpumask.h>
#include <linux/spinlock.h>
#include <asm/cpudata.h>
#include "cpumap.h"


enum {
        CPUINFO_LVL_ROOT = 0,
        CPUINFO_LVL_NODE,
        CPUINFO_LVL_CORE,
        CPUINFO_LVL_PROC,
        CPUINFO_LVL_MAX,
};

enum {
        ROVER_NO_OP              = 0,
        /* Increment rover every time level is visited */
        ROVER_INC_ON_VISIT       = 1 << 0,
        /* Increment parent's rover every time rover wraps around */
        ROVER_INC_PARENT_ON_LOOP = 1 << 1,
};
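
/* The flags combine per level: e.g. ROVER_INC_ON_VISIT |
 * ROVER_INC_PARENT_ON_LOOP advances a node's rover on every visit and,
 * like the carry in a multi-digit counter, bumps the parent's rover each
 * time this rover wraps around.
 */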

struct cpuinfo_node {
        int id;
        int level;
        int num_cpus;    /* Number of CPUs in this hierarchy */
        int parent_index;
        /* At the leaf (PROC) level, child_start, child_end and rover hold
         * CPU numbers rather than nodes[] indices.
         */
        int child_start; /* Array index of the first child node */
        int child_end;   /* Array index of the last child node */
        int rover;       /* Child node iterator */
};

struct cpuinfo_level {
        int start_index; /* Index of first node of a level in a cpuinfo tree */
        int end_index;   /* Index of last node of a level in a cpuinfo tree */
        int num_nodes;   /* Number of nodes in a level in a cpuinfo tree */
};

struct cpuinfo_tree {
        int total_nodes;

        /* Offsets into nodes[] for each level of the tree */
        struct cpuinfo_level level[CPUINFO_LVL_MAX];
        struct cpuinfo_node  nodes[];
};
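
/* nodes[] is laid out level by level: the root occupies nodes[0], followed
 * by all NODE level entries, then all CORE entries, then all PROC entries,
 * at the offsets recorded in level[].
 */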

static struct cpuinfo_tree *cpuinfo_tree;

/* index -> CPU distribution map rebuilt by _cpu_map_rebuild(); both it and
 * the tree above are protected by cpu_map_lock.
 */
static u16 cpu_distribution_map[NR_CPUS];
static DEFINE_SPINLOCK(cpu_map_lock);


/* Niagara optimized cpuinfo tree traversal. */
static const int niagara_iterate_method[] = {
        [CPUINFO_LVL_ROOT] = ROVER_NO_OP,

        /* Strands (or virtual CPUs) within a core may not run concurrently
         * on the Niagara, as instruction pipeline(s) are shared.  Distribute
         * work to strands in different cores first for better concurrency.
         * Go to next NUMA node when all cores are used.
         */
        [CPUINFO_LVL_NODE] = ROVER_INC_ON_VISIT|ROVER_INC_PARENT_ON_LOOP,

        /* Strands are grouped together by proc_id in cpuinfo_sparc, i.e.
         * a proc_id represents an instruction pipeline.  Distribute work to
         * strands in different proc_id groups if the core has multiple
         * instruction pipelines (e.g. the Niagara 2/2+ has two).
         */
        [CPUINFO_LVL_CORE] = ROVER_INC_ON_VISIT,

        /* Pick the next strand in the proc_id group. */
        [CPUINFO_LVL_PROC] = ROVER_INC_ON_VISIT,
};
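
/* For illustration, on a hypothetical topology (not any real chip) with
 * one NUMA node, two cores, two proc_id groups per core and two strands
 * per group (CPUs 0-7 in sorted order), successive iterations under this
 * policy yield CPUs 0, 4, 2, 6, 1, 5, 3, 7: cores alternate first, then
 * pipelines, then strands.
 */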

/* Generic cpuinfo tree traversal.  Distribute work round robin across NUMA
 * nodes.
 */
static const int generic_iterate_method[] = {
        [CPUINFO_LVL_ROOT] = ROVER_INC_ON_VISIT,
        [CPUINFO_LVL_NODE] = ROVER_NO_OP,
        [CPUINFO_LVL_CORE] = ROVER_INC_PARENT_ON_LOOP,
        [CPUINFO_LVL_PROC] = ROVER_INC_ON_VISIT|ROVER_INC_PARENT_ON_LOOP,
};
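
/* Here each call advances the root rover, so consecutive requests go to
 * successive NUMA nodes.  Within a node, the strand rover advances on each
 * visit and carries its wrap upward through the core and node levels.
 */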

/* Map a CPU to its identifier at the given level of the hierarchy, or
 * -EINVAL for an unknown level.
 */
static int cpuinfo_id(int cpu, int level)
{
        int id;

        switch (level) {
        case CPUINFO_LVL_ROOT:
                id = 0;
                break;
        case CPUINFO_LVL_NODE:
                id = cpu_to_node(cpu);
                break;
        case CPUINFO_LVL_CORE:
                id = cpu_data(cpu).core_id;
                break;
        case CPUINFO_LVL_PROC:
                id = cpu_data(cpu).proc_id;
                break;
        default:
                id = -EINVAL;
        }
        return id;
}

/*
 * Enumerate the CPU information in __cpu_data to determine the start index,
 * end index, and number of nodes for each level in the cpuinfo tree.  The
 * total number of cpuinfo nodes required to build the tree is returned.
 */
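/* For example, a hypothetical box with 2 NUMA nodes, 2 cores per node and
 * 2 proc_id groups per core would count 1 root + 2 node + 4 core + 8 proc
 * = 15 nodes, with level index ranges [0,0], [1,2], [3,6] and [7,14].
 */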
static int enumerate_cpuinfo_nodes(struct cpuinfo_level *tree_level)
{
        int prev_id[CPUINFO_LVL_MAX];
        int i, n, num_nodes;

        for (i = CPUINFO_LVL_ROOT; i < CPUINFO_LVL_MAX; i++) {
                struct cpuinfo_level *lv = &tree_level[i];

                prev_id[i] = -1;
                lv->start_index = lv->end_index = lv->num_nodes = 0;
        }

        num_nodes = 1; /* Include the root node */

        for (i = 0; i < num_possible_cpus(); i++) {
                if (!cpu_online(i))
                        continue;

                /* Node, core and proc ids are assumed to be sorted in
                 * ascending order, so a larger id means a new node at
                 * that level.
                 */
                n = cpuinfo_id(i, CPUINFO_LVL_NODE);
                if (n > prev_id[CPUINFO_LVL_NODE]) {
                        tree_level[CPUINFO_LVL_NODE].num_nodes++;
                        prev_id[CPUINFO_LVL_NODE] = n;
                        num_nodes++;
                }
                n = cpuinfo_id(i, CPUINFO_LVL_CORE);
                if (n > prev_id[CPUINFO_LVL_CORE]) {
                        tree_level[CPUINFO_LVL_CORE].num_nodes++;
                        prev_id[CPUINFO_LVL_CORE] = n;
                        num_nodes++;
                }
                n = cpuinfo_id(i, CPUINFO_LVL_PROC);
                if (n > prev_id[CPUINFO_LVL_PROC]) {
                        tree_level[CPUINFO_LVL_PROC].num_nodes++;
                        prev_id[CPUINFO_LVL_PROC] = n;
                        num_nodes++;
                }
        }

        tree_level[CPUINFO_LVL_ROOT].num_nodes = 1;

        /* Levels are laid out contiguously in nodes[]: root, then NODE,
         * CORE and PROC entries.
         */
        n = tree_level[CPUINFO_LVL_NODE].num_nodes;
        tree_level[CPUINFO_LVL_NODE].start_index = 1;
        tree_level[CPUINFO_LVL_NODE].end_index   = n;

        n++;
        tree_level[CPUINFO_LVL_CORE].start_index = n;
        n += tree_level[CPUINFO_LVL_CORE].num_nodes;
        tree_level[CPUINFO_LVL_CORE].end_index   = n - 1;

        tree_level[CPUINFO_LVL_PROC].start_index = n;
        n += tree_level[CPUINFO_LVL_PROC].num_nodes;
        tree_level[CPUINFO_LVL_PROC].end_index   = n - 1;

        return num_nodes;
}

/* Build a tree representation of the CPU hierarchy using the per CPU
 * information in __cpu_data.  Entries in __cpu_data[0..NR_CPUS-1] are
 * assumed to be sorted in ascending order based on node, core_id, and
 * proc_id (in order of significance).
 */
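/* Construction is two-phase: nodes for the first online CPU seed every
 * level, then each later online CPU either grows the current node's CPU
 * count or, when its id at some level changes, closes out that node
 * (fixing num_cpus and the parent/child links) and starts the next one.
 */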
static struct cpuinfo_tree *build_cpuinfo_tree(void)
{
        struct cpuinfo_tree *new_tree;
        struct cpuinfo_node *node;
        struct cpuinfo_level tmp_level[CPUINFO_LVL_MAX];
        int num_cpus[CPUINFO_LVL_MAX];
        int level_rover[CPUINFO_LVL_MAX];
        int prev_id[CPUINFO_LVL_MAX];
        int n, id, cpu, prev_cpu, last_cpu, level;

        n = enumerate_cpuinfo_nodes(tmp_level);

        /* Callers hold cpu_map_lock with interrupts disabled, so the
         * allocation must not sleep.
         */
        new_tree = kzalloc(sizeof(struct cpuinfo_tree) +
                           (sizeof(struct cpuinfo_node) * n), GFP_ATOMIC);
        if (!new_tree)
                return NULL;

        new_tree->total_nodes = n;
        memcpy(&new_tree->level, tmp_level, sizeof(tmp_level));

        prev_cpu = cpu = first_cpu(cpu_online_map);

        /* Initialize all levels in the tree with the first CPU */
        for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT; level--) {
                n = new_tree->level[level].start_index;

                level_rover[level] = n;
                node = &new_tree->nodes[n];

                id = cpuinfo_id(cpu, level);
                if (unlikely(id < 0)) {
                        kfree(new_tree);
                        return NULL;
                }
                node->id = id;
                node->level = level;
                node->num_cpus = 1;

                node->parent_index = (level > CPUINFO_LVL_ROOT)
                    ? new_tree->level[level - 1].start_index : -1;

                /* PROC level nodes use CPU numbers as child links */
                node->child_start = node->child_end = node->rover =
                    (level == CPUINFO_LVL_PROC)
                    ? cpu : new_tree->level[level + 1].start_index;

                prev_id[level] = node->id;
                num_cpus[level] = 1;
        }

        for (last_cpu = (num_possible_cpus() - 1); last_cpu >= 0; last_cpu--) {
                if (cpu_online(last_cpu))
                        break;
        }

        while (++cpu <= last_cpu) {
                if (!cpu_online(cpu))
                        continue;

                for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT;
                     level--) {
                        id = cpuinfo_id(cpu, level);
                        if (unlikely(id < 0)) {
                                kfree(new_tree);
                                return NULL;
                        }

                        if ((id != prev_id[level]) || (cpu == last_cpu)) {
                                prev_id[level] = id;
                                node = &new_tree->nodes[level_rover[level]];
                                node->num_cpus = num_cpus[level];
                                num_cpus[level] = 1;

                                if (cpu == last_cpu)
                                        node->num_cpus++;

                                /* Connect tree node to parent */
                                if (level == CPUINFO_LVL_ROOT)
                                        node->parent_index = -1;
                                else
                                        node->parent_index =
                                            level_rover[level - 1];

                                if (level == CPUINFO_LVL_PROC) {
                                        node->child_end =
                                            (cpu == last_cpu) ? cpu : prev_cpu;
                                } else {
                                        node->child_end =
                                            level_rover[level + 1] - 1;
                                }

                                /* Initialize the next node in the same level */
                                n = ++level_rover[level];
                                if (n <= new_tree->level[level].end_index) {
                                        node = &new_tree->nodes[n];
                                        node->id = id;
                                        node->level = level;

                                        /* Connect node to child */
                                        node->child_start = node->child_end =
                                        node->rover =
                                            (level == CPUINFO_LVL_PROC)
                                            ? cpu : level_rover[level + 1];
                                }
                        } else
                                num_cpus[level]++;
                }
                prev_cpu = cpu;
        }

        return new_tree;
}
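
/* Advance the rover of the node at node_index.  The rover chain behaves
 * like a multi-digit counter: when a rover wraps back to child_start and
 * that level's policy has ROVER_INC_PARENT_ON_LOOP set, the wrap carries
 * into the parent's rover, up to (at most) the root of the subtree.
 */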
static void increment_rover(struct cpuinfo_tree *t, int node_index,
                            int root_index, const int *rover_inc_table)
{
        struct cpuinfo_node *node = &t->nodes[node_index];
        int top_level, level;

        top_level = t->nodes[root_index].level;
        for (level = node->level; level >= top_level; level--) {
                node->rover++;
                if (node->rover <= node->child_end)
                        return;

                node->rover = node->child_start;
                /* If parent's rover does not need to be adjusted, stop here. */
                if ((level == top_level) ||
                    !(rover_inc_table[level] & ROVER_INC_PARENT_ON_LOOP))
                        return;

                node = &t->nodes[node->parent_index];
        }
}
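
/* Pick the next CPU from the subtree rooted at root_index by following
 * the rover at each level down to a PROC node, whose rover holds a CPU
 * number.  Rovers are advanced per the chip-specific policy table, so
 * successive calls spread work according to that policy.
 */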
static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index)
{
        const int *rover_inc_table;
        int level, new_index, index = root_index;

        switch (sun4v_chip_type) {
        case SUN4V_CHIP_NIAGARA1:
        case SUN4V_CHIP_NIAGARA2:
                rover_inc_table = niagara_iterate_method;
                break;
        default:
                rover_inc_table = generic_iterate_method;
        }

        for (level = t->nodes[root_index].level; level < CPUINFO_LVL_MAX;
             level++) {
                new_index = t->nodes[index].rover;
                if (rover_inc_table[level] & ROVER_INC_ON_VISIT)
                        increment_rover(t, index, root_index, rover_inc_table);

                index = new_index;
        }
        return index;
}

static void _cpu_map_rebuild(void)
{
        int i;

        /* kfree(NULL) is a no-op, so the old tree can be freed
         * unconditionally.
         */
        kfree(cpuinfo_tree);

        cpuinfo_tree = build_cpuinfo_tree();
        if (!cpuinfo_tree)
                return;

        /* Build CPU distribution map that spans all online CPUs.  No need
         * to check if the CPU is online, as that is done when the cpuinfo
         * tree is being built.
         */
        for (i = 0; i < cpuinfo_tree->nodes[0].num_cpus; i++)
                cpu_distribution_map[i] = iterate_cpu(cpuinfo_tree, 0);
}

/* Fallback if the cpuinfo tree could not be built.  CPU mapping is linear
 * round robin.
 */
static int simple_map_to_cpu(unsigned int index)
{
        int i, end, cpu_rover;

        cpu_rover = 0;
        end = index % num_online_cpus();
        for (i = 0; i < num_possible_cpus(); i++) {
                /* Count off online CPUs until the index'th one (modulo
                 * the online count) is reached.  Offline CPUs are skipped
                 * so the rover cannot stall on a hole in the online map.
                 */
                if (cpu_online(i)) {
                        if (cpu_rover >= end)
                                return i;

                        cpu_rover++;
                }
        }

        /* Impossible, since num_online_cpus() <= num_possible_cpus() */
        return first_cpu(cpu_online_map);
}

static int _map_to_cpu(unsigned int index)
{
        struct cpuinfo_node *root_node;

        if (unlikely(!cpuinfo_tree)) {
                _cpu_map_rebuild();
                if (!cpuinfo_tree)
                        return simple_map_to_cpu(index);
        }

        root_node = &cpuinfo_tree->nodes[0];
#ifdef CONFIG_HOTPLUG_CPU
        /* The tree is stale if the online CPU count has changed since it
         * was built; rebuild it, then re-fetch root_node since the old
         * tree has been freed.
         */
        if (unlikely(root_node->num_cpus != num_online_cpus())) {
                _cpu_map_rebuild();
                if (!cpuinfo_tree)
                        return simple_map_to_cpu(index);
                root_node = &cpuinfo_tree->nodes[0];
        }
#endif
        return cpu_distribution_map[index % root_node->num_cpus];
}
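
/* map_to_cpu() is the exported entry point.  As a purely hypothetical
 * usage sketch (queue/nqueues are not from this file), a driver could
 * spread its queues across CPUs with:
 *
 *        for (i = 0; i < nqueues; i++)
 *                queue[i].target_cpu = map_to_cpu(i);
 *
 * so consecutive indices land on CPUs chosen by the policy above.
 */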
int map_to_cpu(unsigned int index)
{
        int mapped_cpu;
        unsigned long flag;

        spin_lock_irqsave(&cpu_map_lock, flag);
        mapped_cpu = _map_to_cpu(index);

#ifdef CONFIG_HOTPLUG_CPU
        /* If the chosen CPU has just gone offline, _map_to_cpu() notices
         * the stale tree on the retry and rebuilds the distribution map.
         */
        while (unlikely(!cpu_online(mapped_cpu)))
                mapped_cpu = _map_to_cpu(index);
#endif
        spin_unlock_irqrestore(&cpu_map_lock, flag);
        return mapped_cpu;
}
EXPORT_SYMBOL(map_to_cpu);

void cpu_map_rebuild(void)
{
        unsigned long flag;

        spin_lock_irqsave(&cpu_map_lock, flag);
        _cpu_map_rebuild();
        spin_unlock_irqrestore(&cpu_map_lock, flag);
}