linux/arch/powerpc/platforms/powernv/subcore.c
/*
 * Copyright 2013, Michael (Ellerman|Neuling), IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt)     "powernv: " fmt

#include <linux/kernel.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/gfp.h>
#include <linux/smp.h>
#include <linux/stop_machine.h>

#include <asm/cputhreads.h>
#include <asm/cpuidle.h>
#include <asm/kvm_ppc.h>
#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/smp.h>

#include "subcore.h"
#include "powernv.h"


/*
 * Split/unsplit procedure:
 *
 * A core can be in one of three states: unsplit, 2-way split, or 4-way split.
 *
 * The mapping to subcores_per_core is simple:
 *
 *  State       | subcores_per_core
 *  ------------|------------------
 *  Unsplit     |        1
 *  2-way split |        2
 *  4-way split |        4
 *
 * The core is split along thread boundaries; the mapping between subcores and
 * threads is as follows:
 *
 *  Unsplit:
 *          ----------------------------
 *  Subcore |            0             |
 *          ----------------------------
 *  Thread  |  0  1  2  3  4  5  6  7  |
 *          ----------------------------
 *
 *  2-way split:
 *          -------------------------------------
 *  Subcore |        0        |        1        |
 *          -------------------------------------
 *  Thread  |  0   1   2   3  |  4   5   6   7  |
 *          -------------------------------------
 *
 *  4-way split:
 *          -----------------------------------------
 *  Subcore |    0    |    1    |    2    |    3    |
 *          -----------------------------------------
 *  Thread  |  0   1  |  2   3  |  4   5  |  6   7  |
 *          -----------------------------------------
 *
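 * In general threads_per_subcore = threads_per_core / subcores_per_core, and
 * thread t of a core belongs to subcore t / threads_per_subcore; for example,
 * in 4-way split mode thread 5 sits in subcore 5 / 2 = 2.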
 *
 * Transitions
 * -----------
 *
 * It is not possible to transition directly between the two split states; the
 * core must first be unsplit. The legal transitions are:
 *
 *  -----------          ---------------
 *  |         |  <---->  | 2-way split |
 *  |         |          ---------------
 *  | Unsplit |
 *  |         |          ---------------
 *  |         |  <---->  | 4-way split |
 *  -----------          ---------------
 *
 * Unsplitting
 * -----------
 *
 * Unsplitting is the simpler procedure. It requires thread 0 to request the
 * unsplit while all other threads NAP.
 *
 * Thread 0 clears HID0_POWER8_DYNLPARDIS (Dynamic LPAR Disable). This tells
 * the hardware that if all threads except 0 are napping, it should unsplit
 * the core.
 *
 * Non-zero threads are sent to a NAP loop; they don't exit the loop until
 * they see the core unsplit.
 *
 * Thread 0 spins waiting for the hardware to see all the other threads
 * napping and perform the unsplit.
 *
 * Once thread 0 sees the unsplit, it IPIs the secondary threads to wake them
 * out of NAP. They will then see the core unsplit and exit the NAP loop.
 *
 * Splitting
 * ---------
 *
 * The basic splitting procedure is fairly straightforward. However, it is
 * complicated by the fact that after the split occurs, the newly created
 * subcores are not in a fully initialised state.
 *
 * Most notably the subcores do not have the correct value for SDR1, which
 * means they must not be running in virtual mode when the split occurs. The
 * subcores have separate timebase SPRs, but these are pre-synchronised by
 * OPAL.
 *
 * To begin with, secondary threads are sent to an assembly routine. There they
 * switch to real mode, so they are immune to the uninitialised SDR1 value.
 * Once in real mode they indicate that they are in real mode, and spin waiting
 * to see the core split.
 *
 * Thread 0 waits to see that all secondaries are in real mode, and then begins
 * the splitting procedure. It first sets HID0_POWER8_DYNLPARDIS, which
 * prevents the hardware from unsplitting. Then it sets the appropriate HID bit
 * to request the split, and spins waiting to see that the split has happened.
 *
 * Concurrently the secondaries will notice the split. When they do, they set
 * up their SPRs, notably SDR1, and then return to virtual mode and exit the
 * procedure.
 */

/* Initialised at boot by subcore_init() */
static int subcores_per_core;

/*
 * Used to communicate to offline cpus that we want them to pop out of the
 * offline loop and do a split or unsplit.
 *
 * 0 - no split happening
 * 1 - unsplit in progress
 * 2 - split to 2 in progress
 * 4 - split to 4 in progress
 */
static int new_split_mode;

static cpumask_var_t cpu_offline_mask;

struct split_state {
        u8 step;
        u8 master;
};

static DEFINE_PER_CPU(struct split_state, split_state);

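/*
 * Called on thread 0 of a core: spin until every other thread in the core has
 * advanced its per-cpu step to at least @step.
 */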
static void wait_for_sync_step(int step)
{
        int i, cpu = smp_processor_id();

        for (i = cpu + 1; i < cpu + threads_per_core; i++)
                while (per_cpu(split_state, i).step < step)
                        barrier();

        /* Order the wait loop vs any subsequent loads/stores. */
        mb();
}

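/*
 * If deep idle (winkle) is supported, HID0 is among the state restored by
 * firmware on wakeup, so ask OPAL to patch the new HID0 value into the SLW
 * image. That way a thread waking from winkle is restored with the current
 * split mode rather than a stale one.
 */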
static void update_hid_in_slw(u64 hid0)
{
        u64 idle_states = pnv_get_supported_cpuidle_states();

        if (idle_states & OPAL_PM_WINKLE_ENABLED) {
                /* OPAL call to patch slw with the new HID0 value */
                u64 cpu_pir = hard_smp_processor_id();

                opal_slw_set_reg(cpu_pir, SPRN_HID0, hid0);
        }
}

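/*
 * The unsplit procedure described above. Secondary threads NAP until they see
 * both LPARMODE bits clear in HID0; thread 0 clears DYNLPARDIS, waits for the
 * hardware to unsplit, then IPIs the secondaries to wake them.
 */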
static void unsplit_core(void)
{
        u64 hid0, mask;
        int i, cpu;

        mask = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;

        cpu = smp_processor_id();
        if (cpu_thread_in_core(cpu) != 0) {
                while (mfspr(SPRN_HID0) & mask)
                        power7_idle_insn(PNV_THREAD_NAP);

                per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT;
                return;
        }

        hid0 = mfspr(SPRN_HID0);
        hid0 &= ~HID0_POWER8_DYNLPARDIS;
        update_power8_hid0(hid0);
        update_hid_in_slw(hid0);

        while (mfspr(SPRN_HID0) & mask)
                cpu_relax();

        /* Wake secondaries out of NAP */
        for (i = cpu + 1; i < cpu + threads_per_core; i++)
                smp_send_reschedule(i);

        wait_for_sync_step(SYNC_STEP_UNSPLIT);
}

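/*
 * The split procedure described above. Secondary threads park in real mode
 * via split_core_secondary_loop(); thread 0 waits for that, then sets
 * DYNLPARDIS plus the 1TO2/1TO4 request bit in HID0 and spins until the
 * matching LPARMODE bit shows the split has taken effect.
 */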
static void split_core(int new_mode)
{
        struct { u64 value; u64 mask; } split_parms[2] = {
                { HID0_POWER8_1TO2LPAR, HID0_POWER8_2LPARMODE },
                { HID0_POWER8_1TO4LPAR, HID0_POWER8_4LPARMODE }
        };
        int i, cpu;
        u64 hid0;

        /* Convert new_mode (2 or 4) into an index into our parms array */
        i = (new_mode >> 1) - 1;
        BUG_ON(i < 0 || i > 1);

        cpu = smp_processor_id();
        if (cpu_thread_in_core(cpu) != 0) {
                split_core_secondary_loop(&per_cpu(split_state, cpu).step);
                return;
        }

        wait_for_sync_step(SYNC_STEP_REAL_MODE);

        /* Write new mode */
        hid0  = mfspr(SPRN_HID0);
        hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value;
        update_power8_hid0(hid0);
        update_hid_in_slw(hid0);

        /* Wait for it to happen */
        while (!(mfspr(SPRN_HID0) & split_parms[i].mask))
                cpu_relax();
}

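/*
 * Bring this thread's core into new_mode. The core is always unsplit first,
 * since direct transitions between the two split states are not possible,
 * then split again if a split mode was requested.
 */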
static void cpu_do_split(int new_mode)
{
        /*
         * At boot subcores_per_core will be 0, so we will always unsplit at
         * boot. In the usual case where the core is already unsplit it's a
         * nop, and this just ensures the kernel's notion of the mode is
         * consistent with the hardware.
         */
        if (subcores_per_core != 1)
                unsplit_core();

        if (new_mode != 1)
                split_core(new_mode);

        mb();
        per_cpu(split_state, smp_processor_id()).step = SYNC_STEP_FINISHED;
}

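/*
 * Polled by offline cpus: if a split or unsplit is pending (see the comment
 * above new_split_mode), join it via cpu_do_split(). Returns true if we took
 * part in a transition, false if there was nothing to do.
 */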
bool cpu_core_split_required(void)
{
        smp_rmb();

        if (!new_split_mode)
                return false;

        cpu_do_split(new_split_mode);

        return true;
}

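/*
 * Recompute each cpu's subcore sibling mask in its paca. Worked example,
 * assuming 8 threads per core split 4 ways (threads_per_subcore = 2):
 * sibling_mask_first_cpu is 0x3, and thread 5 has offset (5 / 2) * 2 = 4,
 * so its mask is 0x3 << 4 = 0x30, i.e. threads 4 and 5 of the core.
 */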
void update_subcore_sibling_mask(void)
{
        int cpu;
        /*
         * Sibling mask for the first cpu of the core. Shift it left by the
         * appropriate multiple of threads_per_subcore to get the mask for
         * each of the other cpus.
         */
        int sibling_mask_first_cpu = (1 << threads_per_subcore) - 1;

        for_each_possible_cpu(cpu) {
                int tid = cpu_thread_in_core(cpu);
                int offset = (tid / threads_per_subcore) * threads_per_subcore;
                int mask = sibling_mask_first_cpu << offset;

                paca_ptrs[cpu]->subcore_sibling_mask = mask;
        }
}

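/*
 * The stop_machine() callback, run by every online cpu. The master cpu
 * additionally publishes new_split_mode and kicks the offline cpus so they
 * notice it, then waits for all present cpus to finish before updating the
 * globals that describe the new mode.
 */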
static int cpu_update_split_mode(void *data)
{
        int cpu, new_mode = *(int *)data;

        if (this_cpu_ptr(&split_state)->master) {
                new_split_mode = new_mode;
                smp_wmb();

                cpumask_andnot(cpu_offline_mask, cpu_present_mask,
                               cpu_online_mask);

                /* This should work even though the cpu is offline */
                for_each_cpu(cpu, cpu_offline_mask)
                        smp_send_reschedule(cpu);
        }

        cpu_do_split(new_mode);

        if (this_cpu_ptr(&split_state)->master) {
                /* Wait for all cpus to finish before we touch subcores_per_core */
                for_each_present_cpu(cpu) {
                        if (cpu >= setup_max_cpus)
                                break;

                        while (per_cpu(split_state, cpu).step < SYNC_STEP_FINISHED)
                                barrier();
                }

                new_split_mode = 0;

                /* Make the new mode public */
                subcores_per_core = new_mode;
                threads_per_subcore = threads_per_core / subcores_per_core;
                update_subcore_sibling_mask();

                /* Make sure the new mode is written before we exit */
                mb();
        }

        return 0;
}

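/*
 * Drive a mode change: reset the per-cpu sync state, mark this cpu as the
 * master, then run cpu_update_split_mode() on every online cpu under
 * stop_machine().
 */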
static int set_subcores_per_core(int new_mode)
{
        struct split_state *state;
        int cpu;

        if (kvm_hv_mode_active()) {
                pr_err("Unable to change split core mode while KVM active.\n");
                return -EBUSY;
        }

        /*
         * We are only called at boot, or from the sysfs write. If that ever
         * changes we'll need a lock here.
         */
        BUG_ON(new_mode < 1 || new_mode > 4 || new_mode == 3);

        for_each_present_cpu(cpu) {
                state = &per_cpu(split_state, cpu);
                state->step = SYNC_STEP_INITIAL;
                state->master = 0;
        }

        cpus_read_lock();

        /* This cpu will update the globals before exiting stop machine */
        this_cpu_ptr(&split_state)->master = 1;

        /* Ensure state is consistent before we call the other cpus */
        mb();

        stop_machine_cpuslocked(cpu_update_split_mode, &new_mode,
                                cpu_online_mask);

        cpus_read_unlock();

        return 0;
}

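/*
 * The attribute below appears as /sys/devices/system/cpu/subcores_per_core.
 * For example, to request a 4-way split:
 *
 *   echo 4 > /sys/devices/system/cpu/subcores_per_core
 *
 * (Userspace tools such as ppc64_cpu drive the same file.)
 */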
static ssize_t __used store_subcores_per_core(struct device *dev,
                struct device_attribute *attr, const char *buf,
                size_t count)
{
        unsigned long val;
        int rc;

        /* We are serialised by the attribute lock */

        rc = sscanf(buf, "%lx", &val);
        if (rc != 1)
                return -EINVAL;

        switch (val) {
        case 1:
        case 2:
        case 4:
                if (subcores_per_core == val)
                        /* Nothing to do */
                        goto out;
                break;
        default:
                return -EINVAL;
        }

        rc = set_subcores_per_core(val);
        if (rc)
                return rc;

out:
        return count;
}

static ssize_t show_subcores_per_core(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        return sprintf(buf, "%x\n", subcores_per_core);
}

static DEVICE_ATTR(subcores_per_core, 0644,
                show_subcores_per_core, store_subcores_per_core);

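/*
 * Split-core is only present on POWER8 variants, so do nothing on other CPUs.
 * We also bail if setup_max_cpus is not a whole number of cores, since every
 * thread of a core must be present to split or unsplit.
 */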
static int subcore_init(void)
{
        unsigned pvr_ver;

        pvr_ver = PVR_VER(mfspr(SPRN_PVR));

        if (pvr_ver != PVR_POWER8 &&
            pvr_ver != PVR_POWER8E &&
            pvr_ver != PVR_POWER8NVL)
                return 0;

        /*
         * We need all threads in a core to be present to split/unsplit, so
         * only continue if setup_max_cpus is a multiple of threads_per_core.
         */
        if (setup_max_cpus % threads_per_core)
                return 0;

        BUG_ON(!alloc_cpumask_var(&cpu_offline_mask, GFP_KERNEL));

        set_subcores_per_core(1);

        return device_create_file(cpu_subsys.dev_root,
                                  &dev_attr_subcores_per_core);
}
machine_device_initcall(powernv, subcore_init);