linux/arch/powerpc/platforms/powernv/subcore.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2013, Michael (Ellerman|Neuling), IBM Corporation.
 */

#define pr_fmt(fmt)     "powernv: " fmt

#include <linux/kernel.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/gfp.h>
#include <linux/smp.h>
#include <linux/stop_machine.h>

#include <asm/cputhreads.h>
#include <asm/cpuidle.h>
#include <asm/kvm_ppc.h>
#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/smp.h>

#include "subcore.h"
#include "powernv.h"


/*
 * Split/unsplit procedure:
 *
 * A core can be in one of three states: unsplit, 2-way split, and 4-way split.
 *
 * The mapping to subcores_per_core is simple:
 *
 *  State       | subcores_per_core
 *  ------------|------------------
 *  Unsplit     |        1
 *  2-way split |        2
 *  4-way split |        4
 *
 * The core is split along thread boundaries; the mapping between subcores and
 * threads is as follows:
 *
 *  Unsplit:
 *          ----------------------------
 *  Subcore |            0             |
 *          ----------------------------
 *  Thread  |  0  1  2  3  4  5  6  7  |
 *          ----------------------------
 *
 *  2-way split:
 *          -------------------------------------
 *  Subcore |        0        |        1        |
 *          -------------------------------------
 *  Thread  |  0   1   2   3  |  4   5   6   7  |
 *          -------------------------------------
 *
 *  4-way split:
 *          -----------------------------------------
 *  Subcore |    0    |    1    |    2    |    3    |
 *          -----------------------------------------
 *  Thread  |  0   1  |  2   3  |  4   5  |  6   7  |
 *          -----------------------------------------
 *
 *
 * Transitions
 * -----------
 *
 * It is not possible to transition directly between the two split states; the
 * core must first be unsplit. The legal transitions are:
 *
 *  -----------          ---------------
 *  |         |  <---->  | 2-way split |
 *  |         |          ---------------
 *  | Unsplit |
 *  |         |          ---------------
 *  |         |  <---->  | 4-way split |
 *  -----------          ---------------
 *
 * Unsplitting
 * -----------
 *
 * Unsplitting is the simpler procedure. It requires thread 0 to request the
 * unsplit while all other threads NAP.
 *
 * Thread 0 clears HID0_POWER8_DYNLPARDIS (Dynamic LPAR Disable). This tells
 * the hardware that if all threads except 0 are napping, the hardware should
 * unsplit the core.
 *
 * Non-zero threads are sent to a NAP loop; they don't exit the loop until they
 * see the core unsplit.
 *
 * Thread 0 spins waiting for the hardware to see all the other threads napping
 * and perform the unsplit.
 *
 * Once thread 0 sees the unsplit, it IPIs the secondary threads to wake them
 * out of NAP. They will then see the core unsplit and exit the NAP loop.
 *
 * Splitting
 * ---------
 *
 * The basic splitting procedure is fairly straightforward. However, it is
 * complicated by the fact that after the split occurs, the newly created
 * subcores are not in a fully initialised state.
 *
 * Most notably the subcores do not have the correct value for SDR1, which
 * means they must not be running in virtual mode when the split occurs. The
 * subcores have separate timebase SPRs, but these are pre-synchronised by
 * OPAL.
 *
 * To begin with, secondary threads are sent to an assembly routine. There they
 * switch to real mode, so they are immune to the uninitialised SDR1 value.
 * Once there, they indicate that they have switched to real mode and spin
 * waiting to see the core split.
 *
 * Thread 0 waits to see that all secondaries are in real mode, and then begins
 * the splitting procedure. It first sets HID0_POWER8_DYNLPARDIS, which
 * prevents the hardware from unsplitting. Then it sets the appropriate HID bit
 * to request the split, and spins waiting to see that the split has happened.
 *
 * Concurrently the secondaries will notice the split. When they do, they set
 * up their SPRs, notably SDR1, and then they can return to virtual mode and
 * exit the procedure.
 */

/* Initialised at boot by subcore_init() */
static int subcores_per_core;

/*
 * Used to communicate to offline cpus that we want them to pop out of the
 * offline loop and do a split or unsplit.
 *
 * 0 - no split happening
 * 1 - unsplit in progress
 * 2 - split to 2 in progress
 * 4 - split to 4 in progress
 */
static int new_split_mode;

static cpumask_var_t cpu_offline_mask;

struct split_state {
        u8 step;
        u8 master;
};

static DEFINE_PER_CPU(struct split_state, split_state);

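/*
 * Spin until every other thread in our core has advanced to at least
 * @step. Only ever called on thread 0 of a core, so the siblings are
 * the cpus numbered cpu + 1 .. cpu + threads_per_core - 1.
 */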
static void wait_for_sync_step(int step)
{
        int i, cpu = smp_processor_id();

        for (i = cpu + 1; i < cpu + threads_per_core; i++)
                while (per_cpu(split_state, i).step < step)
                        barrier();

        /* Order the wait loop vs any subsequent loads/stores. */
        mb();
}

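/*
 * If deep idle (winkle) is supported, the SLW engine restores HID0 on
 * wakeup, so it must be told about the new value we have written.
 */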
static void update_hid_in_slw(u64 hid0)
{
        u64 idle_states = pnv_get_supported_cpuidle_states();

        if (idle_states & OPAL_PM_WINKLE_ENABLED) {
                /* OPAL call to patch slw with the new HID0 value */
                u64 cpu_pir = hard_smp_processor_id();

                opal_slw_set_reg(cpu_pir, SPRN_HID0, hid0);
        }
}

static inline void update_power8_hid0(unsigned long hid0)
{
        /*
         * The HID0 update on Power8 should at the very least be
         * preceded by a SYNC instruction followed by an ISYNC
         * instruction.
         */
        asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0));
}

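/*
 * Called on every thread of the core. Secondary threads nap until the
 * hardware has unsplit the core; thread 0 clears the dynamic LPAR
 * disable bit, waits for the unsplit to complete, then wakes the
 * secondaries out of NAP.
 */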
static void unsplit_core(void)
{
        u64 hid0, mask;
        int i, cpu;

        mask = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;

        cpu = smp_processor_id();
        if (cpu_thread_in_core(cpu) != 0) {
                while (mfspr(SPRN_HID0) & mask)
                        power7_idle_type(PNV_THREAD_NAP);

                per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT;
                return;
        }

        hid0 = mfspr(SPRN_HID0);
        hid0 &= ~HID0_POWER8_DYNLPARDIS;
        update_power8_hid0(hid0);
        update_hid_in_slw(hid0);

        while (mfspr(SPRN_HID0) & mask)
                cpu_relax();

        /* Wake secondaries out of NAP */
        for (i = cpu + 1; i < cpu + threads_per_core; i++)
                smp_send_reschedule(i);

        wait_for_sync_step(SYNC_STEP_UNSPLIT);
}

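/*
 * Called on every thread of the core. Secondary threads park themselves
 * in real mode; thread 0 waits for them, then sets the HID0 bits that
 * request a 2-way or 4-way split and spins until the hardware reports
 * that the split has taken effect.
 */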
static void split_core(int new_mode)
{
        struct { u64 value; u64 mask; } split_parms[2] = {
                { HID0_POWER8_1TO2LPAR, HID0_POWER8_2LPARMODE },
                { HID0_POWER8_1TO4LPAR, HID0_POWER8_4LPARMODE }
        };
        int i, cpu;
        u64 hid0;

        /* Convert new_mode (2 or 4) into an index into our parms array */
        i = (new_mode >> 1) - 1;
        BUG_ON(i < 0 || i > 1);

        cpu = smp_processor_id();
        if (cpu_thread_in_core(cpu) != 0) {
                split_core_secondary_loop(&per_cpu(split_state, cpu).step);
                return;
        }

        wait_for_sync_step(SYNC_STEP_REAL_MODE);

        /* Write new mode */
        hid0  = mfspr(SPRN_HID0);
        hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value;
        update_power8_hid0(hid0);
        update_hid_in_slw(hid0);

        /* Wait for it to happen */
        while (!(mfspr(SPRN_HID0) & split_parms[i].mask))
                cpu_relax();
}

static void cpu_do_split(int new_mode)
{
        /*
         * At boot subcores_per_core will be 0, so we will always unsplit at
         * boot. In the usual case where the core is already unsplit it's a
         * nop, and this just ensures the kernel's notion of the mode is
         * consistent with the hardware.
         */
        if (subcores_per_core != 1)
                unsplit_core();

        if (new_mode != 1)
                split_core(new_mode);

        mb();
        per_cpu(split_state, smp_processor_id()).step = SYNC_STEP_FINISHED;
}

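/*
 * Polled by offline cpus from their idle loop. Returns true if a split
 * or unsplit was requested and has now been performed on this thread.
 */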
bool cpu_core_split_required(void)
{
        smp_rmb();

        if (!new_split_mode)
                return false;

        cpu_do_split(new_split_mode);

        return true;
}

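/*
 * Recompute each cpu's paca->subcore_sibling_mask, i.e. the set of
 * threads that share its subcore. For example, with a 4-way split
 * (threads_per_subcore == 2), thread 5 sits in the subcore covering
 * threads 4-5, giving a mask of 0x30.
 */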
void update_subcore_sibling_mask(void)
{
        int cpu;
        /*
         * Sibling mask for the first cpu. Left shift this by the required
         * number of bits to get the sibling mask for each of the other cpus.
         */
        int sibling_mask_first_cpu = (1 << threads_per_subcore) - 1;

        for_each_possible_cpu(cpu) {
                int tid = cpu_thread_in_core(cpu);
                int offset = (tid / threads_per_subcore) * threads_per_subcore;
                int mask = sibling_mask_first_cpu << offset;

                paca_ptrs[cpu]->subcore_sibling_mask = mask;
        }
}

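/*
 * Run on every online cpu via stop_machine(). The designated master
 * publishes new_split_mode, kicks the offline cpus out of their idle
 * loop, and only updates the globals once every present cpu has
 * reported SYNC_STEP_FINISHED.
 */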
static int cpu_update_split_mode(void *data)
{
        int cpu, new_mode = *(int *)data;

        if (this_cpu_ptr(&split_state)->master) {
                new_split_mode = new_mode;
                smp_wmb();

                cpumask_andnot(cpu_offline_mask, cpu_present_mask,
                               cpu_online_mask);

                /* This should work even though the cpu is offline */
                for_each_cpu(cpu, cpu_offline_mask)
                        smp_send_reschedule(cpu);
        }

        cpu_do_split(new_mode);

        if (this_cpu_ptr(&split_state)->master) {
                /* Wait for all cpus to finish before we touch subcores_per_core */
                for_each_present_cpu(cpu) {
                        if (cpu >= setup_max_cpus)
                                break;

                        while (per_cpu(split_state, cpu).step < SYNC_STEP_FINISHED)
                                barrier();
                }

                new_split_mode = 0;

                /* Make the new mode public */
                subcores_per_core = new_mode;
                threads_per_subcore = threads_per_core / subcores_per_core;
                update_subcore_sibling_mask();

                /* Make sure the new mode is written before we exit */
                mb();
        }

        return 0;
}

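/*
 * Switch every core in the system to @new_mode subcores. Offline cpus
 * take part via cpu_core_split_required(), which is why the split_state
 * of all present cpus, not just the online ones, is reset first.
 */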
static int set_subcores_per_core(int new_mode)
{
        struct split_state *state;
        int cpu;

        if (kvm_hv_mode_active()) {
                pr_err("Unable to change split core mode while KVM active.\n");
                return -EBUSY;
        }

        /*
         * We are only called at boot, or from the sysfs write. If that ever
         * changes we'll need a lock here.
         */
        BUG_ON(new_mode < 1 || new_mode > 4 || new_mode == 3);

        for_each_present_cpu(cpu) {
                state = &per_cpu(split_state, cpu);
                state->step = SYNC_STEP_INITIAL;
                state->master = 0;
        }

        cpus_read_lock();

        /* This cpu will update the globals before exiting stop machine */
        this_cpu_ptr(&split_state)->master = 1;

        /* Ensure state is consistent before we call the other cpus */
        mb();

        stop_machine_cpuslocked(cpu_update_split_mode, &new_mode,
                                cpu_online_mask);

        cpus_read_unlock();

        return 0;
}

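/*
 * The attribute is created on the cpu subsystem root device, so it
 * typically appears as /sys/devices/system/cpu/subcores_per_core, e.g.:
 *
 *   echo 2 > /sys/devices/system/cpu/subcores_per_core
 *
 * Note the value is parsed as hex, though 1, 2 and 4 read the same in
 * either base.
 */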
static ssize_t __used store_subcores_per_core(struct device *dev,
                struct device_attribute *attr, const char *buf,
                size_t count)
{
        unsigned long val;
        int rc;

        /* We are serialised by the attribute lock */

        rc = sscanf(buf, "%lx", &val);
        if (rc != 1)
                return -EINVAL;

        switch (val) {
        case 1:
        case 2:
        case 4:
                if (subcores_per_core == val)
                        /* Nothing to do */
                        goto out;
                break;
        default:
                return -EINVAL;
        }

        rc = set_subcores_per_core(val);
        if (rc)
                return rc;

out:
        return count;
}

static ssize_t show_subcores_per_core(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        return sprintf(buf, "%x\n", subcores_per_core);
}

static DEVICE_ATTR(subcores_per_core, 0644,
                show_subcores_per_core, store_subcores_per_core);

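/*
 * Split core is a POWER8-only feature, so bail out on any other cpu.
 * Splitting also needs every thread of every core, hence the
 * threads_per_core alignment check below.
 */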
static int subcore_init(void)
{
        unsigned int pvr_ver;

        pvr_ver = PVR_VER(mfspr(SPRN_PVR));

        if (pvr_ver != PVR_POWER8 &&
            pvr_ver != PVR_POWER8E &&
            pvr_ver != PVR_POWER8NVL)
                return 0;

        /*
         * We need all threads in a core to be present to split/unsplit, so
         * only continue if setup_max_cpus is a multiple of threads_per_core.
         */
        if (setup_max_cpus % threads_per_core)
                return 0;

        BUG_ON(!alloc_cpumask_var(&cpu_offline_mask, GFP_KERNEL));

        set_subcores_per_core(1);

        return device_create_file(cpu_subsys.dev_root,
                                  &dev_attr_subcores_per_core);
}
machine_device_initcall(powernv, subcore_init);