uboot/arch/x86/cpu/mp_init.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0+
   2/*
   3 * Copyright (C) 2015 Google, Inc
   4 *
   5 * Based on code from the coreboot file of the same name
   6 */
   7
   8#include <common.h>
   9#include <cpu.h>
  10#include <dm.h>
  11#include <errno.h>
  12#include <malloc.h>
  13#include <qfw.h>
  14#include <asm/atomic.h>
  15#include <asm/cpu.h>
  16#include <asm/interrupt.h>
  17#include <asm/lapic.h>
  18#include <asm/microcode.h>
  19#include <asm/mp.h>
  20#include <asm/msr.h>
  21#include <asm/mtrr.h>
  22#include <asm/processor.h>
  23#include <asm/sipi.h>
  24#include <dm/device-internal.h>
  25#include <dm/uclass-internal.h>
  26#include <dm/lists.h>
  27#include <dm/root.h>
  28#include <linux/linkage.h>
  29
  30DECLARE_GLOBAL_DATA_PTR;
  31
/*
 * Total CPUs include BSP.
 * Written by mp_init() from cpu_get_count() and read by
 * bsp_do_flight_plan() to work out how many APs must check in.
 */
static int num_cpus;
  34
/*
 * One saved MSR: the register index followed by the low and high 32-bit
 * halves of its value, as filled in by save_msr().
 *
 * This also needs to match the sipi.S assembly code for saved MSR encoding
 */
struct saved_msr {
	uint32_t index;
	uint32_t lo;
	uint32_t hi;
} __packed;
  41
  42
/*
 * The flight plan as seen by the APs: a count of records and a pointer to
 * the first one.
 */
struct mp_flight_plan {
	int num_records;
	struct mp_flight_record *records;
};

/*
 * Filled in by mp_init() from the caller's mp_params before the APs are
 * started, and walked by ap_do_flight_plan().
 */
static struct mp_flight_plan mp_info;
  49
/*
 * Groups a CPU device with an APIC ID and an error code.
 * NOTE(review): nothing in the visible part of this file uses this type -
 * confirm whether it is still needed.
 */
struct cpu_map {
	struct udevice *dev;
	int apic_id;
	int err_code;
};
  55
  56static inline void barrier_wait(atomic_t *b)
  57{
  58        while (atomic_read(b) == 0)
  59                asm("pause");
  60        mfence();
  61}
  62
/*
 * Open barrier @b by setting it to 1, which lets barrier_wait() return.
 * The fence is issued before the store; presumably this orders earlier
 * writes ahead of the release - confirm against the mfence() definition.
 */
static inline void release_barrier(atomic_t *b)
{
	mfence();
	atomic_set(b, 1);
}
  68
  69static inline void stop_this_cpu(void)
  70{
  71        /* Called by an AP when it is ready to halt and wait for a new task */
  72        for (;;)
  73                cpu_hlt();
  74}
  75
  76/* Returns 1 if timeout waiting for APs. 0 if target APs found */
  77static int wait_for_aps(atomic_t *val, int target, int total_delay,
  78                        int delay_step)
  79{
  80        int timeout = 0;
  81        int delayed = 0;
  82
  83        while (atomic_read(val) != target) {
  84                udelay(delay_step);
  85                delayed += delay_step;
  86                if (delayed >= total_delay) {
  87                        timeout = 1;
  88                        break;
  89                }
  90        }
  91
  92        return timeout;
  93}
  94
  95static void ap_do_flight_plan(struct udevice *cpu)
  96{
  97        int i;
  98
  99        for (i = 0; i < mp_info.num_records; i++) {
 100                struct mp_flight_record *rec = &mp_info.records[i];
 101
 102                atomic_inc(&rec->cpus_entered);
 103                barrier_wait(&rec->barrier);
 104
 105                if (rec->ap_call != NULL)
 106                        rec->ap_call(cpu, rec->ap_arg);
 107        }
 108}
 109
 110static int find_cpu_by_apic_id(int apic_id, struct udevice **devp)
 111{
 112        struct udevice *dev;
 113
 114        *devp = NULL;
 115        for (uclass_find_first_device(UCLASS_CPU, &dev);
 116             dev;
 117             uclass_find_next_device(&dev)) {
 118                struct cpu_platdata *plat = dev_get_parent_platdata(dev);
 119
 120                if (plat->cpu_id == apic_id) {
 121                        *devp = dev;
 122                        return 0;
 123                }
 124        }
 125
 126        return -ENOENT;
 127}
 128
 129/*
 130 * By the time APs call ap_init() caching has been setup, and microcode has
 131 * been loaded
 132 */
 133static void ap_init(unsigned int cpu_index)
 134{
 135        struct udevice *dev;
 136        int apic_id;
 137        int ret;
 138
 139        /* Ensure the local apic is enabled */
 140        enable_lapic();
 141
 142        apic_id = lapicid();
 143        ret = find_cpu_by_apic_id(apic_id, &dev);
 144        if (ret) {
 145                debug("Unknown CPU apic_id %x\n", apic_id);
 146                goto done;
 147        }
 148
 149        debug("AP: slot %d apic_id %x, dev %s\n", cpu_index, apic_id,
 150              dev ? dev->name : "(apic_id not found)");
 151
 152        /* Walk the flight plan */
 153        ap_do_flight_plan(dev);
 154
 155        /* Park the AP */
 156        debug("parking\n");
 157done:
 158        stop_this_cpu();
 159}
 160
/*
 * MSR numbers of the fixed-range MTRRs, in the order in which
 * save_bsp_msrs() stores them in the saved-MSR table.
 */
static const unsigned int fixed_mtrrs[NUM_FIXED_MTRRS] = {
	MTRR_FIX_64K_00000_MSR, MTRR_FIX_16K_80000_MSR, MTRR_FIX_16K_A0000_MSR,
	MTRR_FIX_4K_C0000_MSR, MTRR_FIX_4K_C8000_MSR, MTRR_FIX_4K_D0000_MSR,
	MTRR_FIX_4K_D8000_MSR, MTRR_FIX_4K_E0000_MSR, MTRR_FIX_4K_E8000_MSR,
	MTRR_FIX_4K_F0000_MSR, MTRR_FIX_4K_F8000_MSR,
};
 167
 168static inline struct saved_msr *save_msr(int index, struct saved_msr *entry)
 169{
 170        msr_t msr;
 171
 172        msr = msr_read(index);
 173        entry->index = index;
 174        entry->lo = msr.lo;
 175        entry->hi = msr.hi;
 176
 177        /* Return the next entry */
 178        entry++;
 179        return entry;
 180}
 181
 182static int save_bsp_msrs(char *start, int size)
 183{
 184        int msr_count;
 185        int num_var_mtrrs;
 186        struct saved_msr *msr_entry;
 187        int i;
 188        msr_t msr;
 189
 190        /* Determine number of MTRRs need to be saved */
 191        msr = msr_read(MTRR_CAP_MSR);
 192        num_var_mtrrs = msr.lo & 0xff;
 193
 194        /* 2 * num_var_mtrrs for base and mask. +1 for IA32_MTRR_DEF_TYPE */
 195        msr_count = 2 * num_var_mtrrs + NUM_FIXED_MTRRS + 1;
 196
 197        if ((msr_count * sizeof(struct saved_msr)) > size) {
 198                printf("Cannot mirror all %d msrs\n", msr_count);
 199                return -ENOSPC;
 200        }
 201
 202        msr_entry = (void *)start;
 203        for (i = 0; i < NUM_FIXED_MTRRS; i++)
 204                msr_entry = save_msr(fixed_mtrrs[i], msr_entry);
 205
 206        for (i = 0; i < num_var_mtrrs; i++) {
 207                msr_entry = save_msr(MTRR_PHYS_BASE_MSR(i), msr_entry);
 208                msr_entry = save_msr(MTRR_PHYS_MASK_MSR(i), msr_entry);
 209        }
 210
 211        msr_entry = save_msr(MTRR_DEF_TYPE_MSR, msr_entry);
 212
 213        return msr_count;
 214}
 215
 216static int load_sipi_vector(atomic_t **ap_countp, int num_cpus)
 217{
 218        struct sipi_params_16bit *params16;
 219        struct sipi_params *params;
 220        static char msr_save[512];
 221        char *stack;
 222        ulong addr;
 223        int code_len;
 224        int size;
 225        int ret;
 226
 227        /* Copy in the code */
 228        code_len = ap_start16_code_end - ap_start16;
 229        debug("Copying SIPI code to %x: %d bytes\n", AP_DEFAULT_BASE,
 230              code_len);
 231        memcpy((void *)AP_DEFAULT_BASE, ap_start16, code_len);
 232
 233        addr = AP_DEFAULT_BASE + (ulong)sipi_params_16bit - (ulong)ap_start16;
 234        params16 = (struct sipi_params_16bit *)addr;
 235        params16->ap_start = (uint32_t)ap_start;
 236        params16->gdt = (uint32_t)gd->arch.gdt;
 237        params16->gdt_limit = X86_GDT_SIZE - 1;
 238        debug("gdt = %x, gdt_limit = %x\n", params16->gdt, params16->gdt_limit);
 239
 240        params = (struct sipi_params *)sipi_params;
 241        debug("SIPI 32-bit params at %p\n", params);
 242        params->idt_ptr = (uint32_t)x86_get_idt();
 243
 244        params->stack_size = CONFIG_AP_STACK_SIZE;
 245        size = params->stack_size * num_cpus;
 246        stack = memalign(4096, size);
 247        if (!stack)
 248                return -ENOMEM;
 249        params->stack_top = (u32)(stack + size);
 250#if !defined(CONFIG_QEMU) && !defined(CONFIG_HAVE_FSP) && \
 251        !defined(CONFIG_INTEL_MID)
 252        params->microcode_ptr = ucode_base;
 253        debug("Microcode at %x\n", params->microcode_ptr);
 254#endif
 255        params->msr_table_ptr = (u32)msr_save;
 256        ret = save_bsp_msrs(msr_save, sizeof(msr_save));
 257        if (ret < 0)
 258                return ret;
 259        params->msr_count = ret;
 260
 261        params->c_handler = (uint32_t)&ap_init;
 262
 263        *ap_countp = &params->ap_count;
 264        atomic_set(*ap_countp, 0);
 265        debug("SIPI vector is ready\n");
 266
 267        return 0;
 268}
 269
 270static int check_cpu_devices(int expected_cpus)
 271{
 272        int i;
 273
 274        for (i = 0; i < expected_cpus; i++) {
 275                struct udevice *dev;
 276                int ret;
 277
 278                ret = uclass_find_device(UCLASS_CPU, i, &dev);
 279                if (ret) {
 280                        debug("Cannot find CPU %d in device tree\n", i);
 281                        return ret;
 282                }
 283        }
 284
 285        return 0;
 286}
 287
 288/* Returns 1 for timeout. 0 on success */
 289static int apic_wait_timeout(int total_delay, const char *msg)
 290{
 291        int total = 0;
 292
 293        if (!(lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY))
 294                return 0;
 295
 296        debug("Waiting for %s...", msg);
 297        while (lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY) {
 298                udelay(50);
 299                total += 50;
 300                if (total >= total_delay) {
 301                        debug("timed out: aborting\n");
 302                        return -ETIMEDOUT;
 303                }
 304        }
 305        debug("done\n");
 306
 307        return 0;
 308}
 309
 310static int start_aps(int ap_count, atomic_t *num_aps)
 311{
 312        int sipi_vector;
 313        /* Max location is 4KiB below 1MiB */
 314        const int max_vector_loc = ((1 << 20) - (1 << 12)) >> 12;
 315
 316        if (ap_count == 0)
 317                return 0;
 318
 319        /* The vector is sent as a 4k aligned address in one byte */
 320        sipi_vector = AP_DEFAULT_BASE >> 12;
 321
 322        if (sipi_vector > max_vector_loc) {
 323                printf("SIPI vector too large! 0x%08x\n",
 324                       sipi_vector);
 325                return -1;
 326        }
 327
 328        debug("Attempting to start %d APs\n", ap_count);
 329
 330        if (apic_wait_timeout(1000, "ICR not to be busy"))
 331                return -ETIMEDOUT;
 332
 333        /* Send INIT IPI to all but self */
 334        lapic_write(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
 335        lapic_write(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
 336                    LAPIC_DM_INIT);
 337        debug("Waiting for 10ms after sending INIT\n");
 338        mdelay(10);
 339
 340        /* Send 1st SIPI */
 341        if (apic_wait_timeout(1000, "ICR not to be busy"))
 342                return -ETIMEDOUT;
 343
 344        lapic_write(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
 345        lapic_write(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
 346                    LAPIC_DM_STARTUP | sipi_vector);
 347        if (apic_wait_timeout(10000, "first SIPI to complete"))
 348                return -ETIMEDOUT;
 349
 350        /* Wait for CPUs to check in up to 200 us */
 351        wait_for_aps(num_aps, ap_count, 200, 15);
 352
 353        /* Send 2nd SIPI */
 354        if (apic_wait_timeout(1000, "ICR not to be busy"))
 355                return -ETIMEDOUT;
 356
 357        lapic_write(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
 358        lapic_write(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
 359                    LAPIC_DM_STARTUP | sipi_vector);
 360        if (apic_wait_timeout(10000, "second SIPI to complete"))
 361                return -ETIMEDOUT;
 362
 363        /* Wait for CPUs to check in */
 364        if (wait_for_aps(num_aps, ap_count, 10000, 50)) {
 365                debug("Not all APs checked in: %d/%d\n",
 366                      atomic_read(num_aps), ap_count);
 367                return -1;
 368        }
 369
 370        return 0;
 371}
 372
 373static int bsp_do_flight_plan(struct udevice *cpu, struct mp_params *mp_params)
 374{
 375        int i;
 376        int ret = 0;
 377        const int timeout_us = 100000;
 378        const int step_us = 100;
 379        int num_aps = num_cpus - 1;
 380
 381        for (i = 0; i < mp_params->num_records; i++) {
 382                struct mp_flight_record *rec = &mp_params->flight_plan[i];
 383
 384                /* Wait for APs if the record is not released */
 385                if (atomic_read(&rec->barrier) == 0) {
 386                        /* Wait for the APs to check in */
 387                        if (wait_for_aps(&rec->cpus_entered, num_aps,
 388                                         timeout_us, step_us)) {
 389                                debug("MP record %d timeout\n", i);
 390                                ret = -1;
 391                        }
 392                }
 393
 394                if (rec->bsp_call != NULL)
 395                        rec->bsp_call(cpu, rec->bsp_arg);
 396
 397                release_barrier(&rec->barrier);
 398        }
 399        return ret;
 400}
 401
 402static int init_bsp(struct udevice **devp)
 403{
 404        char processor_name[CPU_MAX_NAME_LEN];
 405        int apic_id;
 406        int ret;
 407
 408        cpu_get_name(processor_name);
 409        debug("CPU: %s\n", processor_name);
 410
 411        apic_id = lapicid();
 412        ret = find_cpu_by_apic_id(apic_id, devp);
 413        if (ret) {
 414                printf("Cannot find boot CPU, APIC ID %d\n", apic_id);
 415                return ret;
 416        }
 417
 418        return 0;
 419}
 420
 421#ifdef CONFIG_QFW
 422static int qemu_cpu_fixup(void)
 423{
 424        int ret;
 425        int cpu_num;
 426        int cpu_online;
 427        struct udevice *dev, *pdev;
 428        struct cpu_platdata *plat;
 429        char *cpu;
 430
 431        /* first we need to find '/cpus' */
 432        for (device_find_first_child(dm_root(), &pdev);
 433             pdev;
 434             device_find_next_child(&pdev)) {
 435                if (!strcmp(pdev->name, "cpus"))
 436                        break;
 437        }
 438        if (!pdev) {
 439                printf("unable to find cpus device\n");
 440                return -ENODEV;
 441        }
 442
 443        /* calculate cpus that are already bound */
 444        cpu_num = 0;
 445        for (uclass_find_first_device(UCLASS_CPU, &dev);
 446             dev;
 447             uclass_find_next_device(&dev)) {
 448                cpu_num++;
 449        }
 450
 451        /* get actual cpu number */
 452        cpu_online = qemu_fwcfg_online_cpus();
 453        if (cpu_online < 0) {
 454                printf("unable to get online cpu number: %d\n", cpu_online);
 455                return cpu_online;
 456        }
 457
 458        /* bind addtional cpus */
 459        dev = NULL;
 460        for (; cpu_num < cpu_online; cpu_num++) {
 461                /*
 462                 * allocate device name here as device_bind_driver() does
 463                 * not copy device name, 8 bytes are enough for
 464                 * sizeof("cpu@") + 3 digits cpu number + '\0'
 465                 */
 466                cpu = malloc(8);
 467                if (!cpu) {
 468                        printf("unable to allocate device name\n");
 469                        return -ENOMEM;
 470                }
 471                sprintf(cpu, "cpu@%d", cpu_num);
 472                ret = device_bind_driver(pdev, "cpu_qemu", cpu, &dev);
 473                if (ret) {
 474                        printf("binding cpu@%d failed: %d\n", cpu_num, ret);
 475                        return ret;
 476                }
 477                plat = dev_get_parent_platdata(dev);
 478                plat->cpu_id = cpu_num;
 479        }
 480        return 0;
 481}
 482#endif
 483
 484int mp_init(struct mp_params *p)
 485{
 486        int num_aps;
 487        atomic_t *ap_count;
 488        struct udevice *cpu;
 489        int ret;
 490
 491        /* This will cause the CPUs devices to be bound */
 492        struct uclass *uc;
 493        ret = uclass_get(UCLASS_CPU, &uc);
 494        if (ret)
 495                return ret;
 496
 497#ifdef CONFIG_QFW
 498        ret = qemu_cpu_fixup();
 499        if (ret)
 500                return ret;
 501#endif
 502
 503        ret = init_bsp(&cpu);
 504        if (ret) {
 505                debug("Cannot init boot CPU: err=%d\n", ret);
 506                return ret;
 507        }
 508
 509        if (p == NULL || p->flight_plan == NULL || p->num_records < 1) {
 510                printf("Invalid MP parameters\n");
 511                return -1;
 512        }
 513
 514        num_cpus = cpu_get_count(cpu);
 515        if (num_cpus < 0) {
 516                debug("Cannot get number of CPUs: err=%d\n", num_cpus);
 517                return num_cpus;
 518        }
 519
 520        if (num_cpus < 2)
 521                debug("Warning: Only 1 CPU is detected\n");
 522
 523        ret = check_cpu_devices(num_cpus);
 524        if (ret)
 525                debug("Warning: Device tree does not describe all CPUs. Extra ones will not be started correctly\n");
 526
 527        /* Copy needed parameters so that APs have a reference to the plan */
 528        mp_info.num_records = p->num_records;
 529        mp_info.records = p->flight_plan;
 530
 531        /* Load the SIPI vector */
 532        ret = load_sipi_vector(&ap_count, num_cpus);
 533        if (ap_count == NULL)
 534                return -1;
 535
 536        /*
 537         * Make sure SIPI data hits RAM so the APs that come up will see
 538         * the startup code even if the caches are disabled
 539         */
 540        wbinvd();
 541
 542        /* Start the APs providing number of APs and the cpus_entered field */
 543        num_aps = num_cpus - 1;
 544        ret = start_aps(num_aps, ap_count);
 545        if (ret) {
 546                mdelay(1000);
 547                debug("%d/%d eventually checked in?\n", atomic_read(ap_count),
 548                      num_aps);
 549                return ret;
 550        }
 551
 552        /* Walk the flight plan for the BSP */
 553        ret = bsp_do_flight_plan(cpu, p);
 554        if (ret) {
 555                debug("CPU init failed: err=%d\n", ret);
 556                return ret;
 557        }
 558
 559        return 0;
 560}
 561
 562int mp_init_cpu(struct udevice *cpu, void *unused)
 563{
 564        struct cpu_platdata *plat = dev_get_parent_platdata(cpu);
 565
 566        /*
 567         * Multiple APs are brought up simultaneously and they may get the same
 568         * seq num in the uclass_resolve_seq() during device_probe(). To avoid
 569         * this, set req_seq to the reg number in the device tree in advance.
 570         */
 571        cpu->req_seq = fdtdec_get_int(gd->fdt_blob, dev_of_offset(cpu), "reg",
 572                                      -1);
 573        plat->ucode_version = microcode_read_rev();
 574        plat->device_id = gd->arch.x86_device;
 575
 576        return device_probe(cpu);
 577}
 578