linux/arch/x86/kernel/cpu/intel.c
   1// SPDX-License-Identifier: GPL-2.0
   2#include <linux/kernel.h>
   3#include <linux/pgtable.h>
   4
   5#include <linux/string.h>
   6#include <linux/bitops.h>
   7#include <linux/smp.h>
   8#include <linux/sched.h>
   9#include <linux/sched/clock.h>
  10#include <linux/thread_info.h>
  11#include <linux/init.h>
  12#include <linux/uaccess.h>
  13#include <linux/delay.h>
  14
  15#include <asm/cpufeature.h>
  16#include <asm/msr.h>
  17#include <asm/bugs.h>
  18#include <asm/cpu.h>
  19#include <asm/intel-family.h>
  20#include <asm/microcode_intel.h>
  21#include <asm/hwcap2.h>
  22#include <asm/elf.h>
  23#include <asm/cpu_device_id.h>
  24#include <asm/cmdline.h>
  25#include <asm/traps.h>
  26#include <asm/resctrl.h>
  27#include <asm/numa.h>
  28#include <asm/thermal.h>
  29
  30#ifdef CONFIG_X86_64
  31#include <linux/topology.h>
  32#endif
  33
  34#include "cpu.h"
  35
  36#ifdef CONFIG_X86_LOCAL_APIC
  37#include <asm/mpspec.h>
  38#include <asm/apic.h>
  39#endif
  40
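     /*
      * Handling modes for split locks and bus locks (see split_lock_init(),
      * bus_lock_init() and the #AC/#DB handlers below):
      *  sld_off       - no split lock or bus lock detection
      *  sld_warn      - warn and let the offending user task continue
      *  sld_fatal     - send SIGBUS to user tasks that trigger a split/bus lock
      *  sld_ratelimit - rate-limit user bus locks via #DB
      */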
  41enum split_lock_detect_state {
  42        sld_off = 0,
  43        sld_warn,
  44        sld_fatal,
  45        sld_ratelimit,
  46};
  47
  48/*
  49 * Default to sld_off because most systems do not support split lock detection.
  50 * sld_state_setup() will switch this to sld_warn on systems that support
  51 * split lock/bus lock detect, unless there is a command line override.
  52 */
  53static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
  54static u64 msr_test_ctrl_cache __ro_after_init;
  55
  56/*
   57 * With a name like MSR_TEST_CTRL it should go without saying, but don't touch
   58 * MSR_TEST_CTRL unless the CPU is one of the whitelisted models.  Writing it
  59 * on CPUs that do not support SLD can cause fireworks, even when writing '0'.
  60 */
  61static bool cpu_model_supports_sld __ro_after_init;
  62
  63/*
   64 * Processors which have self-snooping capability can handle conflicting
   65 * memory types across CPUs by snooping their own cache. However, there
   66 * exist CPU models in which having conflicting memory types still leads to
   67 * unpredictable behavior, machine check errors, or hangs. Clear this
   68 * feature to prevent its use on machines with known errata.
  69 */
  70static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c)
  71{
  72        switch (c->x86_model) {
  73        case INTEL_FAM6_CORE_YONAH:
  74        case INTEL_FAM6_CORE2_MEROM:
  75        case INTEL_FAM6_CORE2_MEROM_L:
  76        case INTEL_FAM6_CORE2_PENRYN:
  77        case INTEL_FAM6_CORE2_DUNNINGTON:
  78        case INTEL_FAM6_NEHALEM:
  79        case INTEL_FAM6_NEHALEM_G:
  80        case INTEL_FAM6_NEHALEM_EP:
  81        case INTEL_FAM6_NEHALEM_EX:
  82        case INTEL_FAM6_WESTMERE:
  83        case INTEL_FAM6_WESTMERE_EP:
  84        case INTEL_FAM6_SANDYBRIDGE:
  85                setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP);
  86        }
  87}
  88
  89static bool ring3mwait_disabled __read_mostly;
  90
  91static int __init ring3mwait_disable(char *__unused)
  92{
  93        ring3mwait_disabled = true;
  94        return 0;
  95}
  96__setup("ring3mwait=disable", ring3mwait_disable);
  97
  98static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
  99{
 100        /*
  101         * Ring 3 MONITOR/MWAIT is not enumerated by CPUID, so it can only
  102         * be detected by comparing the CPU family and model.
 103         */
 104        if (c->x86 != 6)
 105                return;
 106        switch (c->x86_model) {
 107        case INTEL_FAM6_XEON_PHI_KNL:
 108        case INTEL_FAM6_XEON_PHI_KNM:
 109                break;
 110        default:
 111                return;
 112        }
 113
 114        if (ring3mwait_disabled)
 115                return;
 116
 117        set_cpu_cap(c, X86_FEATURE_RING3MWAIT);
 118        this_cpu_or(msr_misc_features_shadow,
 119                    1UL << MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT);
 120
 121        if (c == &boot_cpu_data)
 122                ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
 123}
 124
 125/*
 126 * Early microcode releases for the Spectre v2 mitigation were broken.
  127 * Information taken from:
 128 * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf
 129 * - https://kb.vmware.com/s/article/52345
 130 * - Microcode revisions observed in the wild
 131 * - Release note from 20180108 microcode release
 132 */
 133struct sku_microcode {
 134        u8 model;
 135        u8 stepping;
 136        u32 microcode;
 137};
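     /*
      * Each entry lists the newest microcode revision known to be affected for
      * that model/stepping; bad_spectre_microcode() treats any revision at or
      * below it as broken.
      */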
 138static const struct sku_microcode spectre_bad_microcodes[] = {
 139        { INTEL_FAM6_KABYLAKE,          0x0B,   0x80 },
 140        { INTEL_FAM6_KABYLAKE,          0x0A,   0x80 },
 141        { INTEL_FAM6_KABYLAKE,          0x09,   0x80 },
 142        { INTEL_FAM6_KABYLAKE_L,        0x0A,   0x80 },
 143        { INTEL_FAM6_KABYLAKE_L,        0x09,   0x80 },
 144        { INTEL_FAM6_SKYLAKE_X,         0x03,   0x0100013e },
 145        { INTEL_FAM6_SKYLAKE_X,         0x04,   0x0200003c },
 146        { INTEL_FAM6_BROADWELL,         0x04,   0x28 },
 147        { INTEL_FAM6_BROADWELL_G,       0x01,   0x1b },
 148        { INTEL_FAM6_BROADWELL_D,       0x02,   0x14 },
 149        { INTEL_FAM6_BROADWELL_D,       0x03,   0x07000011 },
 150        { INTEL_FAM6_BROADWELL_X,       0x01,   0x0b000025 },
 151        { INTEL_FAM6_HASWELL_L,         0x01,   0x21 },
 152        { INTEL_FAM6_HASWELL_G,         0x01,   0x18 },
 153        { INTEL_FAM6_HASWELL,           0x03,   0x23 },
 154        { INTEL_FAM6_HASWELL_X,         0x02,   0x3b },
 155        { INTEL_FAM6_HASWELL_X,         0x04,   0x10 },
 156        { INTEL_FAM6_IVYBRIDGE_X,       0x04,   0x42a },
 157        /* Observed in the wild */
 158        { INTEL_FAM6_SANDYBRIDGE_X,     0x06,   0x61b },
 159        { INTEL_FAM6_SANDYBRIDGE_X,     0x07,   0x712 },
 160};
 161
 162static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
 163{
 164        int i;
 165
 166        /*
  167         * We know that hypervisors lie to us about the microcode version, so
  168         * we may as well hope that the host is running the correct version.
 169         */
 170        if (cpu_has(c, X86_FEATURE_HYPERVISOR))
 171                return false;
 172
 173        if (c->x86 != 6)
 174                return false;
 175
 176        for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
 177                if (c->x86_model == spectre_bad_microcodes[i].model &&
 178                    c->x86_stepping == spectre_bad_microcodes[i].stepping)
 179                        return (c->microcode <= spectre_bad_microcodes[i].microcode);
 180        }
 181        return false;
 182}
 183
 184static void early_init_intel(struct cpuinfo_x86 *c)
 185{
 186        u64 misc_enable;
 187
 188        /* Unmask CPUID levels if masked: */
 189        if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
 190                if (msr_clear_bit(MSR_IA32_MISC_ENABLE,
 191                                  MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) {
 192                        c->cpuid_level = cpuid_eax(0);
 193                        get_cpu_cap(c);
 194                }
 195        }
 196
 197        if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
 198                (c->x86 == 0x6 && c->x86_model >= 0x0e))
 199                set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 200
 201        if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64))
 202                c->microcode = intel_get_microcode_revision();
 203
 204        /* Now if any of them are set, check the blacklist and clear the lot */
 205        if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
 206             cpu_has(c, X86_FEATURE_INTEL_STIBP) ||
 207             cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
 208             cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
 209                pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
 210                setup_clear_cpu_cap(X86_FEATURE_IBRS);
 211                setup_clear_cpu_cap(X86_FEATURE_IBPB);
 212                setup_clear_cpu_cap(X86_FEATURE_STIBP);
 213                setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
 214                setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL);
 215                setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
 216                setup_clear_cpu_cap(X86_FEATURE_SSBD);
 217                setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD);
 218        }
 219
 220        /*
 221         * Atom erratum AAE44/AAF40/AAG38/AAH41:
 222         *
 223         * A race condition between speculative fetches and invalidating
 224         * a large page.  This is worked around in microcode, but we
 225         * need the microcode to have already been loaded... so if it is
 226         * not, recommend a BIOS update and disable large pages.
 227         */
 228        if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 &&
 229            c->microcode < 0x20e) {
 230                pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
 231                clear_cpu_cap(c, X86_FEATURE_PSE);
 232        }
 233
 234#ifdef CONFIG_X86_64
 235        set_cpu_cap(c, X86_FEATURE_SYSENTER32);
 236#else
 237        /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
 238        if (c->x86 == 15 && c->x86_cache_alignment == 64)
 239                c->x86_cache_alignment = 128;
 240#endif
 241
 242        /* CPUID workaround for 0F33/0F34 CPU */
 243        if (c->x86 == 0xF && c->x86_model == 0x3
 244            && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4))
 245                c->x86_phys_bits = 36;
 246
 247        /*
  248         * c->x86_power is CPUID 8000_0007 EDX. Bit 8 means the TSC runs at a
  249         * constant rate across P/T states and does not stop in deep C-states.
 250         *
 251         * It is also reliable across cores and sockets. (but not across
 252         * cabinets - we turn it off in that case explicitly.)
 253         */
 254        if (c->x86_power & (1 << 8)) {
 255                set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 256                set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
 257        }
 258
  259        /* Penwell and Cloverview have a TSC that doesn't stop during S3 */
 260        if (c->x86 == 6) {
 261                switch (c->x86_model) {
 262                case INTEL_FAM6_ATOM_SALTWELL_MID:
 263                case INTEL_FAM6_ATOM_SALTWELL_TABLET:
 264                case INTEL_FAM6_ATOM_SILVERMONT_MID:
 265                case INTEL_FAM6_ATOM_AIRMONT_NP:
 266                        set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3);
 267                        break;
 268                default:
 269                        break;
 270                }
 271        }
 272
 273        /*
 274         * There is a known erratum on Pentium III and Core Solo
 275         * and Core Duo CPUs.
 276         * " Page with PAT set to WC while associated MTRR is UC
 277         *   may consolidate to UC "
 278         * Because of this erratum, it is better to stick with
 279         * setting WC in MTRR rather than using PAT on these CPUs.
 280         *
 281         * Enable PAT WC only on P4, Core 2 or later CPUs.
 282         */
 283        if (c->x86 == 6 && c->x86_model < 15)
 284                clear_cpu_cap(c, X86_FEATURE_PAT);
 285
 286        /*
 287         * If fast string is not enabled in IA32_MISC_ENABLE for any reason,
 288         * clear the fast string and enhanced fast string CPU capabilities.
 289         */
 290        if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
 291                rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
 292                if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
 293                        pr_info("Disabled fast string operations\n");
 294                        setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
 295                        setup_clear_cpu_cap(X86_FEATURE_ERMS);
 296                }
 297        }
 298
 299        /*
 300         * Intel Quark Core DevMan_001.pdf section 6.4.11
 301         * "The operating system also is required to invalidate (i.e., flush)
 302         *  the TLB when any changes are made to any of the page table entries.
 303         *  The operating system must reload CR3 to cause the TLB to be flushed"
 304         *
 305         * As a result, boot_cpu_has(X86_FEATURE_PGE) in arch/x86/include/asm/tlbflush.h
 306         * should be false so that __flush_tlb_all() causes CR3 instead of CR4.PGE
 307         * to be modified.
 308         */
 309        if (c->x86 == 5 && c->x86_model == 9) {
 310                pr_info("Disabling PGE capability bit\n");
 311                setup_clear_cpu_cap(X86_FEATURE_PGE);
 312        }
 313
 314        if (c->cpuid_level >= 0x00000001) {
 315                u32 eax, ebx, ecx, edx;
 316
 317                cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
 318                /*
 319                 * If HTT (EDX[28]) is set EBX[16:23] contain the number of
 320                 * apicids which are reserved per package. Store the resulting
 321                 * shift value for the package management code.
 322                 */
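                     /* Example: EBX[23:16] = 16 -> x86_coreid_bits = get_count_order(16) = 4. */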
 323                if (edx & (1U << 28))
 324                        c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
 325        }
 326
 327        check_memory_type_self_snoop_errata(c);
 328
 329        /*
 330         * Get the number of SMT siblings early from the extended topology
 331         * leaf, if available. Otherwise try the legacy SMT detection.
 332         */
 333        if (detect_extended_topology_early(c) < 0)
 334                detect_ht_early(c);
 335}
 336
 337static void bsp_init_intel(struct cpuinfo_x86 *c)
 338{
 339        resctrl_cpu_detect(c);
 340}
 341
 342#ifdef CONFIG_X86_32
 343/*
 344 *      Early probe support logic for ppro memory erratum #50
 345 *
 346 *      This is called before we do cpu ident work
 347 */
 348
 349int ppro_with_ram_bug(void)
 350{
 351        /* Uses data from early_cpu_detect now */
 352        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
 353            boot_cpu_data.x86 == 6 &&
 354            boot_cpu_data.x86_model == 1 &&
 355            boot_cpu_data.x86_stepping < 8) {
 356                pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
 357                return 1;
 358        }
 359        return 0;
 360}
 361
 362static void intel_smp_check(struct cpuinfo_x86 *c)
 363{
  364        /* Only check secondary CPUs, i.e. calls from identify_secondary_cpu(). */
 365        if (!c->cpu_index)
 366                return;
 367
 368        /*
  369         * B-step ("Mask B") Pentium, but not Pentium MMX
 370         */
 371        if (c->x86 == 5 &&
 372            c->x86_stepping >= 1 && c->x86_stepping <= 4 &&
 373            c->x86_model <= 3) {
 374                /*
 375                 * Remember we have B step Pentia with bugs
 376                 */
  377                WARN_ONCE(1, "WARNING: SMP operation may be unreliable "
  378                                    "with B stepping processors.\n");
 379        }
 380}
 381
 382static int forcepae;
 383static int __init forcepae_setup(char *__unused)
 384{
 385        forcepae = 1;
 386        return 1;
 387}
 388__setup("forcepae", forcepae_setup);
 389
 390static void intel_workarounds(struct cpuinfo_x86 *c)
 391{
 392#ifdef CONFIG_X86_F00F_BUG
 393        /*
 394         * All models of Pentium and Pentium with MMX technology CPUs
 395         * have the F0 0F bug, which lets nonprivileged users lock up the
 396         * system. Announce that the fault handler will be checking for it.
 397         * The Quark is also family 5, but does not have the same bug.
 398         */
 399        clear_cpu_bug(c, X86_BUG_F00F);
 400        if (c->x86 == 5 && c->x86_model < 9) {
 401                static int f00f_workaround_enabled;
 402
 403                set_cpu_bug(c, X86_BUG_F00F);
 404                if (!f00f_workaround_enabled) {
 405                        pr_notice("Intel Pentium with F0 0F bug - workaround enabled.\n");
 406                        f00f_workaround_enabled = 1;
 407                }
 408        }
 409#endif
 410
 411        /*
 412         * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until
 413         * model 3 mask 3
 414         */
 415        if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633)
 416                clear_cpu_cap(c, X86_FEATURE_SEP);
 417
 418        /*
 419         * PAE CPUID issue: many Pentium M report no PAE but may have a
 420         * functionally usable PAE implementation.
 421         * Forcefully enable PAE if kernel parameter "forcepae" is present.
 422         */
 423        if (forcepae) {
 424                pr_warn("PAE forced!\n");
 425                set_cpu_cap(c, X86_FEATURE_PAE);
 426                add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE);
 427        }
 428
 429        /*
 430         * P4 Xeon erratum 037 workaround.
 431         * Hardware prefetcher may cause stale data to be loaded into the cache.
 432         */
 433        if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) {
 434                if (msr_set_bit(MSR_IA32_MISC_ENABLE,
 435                                MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) {
 436                        pr_info("CPU: C0 stepping P4 Xeon detected.\n");
 437                        pr_info("CPU: Disabling hardware prefetching (Erratum 037)\n");
 438                }
 439        }
 440
 441        /*
 442         * See if we have a good local APIC by checking for buggy Pentia,
 443         * i.e. all B steppings and the C2 stepping of P54C when using their
 444         * integrated APIC (see 11AP erratum in "Pentium Processor
 445         * Specification Update").
 446         */
 447        if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
 448            (c->x86_stepping < 0x6 || c->x86_stepping == 0xb))
 449                set_cpu_bug(c, X86_BUG_11AP);
 450
 451
 452#ifdef CONFIG_X86_INTEL_USERCOPY
 453        /*
 454         * Set up the preferred alignment for movsl bulk memory moves
 455         */
 456        switch (c->x86) {
 457        case 4:         /* 486: untested */
 458                break;
 459        case 5:         /* Old Pentia: untested */
 460                break;
 461        case 6:         /* PII/PIII only like movsl with 8-byte alignment */
 462                movsl_mask.mask = 7;
 463                break;
 464        case 15:        /* P4 is OK down to 8-byte alignment */
 465                movsl_mask.mask = 7;
 466                break;
 467        }
 468#endif
 469
 470        intel_smp_check(c);
 471}
 472#else
 473static void intel_workarounds(struct cpuinfo_x86 *c)
 474{
 475}
 476#endif
 477
 478static void srat_detect_node(struct cpuinfo_x86 *c)
 479{
 480#ifdef CONFIG_NUMA
 481        unsigned node;
 482        int cpu = smp_processor_id();
 483
 484        /* Don't do the funky fallback heuristics the AMD version employs
 485           for now. */
 486        node = numa_cpu_node(cpu);
 487        if (node == NUMA_NO_NODE || !node_online(node)) {
 488                /* reuse the value from init_cpu_to_node() */
 489                node = cpu_to_node(cpu);
 490        }
 491        numa_set_node(cpu, node);
 492#endif
 493}
 494
 495#define MSR_IA32_TME_ACTIVATE           0x982
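     /*
      * IA32_TME_ACTIVATE is configured and locked down by the BIOS before the
      * OS boots; detect_tme() below only reads it and adjusts x86_phys_bits.
      */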
 496
 497/* Helpers to access TME_ACTIVATE MSR */
 498#define TME_ACTIVATE_LOCKED(x)          (x & 0x1)
 499#define TME_ACTIVATE_ENABLED(x)         (x & 0x2)
 500
 501#define TME_ACTIVATE_POLICY(x)          ((x >> 4) & 0xf)        /* Bits 7:4 */
 502#define TME_ACTIVATE_POLICY_AES_XTS_128 0
 503
 504#define TME_ACTIVATE_KEYID_BITS(x)      ((x >> 32) & 0xf)       /* Bits 35:32 */
 505
 506#define TME_ACTIVATE_CRYPTO_ALGS(x)     ((x >> 48) & 0xffff)    /* Bits 63:48 */
 507#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
 508
 509/* Values for mktme_status (SW only construct) */
 510#define MKTME_ENABLED                   0
 511#define MKTME_DISABLED                  1
 512#define MKTME_UNINITIALIZED             2
 513static int mktme_status = MKTME_UNINITIALIZED;
 514
 515static void detect_tme(struct cpuinfo_x86 *c)
 516{
 517        u64 tme_activate, tme_policy, tme_crypto_algs;
 518        int keyid_bits = 0, nr_keyids = 0;
 519        static u64 tme_activate_cpu0 = 0;
 520
 521        rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
 522
 523        if (mktme_status != MKTME_UNINITIALIZED) {
 524                if (tme_activate != tme_activate_cpu0) {
 525                        /* Broken BIOS? */
 526                        pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
 527                        pr_err_once("x86/tme: MKTME is not usable\n");
 528                        mktme_status = MKTME_DISABLED;
 529
 530                        /* Proceed. We may need to exclude bits from x86_phys_bits. */
 531                }
 532        } else {
 533                tme_activate_cpu0 = tme_activate;
 534        }
 535
 536        if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
 537                pr_info_once("x86/tme: not enabled by BIOS\n");
 538                mktme_status = MKTME_DISABLED;
 539                return;
 540        }
 541
 542        if (mktme_status != MKTME_UNINITIALIZED)
 543                goto detect_keyid_bits;
 544
 545        pr_info("x86/tme: enabled by BIOS\n");
 546
 547        tme_policy = TME_ACTIVATE_POLICY(tme_activate);
 548        if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
 549                pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
 550
 551        tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
 552        if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
 553                pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
 554                                tme_crypto_algs);
 555                mktme_status = MKTME_DISABLED;
 556        }
 557detect_keyid_bits:
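             /* Example: keyid_bits = 6 -> nr_keyids = (1 << 6) - 1 = 63. */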
 558        keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
 559        nr_keyids = (1UL << keyid_bits) - 1;
 560        if (nr_keyids) {
 561                pr_info_once("x86/mktme: enabled by BIOS\n");
 562                pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
 563        } else {
 564                pr_info_once("x86/mktme: disabled by BIOS\n");
 565        }
 566
 567        if (mktme_status == MKTME_UNINITIALIZED) {
 568                /* MKTME is usable */
 569                mktme_status = MKTME_ENABLED;
 570        }
 571
 572        /*
 573         * KeyID bits effectively lower the number of physical address
 574         * bits.  Update cpuinfo_x86::x86_phys_bits accordingly.
 575         */
 576        c->x86_phys_bits -= keyid_bits;
 577}
 578
 579static void init_cpuid_fault(struct cpuinfo_x86 *c)
 580{
 581        u64 msr;
 582
 583        if (!rdmsrl_safe(MSR_PLATFORM_INFO, &msr)) {
 584                if (msr & MSR_PLATFORM_INFO_CPUID_FAULT)
 585                        set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
 586        }
 587}
 588
 589static void init_intel_misc_features(struct cpuinfo_x86 *c)
 590{
 591        u64 msr;
 592
 593        if (rdmsrl_safe(MSR_MISC_FEATURES_ENABLES, &msr))
 594                return;
 595
 596        /* Clear all MISC features */
 597        this_cpu_write(msr_misc_features_shadow, 0);
 598
 599        /* Check features and update capabilities and shadow control bits */
 600        init_cpuid_fault(c);
 601        probe_xeon_phi_r3mwait(c);
 602
 603        msr = this_cpu_read(msr_misc_features_shadow);
 604        wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
 605}
 606
 607static void split_lock_init(void);
 608static void bus_lock_init(void);
 609
 610static void init_intel(struct cpuinfo_x86 *c)
 611{
 612        early_init_intel(c);
 613
 614        intel_workarounds(c);
 615
 616        /*
 617         * Detect the extended topology information if available. This
 618         * will reinitialise the initial_apicid which will be used
 619         * in init_intel_cacheinfo()
 620         */
 621        detect_extended_topology(c);
 622
 623        if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
 624                /*
  625                 * Let's use the legacy CPUID leaves 0x1 and 0x4 for topology
  626                 * detection.
 627                 */
 628                detect_num_cpu_cores(c);
 629#ifdef CONFIG_X86_32
 630                detect_ht(c);
 631#endif
 632        }
 633
 634        init_intel_cacheinfo(c);
 635
 636        if (c->cpuid_level > 9) {
 637                unsigned eax = cpuid_eax(10);
 638                /* Check for version and the number of counters */
 639                if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
 640                        set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
 641        }
 642
 643        if (cpu_has(c, X86_FEATURE_XMM2))
 644                set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
 645
 646        if (boot_cpu_has(X86_FEATURE_DS)) {
 647                unsigned int l1, l2;
 648
 649                rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
 650                if (!(l1 & (1<<11)))
 651                        set_cpu_cap(c, X86_FEATURE_BTS);
 652                if (!(l1 & (1<<12)))
 653                        set_cpu_cap(c, X86_FEATURE_PEBS);
 654        }
 655
 656        if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_CLFLUSH) &&
 657            (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
 658                set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
 659
 660        if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_MWAIT) &&
 661                ((c->x86_model == INTEL_FAM6_ATOM_GOLDMONT)))
 662                set_cpu_bug(c, X86_BUG_MONITOR);
 663
 664#ifdef CONFIG_X86_64
 665        if (c->x86 == 15)
 666                c->x86_cache_alignment = c->x86_clflush_size * 2;
 667        if (c->x86 == 6)
 668                set_cpu_cap(c, X86_FEATURE_REP_GOOD);
 669#else
 670        /*
 671         * Names for the Pentium II/Celeron processors
 672         * detectable only by also checking the cache size.
 673         * Dixon is NOT a Celeron.
 674         */
 675        if (c->x86 == 6) {
 676                unsigned int l2 = c->x86_cache_size;
 677                char *p = NULL;
 678
 679                switch (c->x86_model) {
 680                case 5:
 681                        if (l2 == 0)
 682                                p = "Celeron (Covington)";
 683                        else if (l2 == 256)
 684                                p = "Mobile Pentium II (Dixon)";
 685                        break;
 686
 687                case 6:
 688                        if (l2 == 128)
 689                                p = "Celeron (Mendocino)";
 690                        else if (c->x86_stepping == 0 || c->x86_stepping == 5)
 691                                p = "Celeron-A";
 692                        break;
 693
 694                case 8:
 695                        if (l2 == 128)
 696                                p = "Celeron (Coppermine)";
 697                        break;
 698                }
 699
 700                if (p)
 701                        strcpy(c->x86_model_id, p);
 702        }
 703
 704        if (c->x86 == 15)
 705                set_cpu_cap(c, X86_FEATURE_P4);
 706        if (c->x86 == 6)
 707                set_cpu_cap(c, X86_FEATURE_P3);
 708#endif
 709
 710        /* Work around errata */
 711        srat_detect_node(c);
 712
 713        init_ia32_feat_ctl(c);
 714
 715        if (cpu_has(c, X86_FEATURE_TME))
 716                detect_tme(c);
 717
 718        init_intel_misc_features(c);
 719
 720        if (tsx_ctrl_state == TSX_CTRL_ENABLE)
 721                tsx_enable();
 722        else if (tsx_ctrl_state == TSX_CTRL_DISABLE)
 723                tsx_disable();
 724        else if (tsx_ctrl_state == TSX_CTRL_RTM_ALWAYS_ABORT)
 725                tsx_clear_cpuid();
 726
 727        split_lock_init();
 728        bus_lock_init();
 729
 730        intel_init_thermal(c);
 731}
 732
 733#ifdef CONFIG_X86_32
 734static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
 735{
 736        /*
 737         * Intel PIII Tualatin. This comes in two flavours.
  738         * One has 256 KB of cache, the other 512 KB. We have no way
  739         * to determine which, so we use a boot-time override
  740         * for the 512 KB model, and assume 256 KB otherwise.
 741         */
 742        if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0))
 743                size = 256;
 744
 745        /*
 746         * Intel Quark SoC X1000 contains a 4-way set associative
 747         * 16K cache with a 16 byte cache line and 256 lines per tag
 748         */
 749        if ((c->x86 == 5) && (c->x86_model == 9))
 750                size = 16;
 751        return size;
 752}
 753#endif
 754
 755#define TLB_INST_4K     0x01
 756#define TLB_INST_4M     0x02
 757#define TLB_INST_2M_4M  0x03
 758
 759#define TLB_INST_ALL    0x05
 760#define TLB_INST_1G     0x06
 761
 762#define TLB_DATA_4K     0x11
 763#define TLB_DATA_4M     0x12
 764#define TLB_DATA_2M_4M  0x13
 765#define TLB_DATA_4K_4M  0x14
 766
 767#define TLB_DATA_1G     0x16
 768
 769#define TLB_DATA0_4K    0x21
 770#define TLB_DATA0_4M    0x22
 771#define TLB_DATA0_2M_4M 0x23
 772
 773#define STLB_4K         0x41
 774#define STLB_4K_2M      0x42
 775
 776static const struct _tlb_table intel_tlb_table[] = {
 777        { 0x01, TLB_INST_4K,            32,     " TLB_INST 4 KByte pages, 4-way set associative" },
  778        { 0x02, TLB_INST_4M,            2,      " TLB_INST 4 MByte pages, fully associative" },
 779        { 0x03, TLB_DATA_4K,            64,     " TLB_DATA 4 KByte pages, 4-way set associative" },
 780        { 0x04, TLB_DATA_4M,            8,      " TLB_DATA 4 MByte pages, 4-way set associative" },
 781        { 0x05, TLB_DATA_4M,            32,     " TLB_DATA 4 MByte pages, 4-way set associative" },
 782        { 0x0b, TLB_INST_4M,            4,      " TLB_INST 4 MByte pages, 4-way set associative" },
 783        { 0x4f, TLB_INST_4K,            32,     " TLB_INST 4 KByte pages" },
 784        { 0x50, TLB_INST_ALL,           64,     " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
 785        { 0x51, TLB_INST_ALL,           128,    " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
 786        { 0x52, TLB_INST_ALL,           256,    " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
 787        { 0x55, TLB_INST_2M_4M,         7,      " TLB_INST 2-MByte or 4-MByte pages, fully associative" },
 788        { 0x56, TLB_DATA0_4M,           16,     " TLB_DATA0 4 MByte pages, 4-way set associative" },
 789        { 0x57, TLB_DATA0_4K,           16,     " TLB_DATA0 4 KByte pages, 4-way associative" },
 790        { 0x59, TLB_DATA0_4K,           16,     " TLB_DATA0 4 KByte pages, fully associative" },
 791        { 0x5a, TLB_DATA0_2M_4M,        32,     " TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative" },
 792        { 0x5b, TLB_DATA_4K_4M,         64,     " TLB_DATA 4 KByte and 4 MByte pages" },
 793        { 0x5c, TLB_DATA_4K_4M,         128,    " TLB_DATA 4 KByte and 4 MByte pages" },
 794        { 0x5d, TLB_DATA_4K_4M,         256,    " TLB_DATA 4 KByte and 4 MByte pages" },
  795        { 0x61, TLB_INST_4K,            48,     " TLB_INST 4 KByte pages, fully associative" },
 796        { 0x63, TLB_DATA_1G,            4,      " TLB_DATA 1 GByte pages, 4-way set associative" },
 797        { 0x6b, TLB_DATA_4K,            256,    " TLB_DATA 4 KByte pages, 8-way associative" },
 798        { 0x6c, TLB_DATA_2M_4M,         128,    " TLB_DATA 2 MByte or 4 MByte pages, 8-way associative" },
 799        { 0x6d, TLB_DATA_1G,            16,     " TLB_DATA 1 GByte pages, fully associative" },
 800        { 0x76, TLB_INST_2M_4M,         8,      " TLB_INST 2-MByte or 4-MByte pages, fully associative" },
 801        { 0xb0, TLB_INST_4K,            128,    " TLB_INST 4 KByte pages, 4-way set associative" },
  802        { 0xb1, TLB_INST_2M_4M,         4,      " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way, 4 entries" },
 803        { 0xb2, TLB_INST_4K,            64,     " TLB_INST 4KByte pages, 4-way set associative" },
 804        { 0xb3, TLB_DATA_4K,            128,    " TLB_DATA 4 KByte pages, 4-way set associative" },
 805        { 0xb4, TLB_DATA_4K,            256,    " TLB_DATA 4 KByte pages, 4-way associative" },
 806        { 0xb5, TLB_INST_4K,            64,     " TLB_INST 4 KByte pages, 8-way set associative" },
 807        { 0xb6, TLB_INST_4K,            128,    " TLB_INST 4 KByte pages, 8-way set associative" },
 808        { 0xba, TLB_DATA_4K,            64,     " TLB_DATA 4 KByte pages, 4-way associative" },
 809        { 0xc0, TLB_DATA_4K_4M,         8,      " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" },
 810        { 0xc1, STLB_4K_2M,             1024,   " STLB 4 KByte and 2 MByte pages, 8-way associative" },
 811        { 0xc2, TLB_DATA_2M_4M,         16,     " TLB_DATA 2 MByte/4MByte pages, 4-way associative" },
 812        { 0xca, STLB_4K,                512,    " STLB 4 KByte pages, 4-way associative" },
 813        { 0x00, 0, 0 }
 814};
 815
 816static void intel_tlb_lookup(const unsigned char desc)
 817{
 818        unsigned char k;
 819        if (desc == 0)
 820                return;
 821
 822        /* look up this descriptor in the table */
 823        for (k = 0; intel_tlb_table[k].descriptor != desc &&
 824             intel_tlb_table[k].descriptor != 0; k++)
 825                ;
 826
 827        if (intel_tlb_table[k].tlb_type == 0)
 828                return;
 829
 830        switch (intel_tlb_table[k].tlb_type) {
 831        case STLB_4K:
 832                if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
 833                        tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
 834                if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
 835                        tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
 836                break;
 837        case STLB_4K_2M:
 838                if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
 839                        tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
 840                if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
 841                        tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
 842                if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
 843                        tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
 844                if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries)
 845                        tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries;
 846                if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
 847                        tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
 848                if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
 849                        tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
 850                break;
 851        case TLB_INST_ALL:
 852                if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
 853                        tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
 854                if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
 855                        tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
 856                if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
 857                        tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
 858                break;
 859        case TLB_INST_4K:
 860                if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
 861                        tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
 862                break;
 863        case TLB_INST_4M:
 864                if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
 865                        tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
 866                break;
 867        case TLB_INST_2M_4M:
 868                if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
 869                        tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
 870                if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
 871                        tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
 872                break;
 873        case TLB_DATA_4K:
 874        case TLB_DATA0_4K:
 875                if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
 876                        tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
 877                break;
 878        case TLB_DATA_4M:
 879        case TLB_DATA0_4M:
 880                if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
 881                        tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
 882                break;
 883        case TLB_DATA_2M_4M:
 884        case TLB_DATA0_2M_4M:
 885                if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries)
 886                        tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries;
 887                if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
 888                        tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
 889                break;
 890        case TLB_DATA_4K_4M:
 891                if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
 892                        tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
 893                if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
 894                        tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
 895                break;
 896        case TLB_DATA_1G:
 897                if (tlb_lld_1g[ENTRIES] < intel_tlb_table[k].entries)
 898                        tlb_lld_1g[ENTRIES] = intel_tlb_table[k].entries;
 899                break;
 900        }
 901}
 902
 903static void intel_detect_tlb(struct cpuinfo_x86 *c)
 904{
 905        int i, j, n;
 906        unsigned int regs[4];
 907        unsigned char *desc = (unsigned char *)regs;
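             /* Scan EAX..EDX as 16 individual one-byte TLB/cache descriptors. */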
 908
 909        if (c->cpuid_level < 2)
 910                return;
 911
 912        /* Number of times to iterate */
 913        n = cpuid_eax(2) & 0xFF;
 914
 915        for (i = 0 ; i < n ; i++) {
 916                cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
 917
 918                /* If bit 31 is set, this is an unknown format */
 919                for (j = 0 ; j < 3 ; j++)
 920                        if (regs[j] & (1 << 31))
 921                                regs[j] = 0;
 922
 923                /* Byte 0 is level count, not a descriptor */
 924                for (j = 1 ; j < 16 ; j++)
 925                        intel_tlb_lookup(desc[j]);
 926        }
 927}
 928
 929static const struct cpu_dev intel_cpu_dev = {
 930        .c_vendor       = "Intel",
 931        .c_ident        = { "GenuineIntel" },
 932#ifdef CONFIG_X86_32
 933        .legacy_models = {
 934                { .family = 4, .model_names =
 935                  {
 936                          [0] = "486 DX-25/33",
 937                          [1] = "486 DX-50",
 938                          [2] = "486 SX",
 939                          [3] = "486 DX/2",
 940                          [4] = "486 SL",
 941                          [5] = "486 SX/2",
 942                          [7] = "486 DX/2-WB",
 943                          [8] = "486 DX/4",
 944                          [9] = "486 DX/4-WB"
 945                  }
 946                },
 947                { .family = 5, .model_names =
 948                  {
 949                          [0] = "Pentium 60/66 A-step",
 950                          [1] = "Pentium 60/66",
 951                          [2] = "Pentium 75 - 200",
 952                          [3] = "OverDrive PODP5V83",
 953                          [4] = "Pentium MMX",
 954                          [7] = "Mobile Pentium 75 - 200",
 955                          [8] = "Mobile Pentium MMX",
 956                          [9] = "Quark SoC X1000",
 957                  }
 958                },
 959                { .family = 6, .model_names =
 960                  {
 961                          [0] = "Pentium Pro A-step",
 962                          [1] = "Pentium Pro",
 963                          [3] = "Pentium II (Klamath)",
 964                          [4] = "Pentium II (Deschutes)",
 965                          [5] = "Pentium II (Deschutes)",
 966                          [6] = "Mobile Pentium II",
 967                          [7] = "Pentium III (Katmai)",
 968                          [8] = "Pentium III (Coppermine)",
 969                          [10] = "Pentium III (Cascades)",
 970                          [11] = "Pentium III (Tualatin)",
 971                  }
 972                },
 973                { .family = 15, .model_names =
 974                  {
 975                          [0] = "Pentium 4 (Unknown)",
 976                          [1] = "Pentium 4 (Willamette)",
 977                          [2] = "Pentium 4 (Northwood)",
 978                          [4] = "Pentium 4 (Foster)",
 979                          [5] = "Pentium 4 (Foster)",
 980                  }
 981                },
 982        },
 983        .legacy_cache_size = intel_size_cache,
 984#endif
 985        .c_detect_tlb   = intel_detect_tlb,
 986        .c_early_init   = early_init_intel,
 987        .c_bsp_init     = bsp_init_intel,
 988        .c_init         = init_intel,
 989        .c_x86_vendor   = X86_VENDOR_INTEL,
 990};
 991
 992cpu_dev_register(intel_cpu_dev);
 993
 994#undef pr_fmt
 995#define pr_fmt(fmt) "x86/split lock detection: " fmt
 996
 997static const struct {
 998        const char                      *option;
 999        enum split_lock_detect_state    state;
1000} sld_options[] __initconst = {
1001        { "off",        sld_off   },
1002        { "warn",       sld_warn  },
1003        { "fatal",      sld_fatal },
1004        { "ratelimit:", sld_ratelimit },
1005};
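     /*
      * These strings are matched against the "split_lock_detect=" kernel
      * command line parameter, e.g. "split_lock_detect=warn" or
      * "split_lock_detect=ratelimit:10".
      */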
1006
1007static struct ratelimit_state bld_ratelimit;
1008
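     /*
      * Returns true either on an exact match of @arg against @opt, or when
      * @opt is the "ratelimit:" prefix and @arg carries a valid rate
      * (1-1000 bus locks/sec), in which case bld_ratelimit is also initialized.
      */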
1009static inline bool match_option(const char *arg, int arglen, const char *opt)
1010{
1011        int len = strlen(opt), ratelimit;
1012
1013        if (strncmp(arg, opt, len))
1014                return false;
1015
1016        /*
1017         * Min ratelimit is 1 bus lock/sec.
1018         * Max ratelimit is 1000 bus locks/sec.
1019         */
1020        if (sscanf(arg, "ratelimit:%d", &ratelimit) == 1 &&
1021            ratelimit > 0 && ratelimit <= 1000) {
1022                ratelimit_state_init(&bld_ratelimit, HZ, ratelimit);
1023                ratelimit_set_flags(&bld_ratelimit, RATELIMIT_MSG_ON_RELEASE);
1024                return true;
1025        }
1026
1027        return len == arglen;
1028}
1029
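     /*
      * Probe whether MSR_TEST_CTRL is usable by writing the split lock detect
      * bit in the requested state and reading it back. On success the bit is
      * left in the requested state.
      */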
1030static bool split_lock_verify_msr(bool on)
1031{
1032        u64 ctrl, tmp;
1033
1034        if (rdmsrl_safe(MSR_TEST_CTRL, &ctrl))
1035                return false;
1036        if (on)
1037                ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
1038        else
1039                ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
1040        if (wrmsrl_safe(MSR_TEST_CTRL, ctrl))
1041                return false;
1042        rdmsrl(MSR_TEST_CTRL, tmp);
1043        return ctrl == tmp;
1044}
1045
1046static void __init sld_state_setup(void)
1047{
1048        enum split_lock_detect_state state = sld_warn;
1049        char arg[20];
1050        int i, ret;
1051
1052        if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
1053            !boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
1054                return;
1055
1056        ret = cmdline_find_option(boot_command_line, "split_lock_detect",
1057                                  arg, sizeof(arg));
1058        if (ret >= 0) {
1059                for (i = 0; i < ARRAY_SIZE(sld_options); i++) {
1060                        if (match_option(arg, ret, sld_options[i].option)) {
1061                                state = sld_options[i].state;
1062                                break;
1063                        }
1064                }
1065        }
1066        sld_state = state;
1067}
1068
1069static void __init __split_lock_setup(void)
1070{
1071        if (!split_lock_verify_msr(false)) {
1072                pr_info("MSR access failed: Disabled\n");
1073                return;
1074        }
1075
1076        rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
1077
1078        if (!split_lock_verify_msr(true)) {
1079                pr_info("MSR access failed: Disabled\n");
1080                return;
1081        }
1082
1083        /* Restore the MSR to its cached value. */
1084        wrmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
1085
1086        setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT);
1087}
1088
1089/*
1090 * MSR_TEST_CTRL is per core, but we treat it like a per CPU MSR. Locking
1091 * is not implemented as one thread could undo the setting of the other
1092 * thread immediately after dropping the lock anyway.
1093 */
1094static void sld_update_msr(bool on)
1095{
1096        u64 test_ctrl_val = msr_test_ctrl_cache;
1097
1098        if (on)
1099                test_ctrl_val |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
1100
1101        wrmsrl(MSR_TEST_CTRL, test_ctrl_val);
1102}
1103
1104static void split_lock_init(void)
1105{
1106        /*
 1107         * In ratelimit mode bus locks are handled by #DB, and #AC for
 1108         * split lock is left disabled.
1109         */
1110        if (sld_state == sld_ratelimit) {
1111                split_lock_verify_msr(false);
1112                return;
1113        }
1114
1115        if (cpu_model_supports_sld)
1116                split_lock_verify_msr(sld_state != sld_off);
1117}
1118
1119static void split_lock_warn(unsigned long ip)
1120{
1121        pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
1122                            current->comm, current->pid, ip);
1123
1124        /*
1125         * Disable the split lock detection for this task so it can make
1126         * progress and set TIF_SLD so the detection is re-enabled via
1127         * switch_to_sld() when the task is scheduled out.
1128         */
1129        sld_update_msr(false);
1130        set_tsk_thread_flag(current, TIF_SLD);
1131}
1132
1133bool handle_guest_split_lock(unsigned long ip)
1134{
1135        if (sld_state == sld_warn) {
1136                split_lock_warn(ip);
1137                return true;
1138        }
1139
1140        pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n",
1141                     current->comm, current->pid,
1142                     sld_state == sld_fatal ? "fatal" : "bogus", ip);
1143
1144        current->thread.error_code = 0;
1145        current->thread.trap_nr = X86_TRAP_AC;
1146        force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
1147        return false;
1148}
1149EXPORT_SYMBOL_GPL(handle_guest_split_lock);
1150
1151static void bus_lock_init(void)
1152{
1153        u64 val;
1154
1155        /*
1156         * Warn and fatal are handled by #AC for split lock if #AC for
1157         * split lock is supported.
1158         */
1159        if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) ||
1160            (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
1161            (sld_state == sld_warn || sld_state == sld_fatal)) ||
1162            sld_state == sld_off)
1163                return;
1164
1165        /*
 1166         * Enable #DB for bus lock. All bus locks are handled in #DB except
 1167         * split locks, which are handled in #AC in the fatal case.
1168         */
1169        rdmsrl(MSR_IA32_DEBUGCTLMSR, val);
1170        val |= DEBUGCTLMSR_BUS_LOCK_DETECT;
1171        wrmsrl(MSR_IA32_DEBUGCTLMSR, val);
1172}
1173
1174bool handle_user_split_lock(struct pt_regs *regs, long error_code)
1175{
1176        if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
1177                return false;
1178        split_lock_warn(regs->ip);
1179        return true;
1180}
1181
1182void handle_bus_lock(struct pt_regs *regs)
1183{
1184        switch (sld_state) {
1185        case sld_off:
1186                break;
1187        case sld_ratelimit:
1188                /* Enforce no more than bld_ratelimit bus locks/sec. */
1189                while (!__ratelimit(&bld_ratelimit))
1190                        msleep(20);
1191                /* Warn on the bus lock. */
1192                fallthrough;
1193        case sld_warn:
1194                pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n",
1195                                    current->comm, current->pid, regs->ip);
1196                break;
1197        case sld_fatal:
1198                force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
1199                break;
1200        }
1201}
1202
1203/*
1204 * This function is called only when switching between tasks with
1205 * different split-lock detection modes. It sets the MSR for the
1206 * mode of the new task. This is right most of the time, but since
1207 * the MSR is shared by hyperthreads on a physical core there can
1208 * be glitches when the two threads need different modes.
1209 */
1210void switch_to_sld(unsigned long tifn)
1211{
1212        sld_update_msr(!(tifn & _TIF_SLD));
1213}
1214
1215/*
 1216 * Bits in the IA32_CORE_CAPABILITIES MSR are not architectural, so they should
1217 * only be trusted if it is confirmed that a CPU model implements a
1218 * specific feature at a particular bit position.
1219 *
1220 * The possible driver data field values:
1221 *
1222 * - 0: CPU models that are known to have the per-core split-lock detection
1223 *      feature even though they do not enumerate IA32_CORE_CAPABILITIES.
1224 *
1225 * - 1: CPU models which may enumerate IA32_CORE_CAPABILITIES and if so use
1226 *      bit 5 to enumerate the per-core split-lock detection feature.
1227 */
1228static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
1229        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,           0),
1230        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,           0),
1231        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,           0),
1232        X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT,        1),
1233        X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,      1),
1234        X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L,      1),
1235        X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,         1),
1236        X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,           1),
1237        X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    1),
1238        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           1),
1239        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         1),
1240        {}
1241};
1242
1243static void __init split_lock_setup(struct cpuinfo_x86 *c)
1244{
1245        const struct x86_cpu_id *m;
1246        u64 ia32_core_caps;
1247
1248        if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
1249                return;
1250
1251        m = x86_match_cpu(split_lock_cpu_ids);
1252        if (!m)
1253                return;
1254
1255        switch (m->driver_data) {
1256        case 0:
1257                break;
1258        case 1:
1259                if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES))
1260                        return;
1261                rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps);
1262                if (!(ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT))
1263                        return;
1264                break;
1265        default:
1266                return;
1267        }
1268
1269        cpu_model_supports_sld = true;
1270        __split_lock_setup();
1271}
1272
1273static void sld_state_show(void)
1274{
1275        if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
1276            !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
1277                return;
1278
1279        switch (sld_state) {
1280        case sld_off:
1281                pr_info("disabled\n");
1282                break;
1283        case sld_warn:
1284                if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
1285                        pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n");
1286                else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
1287                        pr_info("#DB: warning on user-space bus_locks\n");
1288                break;
1289        case sld_fatal:
1290                if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
1291                        pr_info("#AC: crashing the kernel on kernel split_locks and sending SIGBUS on user-space split_locks\n");
1292                } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
1293                        pr_info("#DB: sending SIGBUS on user-space bus_locks%s\n",
1294                                boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ?
1295                                " from non-WB" : "");
1296                }
1297                break;
1298        case sld_ratelimit:
1299                if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
1300                        pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst);
1301                break;
1302        }
1303}
1304
1305void __init sld_setup(struct cpuinfo_x86 *c)
1306{
1307        split_lock_setup(c);
1308        sld_state_setup();
1309        sld_state_show();
1310}
1311
1312#define X86_HYBRID_CPU_TYPE_ID_SHIFT    24
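     /* CPUID.1AH:EAX[31:24] is the core type; per the Intel SDM, 0x20 is Atom and 0x40 is Core. */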
1313
1314/**
1315 * get_this_hybrid_cpu_type() - Get the type of this hybrid CPU
1316 *
1317 * Returns the CPU type [31:24] (i.e., Atom or Core) of a CPU in
1318 * a hybrid processor. If the processor is not hybrid, returns 0.
1319 */
1320u8 get_this_hybrid_cpu_type(void)
1321{
1322        if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
1323                return 0;
1324
1325        return cpuid_eax(0x0000001a) >> X86_HYBRID_CPU_TYPE_ID_SHIFT;
1326}
1327