linux/arch/x86/kernel/cpu/cacheinfo.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *      Routines to identify caches on Intel CPU.
   4 *
   5 *      Changes:
   6 *      Venkatesh Pallipadi     : Adding cache identification through cpuid(4)
   7 *      Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
   8 *      Andi Kleen / Andreas Herrmann   : CPUID4 emulation on AMD.
   9 */
  10
  11#include <linux/slab.h>
  12#include <linux/cacheinfo.h>
  13#include <linux/cpu.h>
  14#include <linux/sched.h>
  15#include <linux/capability.h>
  16#include <linux/sysfs.h>
  17#include <linux/pci.h>
  18
  19#include <asm/cpufeature.h>
  20#include <asm/cacheinfo.h>
  21#include <asm/amd_nb.h>
  22#include <asm/smp.h>
  23
  24#include "cpu.h"
  25
  26#define LVL_1_INST      1
  27#define LVL_1_DATA      2
  28#define LVL_2           3
  29#define LVL_3           4
  30#define LVL_TRACE       5
  31
  32struct _cache_table {
  33        unsigned char descriptor;
  34        char cache_type;
  35        short size;
  36};
  37
  38#define MB(x)   ((x) * 1024)
  39
  40/* All the cache and trace cache descriptor types we care about
  41   (no TLB entries) */
  42
  43static const struct _cache_table cache_table[] =
  44{
  45        { 0x06, LVL_1_INST, 8 },        /* 4-way set assoc, 32 byte line size */
  46        { 0x08, LVL_1_INST, 16 },       /* 4-way set assoc, 32 byte line size */
  47        { 0x09, LVL_1_INST, 32 },       /* 4-way set assoc, 64 byte line size */
  48        { 0x0a, LVL_1_DATA, 8 },        /* 2 way set assoc, 32 byte line size */
  49        { 0x0c, LVL_1_DATA, 16 },       /* 4-way set assoc, 32 byte line size */
  50        { 0x0d, LVL_1_DATA, 16 },       /* 4-way set assoc, 64 byte line size */
  51        { 0x0e, LVL_1_DATA, 24 },       /* 6-way set assoc, 64 byte line size */
  52        { 0x21, LVL_2,      256 },      /* 8-way set assoc, 64 byte line size */
  53        { 0x22, LVL_3,      512 },      /* 4-way set assoc, sectored cache, 64 byte line size */
  54        { 0x23, LVL_3,      MB(1) },    /* 8-way set assoc, sectored cache, 64 byte line size */
  55        { 0x25, LVL_3,      MB(2) },    /* 8-way set assoc, sectored cache, 64 byte line size */
  56        { 0x29, LVL_3,      MB(4) },    /* 8-way set assoc, sectored cache, 64 byte line size */
  57        { 0x2c, LVL_1_DATA, 32 },       /* 8-way set assoc, 64 byte line size */
  58        { 0x30, LVL_1_INST, 32 },       /* 8-way set assoc, 64 byte line size */
  59        { 0x39, LVL_2,      128 },      /* 4-way set assoc, sectored cache, 64 byte line size */
  60        { 0x3a, LVL_2,      192 },      /* 6-way set assoc, sectored cache, 64 byte line size */
  61        { 0x3b, LVL_2,      128 },      /* 2-way set assoc, sectored cache, 64 byte line size */
  62        { 0x3c, LVL_2,      256 },      /* 4-way set assoc, sectored cache, 64 byte line size */
  63        { 0x3d, LVL_2,      384 },      /* 6-way set assoc, sectored cache, 64 byte line size */
  64        { 0x3e, LVL_2,      512 },      /* 4-way set assoc, sectored cache, 64 byte line size */
  65        { 0x3f, LVL_2,      256 },      /* 2-way set assoc, 64 byte line size */
  66        { 0x41, LVL_2,      128 },      /* 4-way set assoc, 32 byte line size */
  67        { 0x42, LVL_2,      256 },      /* 4-way set assoc, 32 byte line size */
  68        { 0x43, LVL_2,      512 },      /* 4-way set assoc, 32 byte line size */
  69        { 0x44, LVL_2,      MB(1) },    /* 4-way set assoc, 32 byte line size */
  70        { 0x45, LVL_2,      MB(2) },    /* 4-way set assoc, 32 byte line size */
  71        { 0x46, LVL_3,      MB(4) },    /* 4-way set assoc, 64 byte line size */
  72        { 0x47, LVL_3,      MB(8) },    /* 8-way set assoc, 64 byte line size */
  73        { 0x48, LVL_2,      MB(3) },    /* 12-way set assoc, 64 byte line size */
  74        { 0x49, LVL_3,      MB(4) },    /* 16-way set assoc, 64 byte line size */
  75        { 0x4a, LVL_3,      MB(6) },    /* 12-way set assoc, 64 byte line size */
  76        { 0x4b, LVL_3,      MB(8) },    /* 16-way set assoc, 64 byte line size */
  77        { 0x4c, LVL_3,      MB(12) },   /* 12-way set assoc, 64 byte line size */
  78        { 0x4d, LVL_3,      MB(16) },   /* 16-way set assoc, 64 byte line size */
  79        { 0x4e, LVL_2,      MB(6) },    /* 24-way set assoc, 64 byte line size */
  80        { 0x60, LVL_1_DATA, 16 },       /* 8-way set assoc, sectored cache, 64 byte line size */
  81        { 0x66, LVL_1_DATA, 8 },        /* 4-way set assoc, sectored cache, 64 byte line size */
  82        { 0x67, LVL_1_DATA, 16 },       /* 4-way set assoc, sectored cache, 64 byte line size */
  83        { 0x68, LVL_1_DATA, 32 },       /* 4-way set assoc, sectored cache, 64 byte line size */
  84        { 0x70, LVL_TRACE,  12 },       /* 8-way set assoc */
  85        { 0x71, LVL_TRACE,  16 },       /* 8-way set assoc */
  86        { 0x72, LVL_TRACE,  32 },       /* 8-way set assoc */
  87        { 0x73, LVL_TRACE,  64 },       /* 8-way set assoc */
  88        { 0x78, LVL_2,      MB(1) },    /* 4-way set assoc, 64 byte line size */
  89        { 0x79, LVL_2,      128 },      /* 8-way set assoc, sectored cache, 64 byte line size */
  90        { 0x7a, LVL_2,      256 },      /* 8-way set assoc, sectored cache, 64 byte line size */
  91        { 0x7b, LVL_2,      512 },      /* 8-way set assoc, sectored cache, 64 byte line size */
  92        { 0x7c, LVL_2,      MB(1) },    /* 8-way set assoc, sectored cache, 64 byte line size */
  93        { 0x7d, LVL_2,      MB(2) },    /* 8-way set assoc, 64 byte line size */
  94        { 0x7f, LVL_2,      512 },      /* 2-way set assoc, 64 byte line size */
  95        { 0x80, LVL_2,      512 },      /* 8-way set assoc, 64 byte line size */
  96        { 0x82, LVL_2,      256 },      /* 8-way set assoc, 32 byte line size */
  97        { 0x83, LVL_2,      512 },      /* 8-way set assoc, 32 byte line size */
  98        { 0x84, LVL_2,      MB(1) },    /* 8-way set assoc, 32 byte line size */
  99        { 0x85, LVL_2,      MB(2) },    /* 8-way set assoc, 32 byte line size */
 100        { 0x86, LVL_2,      512 },      /* 4-way set assoc, 64 byte line size */
 101        { 0x87, LVL_2,      MB(1) },    /* 8-way set assoc, 64 byte line size */
 102        { 0xd0, LVL_3,      512 },      /* 4-way set assoc, 64 byte line size */
 103        { 0xd1, LVL_3,      MB(1) },    /* 4-way set assoc, 64 byte line size */
 104        { 0xd2, LVL_3,      MB(2) },    /* 4-way set assoc, 64 byte line size */
 105        { 0xd6, LVL_3,      MB(1) },    /* 8-way set assoc, 64 byte line size */
 106        { 0xd7, LVL_3,      MB(2) },    /* 8-way set assoc, 64 byte line size */
 107        { 0xd8, LVL_3,      MB(4) },    /* 12-way set assoc, 64 byte line size */
 108        { 0xdc, LVL_3,      MB(2) },    /* 12-way set assoc, 64 byte line size */
 109        { 0xdd, LVL_3,      MB(4) },    /* 12-way set assoc, 64 byte line size */
 110        { 0xde, LVL_3,      MB(8) },    /* 12-way set assoc, 64 byte line size */
 111        { 0xe2, LVL_3,      MB(2) },    /* 16-way set assoc, 64 byte line size */
 112        { 0xe3, LVL_3,      MB(4) },    /* 16-way set assoc, 64 byte line size */
 113        { 0xe4, LVL_3,      MB(8) },    /* 16-way set assoc, 64 byte line size */
 114        { 0xea, LVL_3,      MB(12) },   /* 24-way set assoc, 64 byte line size */
 115        { 0xeb, LVL_3,      MB(18) },   /* 24-way set assoc, 64 byte line size */
 116        { 0xec, LVL_3,      MB(24) },   /* 24-way set assoc, 64 byte line size */
 117        { 0x00, 0, 0}
 118};
 119
 120
 121enum _cache_type {
 122        CTYPE_NULL = 0,
 123        CTYPE_DATA = 1,
 124        CTYPE_INST = 2,
 125        CTYPE_UNIFIED = 3
 126};
 127
 128union _cpuid4_leaf_eax {
 129        struct {
 130                enum _cache_type        type:5;
 131                unsigned int            level:3;
 132                unsigned int            is_self_initializing:1;
 133                unsigned int            is_fully_associative:1;
 134                unsigned int            reserved:4;
 135                unsigned int            num_threads_sharing:12;
 136                unsigned int            num_cores_on_die:6;
 137        } split;
 138        u32 full;
 139};
 140
 141union _cpuid4_leaf_ebx {
 142        struct {
 143                unsigned int            coherency_line_size:12;
 144                unsigned int            physical_line_partition:10;
 145                unsigned int            ways_of_associativity:10;
 146        } split;
 147        u32 full;
 148};
 149
 150union _cpuid4_leaf_ecx {
 151        struct {
 152                unsigned int            number_of_sets:32;
 153        } split;
 154        u32 full;
 155};
 156
 157struct _cpuid4_info_regs {
 158        union _cpuid4_leaf_eax eax;
 159        union _cpuid4_leaf_ebx ebx;
 160        union _cpuid4_leaf_ecx ecx;
 161        unsigned int id;
 162        unsigned long size;
 163        struct amd_northbridge *nb;
 164};
 165
 166static unsigned short num_cache_leaves;
 167
 168/* AMD doesn't implement CPUID leaf 4. Emulate it here to report the same
 169   information to the user. This makes some assumptions about the machine:
 170   L2 not shared, no SMT, etc., which are currently true on AMD CPUs.
 171
 172   In theory the TLBs could be reported as a fake cache type (they are in
 173   "dummy"). Maybe later. */
 174union l1_cache {
 175        struct {
 176                unsigned line_size:8;
 177                unsigned lines_per_tag:8;
 178                unsigned assoc:8;
 179                unsigned size_in_kb:8;
 180        };
 181        unsigned val;
 182};
 183
 184union l2_cache {
 185        struct {
 186                unsigned line_size:8;
 187                unsigned lines_per_tag:4;
 188                unsigned assoc:4;
 189                unsigned size_in_kb:16;
 190        };
 191        unsigned val;
 192};
 193
 194union l3_cache {
 195        struct {
 196                unsigned line_size:8;
 197                unsigned lines_per_tag:4;
 198                unsigned assoc:4;
 199                unsigned res:2;
 200                unsigned size_encoded:14;
 201        };
 202        unsigned val;
 203};
 204
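    /*
     * Translate the raw associativity field reported by CPUID 0x80000005/
     * 0x80000006 into a number of ways, as used by amd_cpuid4() below;
     * 0xf is the "fully associative" encoding.
     */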
 205static const unsigned short assocs[] = {
 206        [1] = 1,
 207        [2] = 2,
 208        [4] = 4,
 209        [6] = 8,
 210        [8] = 16,
 211        [0xa] = 32,
 212        [0xb] = 48,
 213        [0xc] = 64,
 214        [0xd] = 96,
 215        [0xe] = 128,
 216        [0xf] = 0xffff /* fully associative - no way to show this currently */
 217};
 218
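    /*
     * For the emulated CPUID4 leaves built by amd_cpuid4(), leaf index
     * 0..3 maps to L1d, L1i, L2 and L3; levels[] supplies the cache level
     * and types[] the _cache_type encoding (1 = data, 2 = inst, 3 = unified).
     */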
 219static const unsigned char levels[] = { 1, 1, 2, 3 };
 220static const unsigned char types[] = { 1, 2, 3, 3 };
 221
 222static const enum cache_type cache_type_map[] = {
 223        [CTYPE_NULL] = CACHE_TYPE_NOCACHE,
 224        [CTYPE_DATA] = CACHE_TYPE_DATA,
 225        [CTYPE_INST] = CACHE_TYPE_INST,
 226        [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
 227};
 228
 229static void
 230amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 231                     union _cpuid4_leaf_ebx *ebx,
 232                     union _cpuid4_leaf_ecx *ecx)
 233{
 234        unsigned dummy;
 235        unsigned line_size, lines_per_tag, assoc, size_in_kb;
 236        union l1_cache l1i, l1d;
 237        union l2_cache l2;
 238        union l3_cache l3;
 239        union l1_cache *l1 = &l1d;
 240
 241        eax->full = 0;
 242        ebx->full = 0;
 243        ecx->full = 0;
 244
 245        cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
 246        cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
 247
 248        switch (leaf) {
 249        case 1:
 250                l1 = &l1i;
 251                /* fall through */
 252        case 0:
 253                if (!l1->val)
 254                        return;
 255                assoc = assocs[l1->assoc];
 256                line_size = l1->line_size;
 257                lines_per_tag = l1->lines_per_tag;
 258                size_in_kb = l1->size_in_kb;
 259                break;
 260        case 2:
 261                if (!l2.val)
 262                        return;
 263                assoc = assocs[l2.assoc];
 264                line_size = l2.line_size;
 265                lines_per_tag = l2.lines_per_tag;
 266                /* cpu_data has errata corrections for K7 applied */
 267                size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
 268                break;
 269        case 3:
 270                if (!l3.val)
 271                        return;
 272                assoc = assocs[l3.assoc];
 273                line_size = l3.line_size;
 274                lines_per_tag = l3.lines_per_tag;
 275                size_in_kb = l3.size_encoded * 512;
 276                if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
 277                        size_in_kb = size_in_kb >> 1;
 278                        assoc = assoc >> 1;
 279                }
 280                break;
 281        default:
 282                return;
 283        }
 284
 285        eax->split.is_self_initializing = 1;
 286        eax->split.type = types[leaf];
 287        eax->split.level = levels[leaf];
 288        eax->split.num_threads_sharing = 0;
 289        eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
 290
 291
 292        if (assoc == 0xffff)
 293                eax->split.is_fully_associative = 1;
 294        ebx->split.coherency_line_size = line_size - 1;
 295        ebx->split.ways_of_associativity = assoc - 1;
 296        ebx->split.physical_line_partition = lines_per_tag - 1;
 297        ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
 298                (ebx->split.ways_of_associativity + 1) - 1;
 299}
 300
 301#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
 302
 303/*
 304 * L3 cache descriptors
 305 */
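    /*
     * amd_calc_l3_indices() works out how many L3 indices can be targeted
     * by the index-disable interface: the largest of the per-subcache
     * counts derived from the northbridge register below, times 1024,
     * minus one, is stored in l3->indices as the upper bound later checked
     * in amd_set_l3_disable_slot().
     */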
 306static void amd_calc_l3_indices(struct amd_northbridge *nb)
 307{
 308        struct amd_l3_cache *l3 = &nb->l3_cache;
 309        unsigned int sc0, sc1, sc2, sc3;
 310        u32 val = 0;
 311
 312        pci_read_config_dword(nb->misc, 0x1C4, &val);
 313
 314        /* calculate subcache sizes */
 315        l3->subcaches[0] = sc0 = !(val & BIT(0));
 316        l3->subcaches[1] = sc1 = !(val & BIT(4));
 317
 318        if (boot_cpu_data.x86 == 0x15) {
 319                l3->subcaches[0] = sc0 += !(val & BIT(1));
 320                l3->subcaches[1] = sc1 += !(val & BIT(5));
 321        }
 322
 323        l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
 324        l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
 325
 326        l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 327}
 328
 329/*
 330 * Check whether a slot used for disabling an L3 index is occupied.
 331 * @nb: northbridge descriptor holding the L3 cache
 332 * @slot: slot number (0..1)
 333 *
 334 * @returns: the disabled index if the slot is in use, negative if it is free.
 335 */
 336static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
 337{
 338        unsigned int reg = 0;
 339
 340        pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
 341
 342        /* check whether this slot is activated already */
 343        if (reg & (3UL << 30))
 344                return reg & 0xfff;
 345
 346        return -1;
 347}
 348
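    /*
     * sysfs interface for L3 index disabling: cache_disable_0/1 report the
     * index currently disabled via that slot (or "FREE"), and accept an
     * index number to disable.
     */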
 349static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
 350                                  unsigned int slot)
 351{
 352        int index;
 353        struct amd_northbridge *nb = this_leaf->priv;
 354
 355        index = amd_get_l3_disable_slot(nb, slot);
 356        if (index >= 0)
 357                return sprintf(buf, "%d\n", index);
 358
 359        return sprintf(buf, "FREE\n");
 360}
 361
 362#define SHOW_CACHE_DISABLE(slot)                                        \
 363static ssize_t                                                          \
 364cache_disable_##slot##_show(struct device *dev,                         \
 365                            struct device_attribute *attr, char *buf)   \
 366{                                                                       \
 367        struct cacheinfo *this_leaf = dev_get_drvdata(dev);             \
 368        return show_cache_disable(this_leaf, buf, slot);                \
 369}
 370SHOW_CACHE_DISABLE(0)
 371SHOW_CACHE_DISABLE(1)
 372
 373static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
 374                                 unsigned slot, unsigned long idx)
 375{
 376        int i;
 377
 378        idx |= BIT(30);
 379
 380        /*
 381         *  disable index in all 4 subcaches
 382         */
 383        for (i = 0; i < 4; i++) {
 384                u32 reg = idx | (i << 20);
 385
 386                if (!nb->l3_cache.subcaches[i])
 387                        continue;
 388
 389                pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
 390
 391                /*
 392                 * We need to WBINVD on a core on the node containing the L3
 393                 * cache whose indices we disable; a simple wbinvd() on the
 394                 * current CPU is therefore not sufficient.
 395                 */
 396                wbinvd_on_cpu(cpu);
 397
 398                reg |= BIT(31);
 399                pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
 400        }
 401}
 402
 403/*
 404 * Disable an L3 cache index by using a disable-slot.
 405 *
 406 * @nb:    northbridge descriptor holding the L3 cache
 407 * @cpu:   A CPU on the node containing the L3 cache
 408 * @slot:  slot number (0..1)
 409 * @index: index to disable
 410 *
 411 * @return: 0 on success, error status on failure
 412 */
 413static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
 414                            unsigned slot, unsigned long index)
 415{
 416        int ret = 0;
 417
 418        /*  check if @slot is already used or the index is already disabled */
 419        ret = amd_get_l3_disable_slot(nb, slot);
 420        if (ret >= 0)
 421                return -EEXIST;
 422
 423        if (index > nb->l3_cache.indices)
 424                return -EINVAL;
 425
 426        /* check whether the other slot has disabled the same index already */
 427        if (index == amd_get_l3_disable_slot(nb, !slot))
 428                return -EEXIST;
 429
 430        amd_l3_disable_index(nb, cpu, slot, index);
 431
 432        return 0;
 433}
 434
 435static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
 436                                   const char *buf, size_t count,
 437                                   unsigned int slot)
 438{
 439        unsigned long val = 0;
 440        int cpu, err = 0;
 441        struct amd_northbridge *nb = this_leaf->priv;
 442
 443        if (!capable(CAP_SYS_ADMIN))
 444                return -EPERM;
 445
 446        cpu = cpumask_first(&this_leaf->shared_cpu_map);
 447
 448        if (kstrtoul(buf, 10, &val) < 0)
 449                return -EINVAL;
 450
 451        err = amd_set_l3_disable_slot(nb, cpu, slot, val);
 452        if (err) {
 453                if (err == -EEXIST)
 454                        pr_warn("L3 slot %d in use/index already disabled!\n",
 455                                   slot);
 456                return err;
 457        }
 458        return count;
 459}
 460
 461#define STORE_CACHE_DISABLE(slot)                                       \
 462static ssize_t                                                          \
 463cache_disable_##slot##_store(struct device *dev,                        \
 464                             struct device_attribute *attr,             \
 465                             const char *buf, size_t count)             \
 466{                                                                       \
 467        struct cacheinfo *this_leaf = dev_get_drvdata(dev);             \
 468        return store_cache_disable(this_leaf, buf, count, slot);        \
 469}
 470STORE_CACHE_DISABLE(0)
 471STORE_CACHE_DISABLE(1)
 472
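    /*
     * The subcaches attribute exposes the L3 partitioning mask through
     * amd_get_subcaches()/amd_set_subcaches(); it is only made visible when
     * the northbridge advertises AMD_NB_L3_PARTITIONING, see
     * cache_private_attrs_is_visible() below.
     */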
 473static ssize_t subcaches_show(struct device *dev,
 474                              struct device_attribute *attr, char *buf)
 475{
 476        struct cacheinfo *this_leaf = dev_get_drvdata(dev);
 477        int cpu = cpumask_first(&this_leaf->shared_cpu_map);
 478
 479        return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
 480}
 481
 482static ssize_t subcaches_store(struct device *dev,
 483                               struct device_attribute *attr,
 484                               const char *buf, size_t count)
 485{
 486        struct cacheinfo *this_leaf = dev_get_drvdata(dev);
 487        int cpu = cpumask_first(&this_leaf->shared_cpu_map);
 488        unsigned long val;
 489
 490        if (!capable(CAP_SYS_ADMIN))
 491                return -EPERM;
 492
 493        if (kstrtoul(buf, 16, &val) < 0)
 494                return -EINVAL;
 495
 496        if (amd_set_subcaches(cpu, val))
 497                return -EINVAL;
 498
 499        return count;
 500}
 501
 502static DEVICE_ATTR_RW(cache_disable_0);
 503static DEVICE_ATTR_RW(cache_disable_1);
 504static DEVICE_ATTR_RW(subcaches);
 505
 506static umode_t
 507cache_private_attrs_is_visible(struct kobject *kobj,
 508                               struct attribute *attr, int unused)
 509{
 510        struct device *dev = kobj_to_dev(kobj);
 511        struct cacheinfo *this_leaf = dev_get_drvdata(dev);
 512        umode_t mode = attr->mode;
 513
 514        if (!this_leaf->priv)
 515                return 0;
 516
 517        if ((attr == &dev_attr_subcaches.attr) &&
 518            amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 519                return mode;
 520
 521        if ((attr == &dev_attr_cache_disable_0.attr ||
 522             attr == &dev_attr_cache_disable_1.attr) &&
 523            amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 524                return mode;
 525
 526        return 0;
 527}
 528
 529static struct attribute_group cache_private_group = {
 530        .is_visible = cache_private_attrs_is_visible,
 531};
 532
 533static void init_amd_l3_attrs(void)
 534{
 535        int n = 1;
 536        static struct attribute **amd_l3_attrs;
 537
 538        if (amd_l3_attrs) /* already initialized */
 539                return;
 540
 541        if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 542                n += 2;
 543        if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 544                n += 1;
 545
 546        amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
 547        if (!amd_l3_attrs)
 548                return;
 549
 550        n = 0;
 551        if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
 552                amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
 553                amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
 554        }
 555        if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 556                amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
 557
 558        cache_private_group.attrs = amd_l3_attrs;
 559}
 560
 561const struct attribute_group *
 562cache_get_priv_group(struct cacheinfo *this_leaf)
 563{
 564        struct amd_northbridge *nb = this_leaf->priv;
 565
 566        if (this_leaf->level < 3 || !nb)
 567                return NULL;
 568
 569        if (nb && nb->l3_cache.indices)
 570                init_amd_l3_attrs();
 571
 572        return &cache_private_group;
 573}
 574
 575static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
 576{
 577        int node;
 578
 579        /* only for L3, and not in virtualized environments */
 580        if (index < 3)
 581                return;
 582
 583        node = amd_get_nb_id(smp_processor_id());
 584        this_leaf->nb = node_to_amd_nb(node);
 585        if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
 586                amd_calc_l3_indices(this_leaf->nb);
 587}
 588#else
 589#define amd_init_l3_cache(x, y)
 590#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
 591
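    /*
     * Fill *this_leaf for cache leaf @index from CPUID leaf 4 (Intel), leaf
     * 0x8000001d (AMD with TOPOEXT, and Hygon) or the amd_cpuid4() emulation
     * on older AMD parts. The total size follows from the reported geometry,
     * where every CPUID field encodes "value - 1":
     *
     *   size = sets * line_size * physical_line_partitions * ways
     */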
 592static int
 593cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
 594{
 595        union _cpuid4_leaf_eax  eax;
 596        union _cpuid4_leaf_ebx  ebx;
 597        union _cpuid4_leaf_ecx  ecx;
 598        unsigned                edx;
 599
 600        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
 601                if (boot_cpu_has(X86_FEATURE_TOPOEXT))
 602                        cpuid_count(0x8000001d, index, &eax.full,
 603                                    &ebx.full, &ecx.full, &edx);
 604                else
 605                        amd_cpuid4(index, &eax, &ebx, &ecx);
 606                amd_init_l3_cache(this_leaf, index);
 607        } else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
 608                cpuid_count(0x8000001d, index, &eax.full,
 609                            &ebx.full, &ecx.full, &edx);
 610                amd_init_l3_cache(this_leaf, index);
 611        } else {
 612                cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
 613        }
 614
 615        if (eax.split.type == CTYPE_NULL)
 616                return -EIO; /* better error ? */
 617
 618        this_leaf->eax = eax;
 619        this_leaf->ebx = ebx;
 620        this_leaf->ecx = ecx;
 621        this_leaf->size = (ecx.split.number_of_sets          + 1) *
 622                          (ebx.split.coherency_line_size     + 1) *
 623                          (ebx.split.physical_line_partition + 1) *
 624                          (ebx.split.ways_of_associativity   + 1);
 625        return 0;
 626}
 627
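    /*
     * Count the cache leaves by walking the deterministic cache parameters
     * leaf (4 on Intel, 0x8000001d on AMD/Hygon) until a CTYPE_NULL entry
     * is returned.
     */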
 628static int find_num_cache_leaves(struct cpuinfo_x86 *c)
 629{
 630        unsigned int            eax, ebx, ecx, edx, op;
 631        union _cpuid4_leaf_eax  cache_eax;
 632        int                     i = -1;
 633
 634        if (c->x86_vendor == X86_VENDOR_AMD ||
 635            c->x86_vendor == X86_VENDOR_HYGON)
 636                op = 0x8000001d;
 637        else
 638                op = 4;
 639
 640        do {
 641                ++i;
 642                /* Do cpuid(op) loop to find out num_cache_leaves */
 643                cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
 644                cache_eax.full = eax;
 645        } while (cache_eax.split.type != CTYPE_NULL);
 646        return i;
 647}
 648
 649void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
 650{
 651        /*
 652         * We may have multiple LLCs if L3 caches exist, so check if we
 653         * have an L3 cache by looking at the L3 cache CPUID leaf.
 654         */
 655        if (!cpuid_edx(0x80000006))
 656                return;
 657
 658        if (c->x86 < 0x17) {
 659                /* LLC is at the node level. */
 660                per_cpu(cpu_llc_id, cpu) = node_id;
 661        } else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
 662                /*
 663                 * LLC is at the core complex level.
 664                 * Core complex ID is ApicId[3] for these processors.
 665                 */
 666                per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
 667        } else {
 668                /*
 669                 * LLC ID is calculated from the number of threads sharing the
 670                 * cache.
 671                 */
 672                u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
 673                u32 llc_index = find_num_cache_leaves(c) - 1;
 674
 675                cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
 676                if (eax)
 677                        num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
 678
 679                if (num_sharing_cache) {
 680                        int bits = get_count_order(num_sharing_cache);
 681
 682                        per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
 683                }
 684        }
 685}
 686
 687void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
 688{
 689        /*
 690         * We may have multiple LLCs if L3 caches exist, so check if we
 691         * have an L3 cache by looking at the L3 cache CPUID leaf.
 692         */
 693        if (!cpuid_edx(0x80000006))
 694                return;
 695
 696        /*
 697         * LLC is at the core complex level.
 698         * Core complex ID is ApicId[3] for these processors.
 699         */
 700        per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
 701}
 702
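    /*
     * With TOPOEXT the leaves can simply be counted via CPUID 0x8000001d.
     * Otherwise assume three leaves (L1d, L1i, L2) plus a fourth if CPUID
     * 0x80000006 EDX reports an L3 (the 0xf000 mask covers the L3
     * associativity field).
     */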
 703void init_amd_cacheinfo(struct cpuinfo_x86 *c)
 704{
 705
 706        if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
 707                num_cache_leaves = find_num_cache_leaves(c);
 708        } else if (c->extended_cpuid_level >= 0x80000006) {
 709                if (cpuid_edx(0x80000006) & 0xf000)
 710                        num_cache_leaves = 4;
 711                else
 712                        num_cache_leaves = 3;
 713        }
 714}
 715
 716void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
 717{
 718        num_cache_leaves = find_num_cache_leaves(c);
 719}
 720
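    /*
     * Determine cache sizes for an Intel CPU: prefer the deterministic
     * cpuid(4) leaves, fall back to the cpuid(2) descriptor table for older
     * CPUs (and, on P4, for the trace cache only), and derive cpu_llc_id
     * from whichever cache turns out to be the last level.
     */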
 721void init_intel_cacheinfo(struct cpuinfo_x86 *c)
 722{
 723        /* Cache sizes */
 724        unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
 725        unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
 726        unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
 727        unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
 728#ifdef CONFIG_SMP
 729        unsigned int cpu = c->cpu_index;
 730#endif
 731
 732        if (c->cpuid_level > 3) {
 733                static int is_initialized;
 734
 735                if (is_initialized == 0) {
 736                        /* Init num_cache_leaves from boot CPU */
 737                        num_cache_leaves = find_num_cache_leaves(c);
 738                        is_initialized++;
 739                }
 740
 741                /*
 742                 * Whenever possible use cpuid(4), the deterministic cache
 743                 * parameters leaf, to find the cache details.
 744                 */
 745                for (i = 0; i < num_cache_leaves; i++) {
 746                        struct _cpuid4_info_regs this_leaf = {};
 747                        int retval;
 748
 749                        retval = cpuid4_cache_lookup_regs(i, &this_leaf);
 750                        if (retval < 0)
 751                                continue;
 752
 753                        switch (this_leaf.eax.split.level) {
 754                        case 1:
 755                                if (this_leaf.eax.split.type == CTYPE_DATA)
 756                                        new_l1d = this_leaf.size/1024;
 757                                else if (this_leaf.eax.split.type == CTYPE_INST)
 758                                        new_l1i = this_leaf.size/1024;
 759                                break;
 760                        case 2:
 761                                new_l2 = this_leaf.size/1024;
 762                                num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 763                                index_msb = get_count_order(num_threads_sharing);
 764                                l2_id = c->apicid & ~((1 << index_msb) - 1);
 765                                break;
 766                        case 3:
 767                                new_l3 = this_leaf.size/1024;
 768                                num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 769                                index_msb = get_count_order(num_threads_sharing);
 770                                l3_id = c->apicid & ~((1 << index_msb) - 1);
 771                                break;
 772                        default:
 773                                break;
 774                        }
 775                }
 776        }
 777        /*
 778         * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 only
 779         * for the trace cache.
 780         */
 781        if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
 782                /* supports eax=2 call */
 783                int j, n;
 784                unsigned int regs[4];
 785                unsigned char *dp = (unsigned char *)regs;
 786                int only_trace = 0;
 787
 788                if (num_cache_leaves != 0 && c->x86 == 15)
 789                        only_trace = 1;
 790
 791                /* Number of times to iterate */
 792                n = cpuid_eax(2) & 0xFF;
 793
 794                for (i = 0 ; i < n ; i++) {
 795                        cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
 796
 797                        /* If bit 31 is set, this is an unknown format */
 798                        for (j = 0 ; j < 3 ; j++)
 799                                if (regs[j] & (1 << 31))
 800                                        regs[j] = 0;
 801
 802                        /* Byte 0 is level count, not a descriptor */
 803                        for (j = 1 ; j < 16 ; j++) {
 804                                unsigned char des = dp[j];
 805                                unsigned char k = 0;
 806
 807                                /* look up this descriptor in the table */
 808                                while (cache_table[k].descriptor != 0) {
 809                                        if (cache_table[k].descriptor == des) {
 810                                                if (only_trace && cache_table[k].cache_type != LVL_TRACE)
 811                                                        break;
 812                                                switch (cache_table[k].cache_type) {
 813                                                case LVL_1_INST:
 814                                                        l1i += cache_table[k].size;
 815                                                        break;
 816                                                case LVL_1_DATA:
 817                                                        l1d += cache_table[k].size;
 818                                                        break;
 819                                                case LVL_2:
 820                                                        l2 += cache_table[k].size;
 821                                                        break;
 822                                                case LVL_3:
 823                                                        l3 += cache_table[k].size;
 824                                                        break;
 825                                                case LVL_TRACE:
 826                                                        trace += cache_table[k].size;
 827                                                        break;
 828                                                }
 829
 830                                                break;
 831                                        }
 832
 833                                        k++;
 834                                }
 835                        }
 836                }
 837        }
 838
 839        if (new_l1d)
 840                l1d = new_l1d;
 841
 842        if (new_l1i)
 843                l1i = new_l1i;
 844
 845        if (new_l2) {
 846                l2 = new_l2;
 847#ifdef CONFIG_SMP
 848                per_cpu(cpu_llc_id, cpu) = l2_id;
 849#endif
 850        }
 851
 852        if (new_l3) {
 853                l3 = new_l3;
 854#ifdef CONFIG_SMP
 855                per_cpu(cpu_llc_id, cpu) = l3_id;
 856#endif
 857        }
 858
 859#ifdef CONFIG_SMP
 860        /*
 861         * If cpu_llc_id is not yet set, this means cpuid_level < 4, which in
 862         * turn means that the only possibility is SMT (as indicated in
 863         * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
 864         * that SMT shares all caches, we can unconditionally set cpu_llc_id to
 865         * c->phys_proc_id.
 866         */
 867        if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
 868                per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
 869#endif
 870
 871        c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
 872
 873        if (!l2)
 874                cpu_detect_cache_sizes(c);
 875}
 876
 877static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
 878                                    struct _cpuid4_info_regs *base)
 879{
 880        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 881        struct cacheinfo *this_leaf;
 882        int i, sibling;
 883
 884        /*
 885         * For L3, always use the pre-calculated cpu_llc_shared_mask
 886         * to derive shared_cpu_map.
 887         */
 888        if (index == 3) {
 889                for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
 890                        this_cpu_ci = get_cpu_cacheinfo(i);
 891                        if (!this_cpu_ci->info_list)
 892                                continue;
 893                        this_leaf = this_cpu_ci->info_list + index;
 894                        for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
 895                                if (!cpu_online(sibling))
 896                                        continue;
 897                                cpumask_set_cpu(sibling,
 898                                                &this_leaf->shared_cpu_map);
 899                        }
 900                }
 901        } else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
 902                unsigned int apicid, nshared, first, last;
 903
 904                nshared = base->eax.split.num_threads_sharing + 1;
 905                apicid = cpu_data(cpu).apicid;
 906                first = apicid - (apicid % nshared);
 907                last = first + nshared - 1;
 908
 909                for_each_online_cpu(i) {
 910                        this_cpu_ci = get_cpu_cacheinfo(i);
 911                        if (!this_cpu_ci->info_list)
 912                                continue;
 913
 914                        apicid = cpu_data(i).apicid;
 915                        if ((apicid < first) || (apicid > last))
 916                                continue;
 917
 918                        this_leaf = this_cpu_ci->info_list + index;
 919
 920                        for_each_online_cpu(sibling) {
 921                                apicid = cpu_data(sibling).apicid;
 922                                if ((apicid < first) || (apicid > last))
 923                                        continue;
 924                                cpumask_set_cpu(sibling,
 925                                                &this_leaf->shared_cpu_map);
 926                        }
 927                }
 928        } else
 929                return 0;
 930
 931        return 1;
 932}
 933
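    /*
     * Build shared_cpu_map for cache leaf @index: CPUs whose APIC IDs match
     * above the sharing-order bits share the cache. AMD and Hygon are tried
     * first via __cache_amd_cpumap_setup() because their sharing information
     * comes from different sources.
     */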
 934static void __cache_cpumap_setup(unsigned int cpu, int index,
 935                                 struct _cpuid4_info_regs *base)
 936{
 937        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 938        struct cacheinfo *this_leaf, *sibling_leaf;
 939        unsigned long num_threads_sharing;
 940        int index_msb, i;
 941        struct cpuinfo_x86 *c = &cpu_data(cpu);
 942
 943        if (c->x86_vendor == X86_VENDOR_AMD ||
 944            c->x86_vendor == X86_VENDOR_HYGON) {
 945                if (__cache_amd_cpumap_setup(cpu, index, base))
 946                        return;
 947        }
 948
 949        this_leaf = this_cpu_ci->info_list + index;
 950        num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
 951
 952        cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
 953        if (num_threads_sharing == 1)
 954                return;
 955
 956        index_msb = get_count_order(num_threads_sharing);
 957
 958        for_each_online_cpu(i)
 959                if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
 960                        struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
 961
 962                        if (i == cpu || !sib_cpu_ci->info_list)
 963                                continue;/* skip if itself or no cacheinfo */
 964                        sibling_leaf = sib_cpu_ci->info_list + index;
 965                        cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
 966                        cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
 967                }
 968}
 969
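    /*
     * Translate the raw _cpuid4_info_regs into the generic struct cacheinfo
     * consumed by the cacheinfo core; the "+ 1" terms decode CPUID fields
     * that are stored as "value - 1".
     */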
 970static void ci_leaf_init(struct cacheinfo *this_leaf,
 971                         struct _cpuid4_info_regs *base)
 972{
 973        this_leaf->id = base->id;
 974        this_leaf->attributes = CACHE_ID;
 975        this_leaf->level = base->eax.split.level;
 976        this_leaf->type = cache_type_map[base->eax.split.type];
 977        this_leaf->coherency_line_size =
 978                                base->ebx.split.coherency_line_size + 1;
 979        this_leaf->ways_of_associativity =
 980                                base->ebx.split.ways_of_associativity + 1;
 981        this_leaf->size = base->size;
 982        this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
 983        this_leaf->physical_line_partition =
 984                                base->ebx.split.physical_line_partition + 1;
 985        this_leaf->priv = base->nb;
 986}
 987
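    /*
     * __init_cache_level() and __populate_cache_leaves() are wrapped by
     * DEFINE_SMP_CALL_CACHE_FUNCTION() at the bottom of the file, which runs
     * them on the target CPU via smp_call_function_single().
     */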
 988static int __init_cache_level(unsigned int cpu)
 989{
 990        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 991
 992        if (!num_cache_leaves)
 993                return -ENOENT;
 994        if (!this_cpu_ci)
 995                return -EINVAL;
 996        this_cpu_ci->num_levels = 3;
 997        this_cpu_ci->num_leaves = num_cache_leaves;
 998        return 0;
 999}
1000
1001/*
1002 * The maximum number of threads sharing a cache comes from CPUID.4:EAX[25:14]
1003 * (with ECX selecting the cache index). Right-shifting the APIC ID by that
1004 * number's order yields the cache id for this cache node.
1005 */
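    /*
     * E.g. with four threads sharing a cache, index_msb is 2 and all CPUs
     * whose APIC IDs differ only in the two lowest bits end up with the
     * same cache id.
     */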
1006static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
1007{
1008        struct cpuinfo_x86 *c = &cpu_data(cpu);
1009        unsigned long num_threads_sharing;
1010        int index_msb;
1011
1012        num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
1013        index_msb = get_count_order(num_threads_sharing);
1014        id4_regs->id = c->apicid >> index_msb;
1015}
1016
1017static int __populate_cache_leaves(unsigned int cpu)
1018{
1019        unsigned int idx, ret;
1020        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
1021        struct cacheinfo *this_leaf = this_cpu_ci->info_list;
1022        struct _cpuid4_info_regs id4_regs = {};
1023
1024        for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
1025                ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
1026                if (ret)
1027                        return ret;
1028                get_cache_id(cpu, &id4_regs);
1029                ci_leaf_init(this_leaf++, &id4_regs);
1030                __cache_cpumap_setup(cpu, idx, &id4_regs);
1031        }
1032        this_cpu_ci->cpu_map_populated = true;
1033
1034        return 0;
1035}
1036
1037DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
1038DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
1039