linux/arch/x86/kernel/cpu/cacheinfo.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *      Routines to identify caches on Intel CPUs.
   4 *
   5 *      Changes:
   6 *      Venkatesh Pallipadi     : Adding cache identification through cpuid(4)
   7 *      Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
   8 *      Andi Kleen / Andreas Herrmann   : CPUID4 emulation on AMD.
   9 */
  10
  11#include <linux/slab.h>
  12#include <linux/cacheinfo.h>
  13#include <linux/cpu.h>
  14#include <linux/sched.h>
  15#include <linux/capability.h>
  16#include <linux/sysfs.h>
  17#include <linux/pci.h>
  18
  19#include <asm/cpufeature.h>
  20#include <asm/cacheinfo.h>
  21#include <asm/amd_nb.h>
  22#include <asm/smp.h>
  23
  24#include "cpu.h"
  25
  26#define LVL_1_INST      1
  27#define LVL_1_DATA      2
  28#define LVL_2           3
  29#define LVL_3           4
  30#define LVL_TRACE       5
  31
  32struct _cache_table {
  33        unsigned char descriptor;
  34        char cache_type;
  35        short size;
  36};
  37
  38#define MB(x)   ((x) * 1024)
  39
   40/* All the cache and trace cache descriptor types we care about
   41   (TLB descriptors are not included) */
  42
  43static const struct _cache_table cache_table[] =
  44{
  45        { 0x06, LVL_1_INST, 8 },        /* 4-way set assoc, 32 byte line size */
  46        { 0x08, LVL_1_INST, 16 },       /* 4-way set assoc, 32 byte line size */
  47        { 0x09, LVL_1_INST, 32 },       /* 4-way set assoc, 64 byte line size */
  48        { 0x0a, LVL_1_DATA, 8 },        /* 2 way set assoc, 32 byte line size */
  49        { 0x0c, LVL_1_DATA, 16 },       /* 4-way set assoc, 32 byte line size */
  50        { 0x0d, LVL_1_DATA, 16 },       /* 4-way set assoc, 64 byte line size */
  51        { 0x0e, LVL_1_DATA, 24 },       /* 6-way set assoc, 64 byte line size */
  52        { 0x21, LVL_2,      256 },      /* 8-way set assoc, 64 byte line size */
  53        { 0x22, LVL_3,      512 },      /* 4-way set assoc, sectored cache, 64 byte line size */
  54        { 0x23, LVL_3,      MB(1) },    /* 8-way set assoc, sectored cache, 64 byte line size */
  55        { 0x25, LVL_3,      MB(2) },    /* 8-way set assoc, sectored cache, 64 byte line size */
  56        { 0x29, LVL_3,      MB(4) },    /* 8-way set assoc, sectored cache, 64 byte line size */
  57        { 0x2c, LVL_1_DATA, 32 },       /* 8-way set assoc, 64 byte line size */
  58        { 0x30, LVL_1_INST, 32 },       /* 8-way set assoc, 64 byte line size */
  59        { 0x39, LVL_2,      128 },      /* 4-way set assoc, sectored cache, 64 byte line size */
  60        { 0x3a, LVL_2,      192 },      /* 6-way set assoc, sectored cache, 64 byte line size */
  61        { 0x3b, LVL_2,      128 },      /* 2-way set assoc, sectored cache, 64 byte line size */
  62        { 0x3c, LVL_2,      256 },      /* 4-way set assoc, sectored cache, 64 byte line size */
  63        { 0x3d, LVL_2,      384 },      /* 6-way set assoc, sectored cache, 64 byte line size */
  64        { 0x3e, LVL_2,      512 },      /* 4-way set assoc, sectored cache, 64 byte line size */
  65        { 0x3f, LVL_2,      256 },      /* 2-way set assoc, 64 byte line size */
  66        { 0x41, LVL_2,      128 },      /* 4-way set assoc, 32 byte line size */
  67        { 0x42, LVL_2,      256 },      /* 4-way set assoc, 32 byte line size */
  68        { 0x43, LVL_2,      512 },      /* 4-way set assoc, 32 byte line size */
  69        { 0x44, LVL_2,      MB(1) },    /* 4-way set assoc, 32 byte line size */
  70        { 0x45, LVL_2,      MB(2) },    /* 4-way set assoc, 32 byte line size */
  71        { 0x46, LVL_3,      MB(4) },    /* 4-way set assoc, 64 byte line size */
  72        { 0x47, LVL_3,      MB(8) },    /* 8-way set assoc, 64 byte line size */
  73        { 0x48, LVL_2,      MB(3) },    /* 12-way set assoc, 64 byte line size */
  74        { 0x49, LVL_3,      MB(4) },    /* 16-way set assoc, 64 byte line size */
  75        { 0x4a, LVL_3,      MB(6) },    /* 12-way set assoc, 64 byte line size */
  76        { 0x4b, LVL_3,      MB(8) },    /* 16-way set assoc, 64 byte line size */
  77        { 0x4c, LVL_3,      MB(12) },   /* 12-way set assoc, 64 byte line size */
  78        { 0x4d, LVL_3,      MB(16) },   /* 16-way set assoc, 64 byte line size */
  79        { 0x4e, LVL_2,      MB(6) },    /* 24-way set assoc, 64 byte line size */
  80        { 0x60, LVL_1_DATA, 16 },       /* 8-way set assoc, sectored cache, 64 byte line size */
  81        { 0x66, LVL_1_DATA, 8 },        /* 4-way set assoc, sectored cache, 64 byte line size */
  82        { 0x67, LVL_1_DATA, 16 },       /* 4-way set assoc, sectored cache, 64 byte line size */
  83        { 0x68, LVL_1_DATA, 32 },       /* 4-way set assoc, sectored cache, 64 byte line size */
  84        { 0x70, LVL_TRACE,  12 },       /* 8-way set assoc */
  85        { 0x71, LVL_TRACE,  16 },       /* 8-way set assoc */
  86        { 0x72, LVL_TRACE,  32 },       /* 8-way set assoc */
  87        { 0x73, LVL_TRACE,  64 },       /* 8-way set assoc */
  88        { 0x78, LVL_2,      MB(1) },    /* 4-way set assoc, 64 byte line size */
  89        { 0x79, LVL_2,      128 },      /* 8-way set assoc, sectored cache, 64 byte line size */
  90        { 0x7a, LVL_2,      256 },      /* 8-way set assoc, sectored cache, 64 byte line size */
  91        { 0x7b, LVL_2,      512 },      /* 8-way set assoc, sectored cache, 64 byte line size */
  92        { 0x7c, LVL_2,      MB(1) },    /* 8-way set assoc, sectored cache, 64 byte line size */
  93        { 0x7d, LVL_2,      MB(2) },    /* 8-way set assoc, 64 byte line size */
  94        { 0x7f, LVL_2,      512 },      /* 2-way set assoc, 64 byte line size */
  95        { 0x80, LVL_2,      512 },      /* 8-way set assoc, 64 byte line size */
  96        { 0x82, LVL_2,      256 },      /* 8-way set assoc, 32 byte line size */
  97        { 0x83, LVL_2,      512 },      /* 8-way set assoc, 32 byte line size */
  98        { 0x84, LVL_2,      MB(1) },    /* 8-way set assoc, 32 byte line size */
  99        { 0x85, LVL_2,      MB(2) },    /* 8-way set assoc, 32 byte line size */
 100        { 0x86, LVL_2,      512 },      /* 4-way set assoc, 64 byte line size */
 101        { 0x87, LVL_2,      MB(1) },    /* 8-way set assoc, 64 byte line size */
 102        { 0xd0, LVL_3,      512 },      /* 4-way set assoc, 64 byte line size */
 103        { 0xd1, LVL_3,      MB(1) },    /* 4-way set assoc, 64 byte line size */
 104        { 0xd2, LVL_3,      MB(2) },    /* 4-way set assoc, 64 byte line size */
 105        { 0xd6, LVL_3,      MB(1) },    /* 8-way set assoc, 64 byte line size */
 106        { 0xd7, LVL_3,      MB(2) },    /* 8-way set assoc, 64 byte line size */
 107        { 0xd8, LVL_3,      MB(4) },    /* 12-way set assoc, 64 byte line size */
 108        { 0xdc, LVL_3,      MB(2) },    /* 12-way set assoc, 64 byte line size */
 109        { 0xdd, LVL_3,      MB(4) },    /* 12-way set assoc, 64 byte line size */
 110        { 0xde, LVL_3,      MB(8) },    /* 12-way set assoc, 64 byte line size */
 111        { 0xe2, LVL_3,      MB(2) },    /* 16-way set assoc, 64 byte line size */
 112        { 0xe3, LVL_3,      MB(4) },    /* 16-way set assoc, 64 byte line size */
 113        { 0xe4, LVL_3,      MB(8) },    /* 16-way set assoc, 64 byte line size */
 114        { 0xea, LVL_3,      MB(12) },   /* 24-way set assoc, 64 byte line size */
 115        { 0xeb, LVL_3,      MB(18) },   /* 24-way set assoc, 64 byte line size */
 116        { 0xec, LVL_3,      MB(24) },   /* 24-way set assoc, 64 byte line size */
 117        { 0x00, 0, 0}
 118};
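/*
 * Worked example (illustrative): a descriptor byte of 0x2c returned by
 * cpuid(2) matches { 0x2c, LVL_1_DATA, 32 } above, i.e. a 32 KB L1 data
 * cache. All sizes in this table are in KB; MB(x) simply scales to
 * x * 1024 KB.
 */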
 119
 120
 121enum _cache_type {
 122        CTYPE_NULL = 0,
 123        CTYPE_DATA = 1,
 124        CTYPE_INST = 2,
 125        CTYPE_UNIFIED = 3
 126};
 127
 128union _cpuid4_leaf_eax {
 129        struct {
 130                enum _cache_type        type:5;
 131                unsigned int            level:3;
 132                unsigned int            is_self_initializing:1;
 133                unsigned int            is_fully_associative:1;
 134                unsigned int            reserved:4;
 135                unsigned int            num_threads_sharing:12;
 136                unsigned int            num_cores_on_die:6;
 137        } split;
 138        u32 full;
 139};
 140
 141union _cpuid4_leaf_ebx {
 142        struct {
 143                unsigned int            coherency_line_size:12;
 144                unsigned int            physical_line_partition:10;
 145                unsigned int            ways_of_associativity:10;
 146        } split;
 147        u32 full;
 148};
 149
 150union _cpuid4_leaf_ecx {
 151        struct {
 152                unsigned int            number_of_sets:32;
 153        } split;
 154        u32 full;
 155};
 156
 157struct _cpuid4_info_regs {
 158        union _cpuid4_leaf_eax eax;
 159        union _cpuid4_leaf_ebx ebx;
 160        union _cpuid4_leaf_ecx ecx;
 161        unsigned int id;
 162        unsigned long size;
 163        struct amd_northbridge *nb;
 164};
 165
 166static unsigned short num_cache_leaves;
 167
  168/* AMD doesn't have CPUID4. Emulate it here to report the same
  169   information to the user.  This makes some assumptions about the machine:
  170   L2 not shared, no SMT etc., which is currently true on AMD CPUs.
  171
  172   In theory the TLBs could be reported as a fake cache type (they are
  173   returned in "dummy"); maybe later. */
 174union l1_cache {
 175        struct {
 176                unsigned line_size:8;
 177                unsigned lines_per_tag:8;
 178                unsigned assoc:8;
 179                unsigned size_in_kb:8;
 180        };
 181        unsigned val;
 182};
 183
 184union l2_cache {
 185        struct {
 186                unsigned line_size:8;
 187                unsigned lines_per_tag:4;
 188                unsigned assoc:4;
 189                unsigned size_in_kb:16;
 190        };
 191        unsigned val;
 192};
 193
 194union l3_cache {
 195        struct {
 196                unsigned line_size:8;
 197                unsigned lines_per_tag:4;
 198                unsigned assoc:4;
 199                unsigned res:2;
 200                unsigned size_encoded:14;
 201        };
 202        unsigned val;
 203};
 204
 205static const unsigned short assocs[] = {
 206        [1] = 1,
 207        [2] = 2,
 208        [4] = 4,
 209        [6] = 8,
 210        [8] = 16,
 211        [0xa] = 32,
 212        [0xb] = 48,
 213        [0xc] = 64,
 214        [0xd] = 96,
 215        [0xe] = 128,
 216        [0xf] = 0xffff /* fully associative - no way to show this currently */
 217};
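/*
 * Note (illustrative): assocs[] decodes the 4-bit associativity fields
 * consumed by amd_cpuid4() below, e.g. an encoded value of 0x6 means
 * 8-way and 0xf means fully associative (represented as 0xffff).
 */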
 218
 219static const unsigned char levels[] = { 1, 1, 2, 3 };
 220static const unsigned char types[] = { 1, 2, 3, 3 };
 221
 222static const enum cache_type cache_type_map[] = {
 223        [CTYPE_NULL] = CACHE_TYPE_NOCACHE,
 224        [CTYPE_DATA] = CACHE_TYPE_DATA,
 225        [CTYPE_INST] = CACHE_TYPE_INST,
 226        [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
 227};
 228
 229static void
 230amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 231                     union _cpuid4_leaf_ebx *ebx,
 232                     union _cpuid4_leaf_ecx *ecx)
 233{
 234        unsigned dummy;
 235        unsigned line_size, lines_per_tag, assoc, size_in_kb;
 236        union l1_cache l1i, l1d;
 237        union l2_cache l2;
 238        union l3_cache l3;
 239        union l1_cache *l1 = &l1d;
 240
 241        eax->full = 0;
 242        ebx->full = 0;
 243        ecx->full = 0;
 244
 245        cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
 246        cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
 247
 248        switch (leaf) {
 249        case 1:
 250                l1 = &l1i;
 251                /* fall through */
 252        case 0:
 253                if (!l1->val)
 254                        return;
 255                assoc = assocs[l1->assoc];
 256                line_size = l1->line_size;
 257                lines_per_tag = l1->lines_per_tag;
 258                size_in_kb = l1->size_in_kb;
 259                break;
 260        case 2:
 261                if (!l2.val)
 262                        return;
 263                assoc = assocs[l2.assoc];
 264                line_size = l2.line_size;
 265                lines_per_tag = l2.lines_per_tag;
 266                /* cpu_data has errata corrections for K7 applied */
 267                size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
 268                break;
 269        case 3:
 270                if (!l3.val)
 271                        return;
 272                assoc = assocs[l3.assoc];
 273                line_size = l3.line_size;
 274                lines_per_tag = l3.lines_per_tag;
 275                size_in_kb = l3.size_encoded * 512;
 276                if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
 277                        size_in_kb = size_in_kb >> 1;
 278                        assoc = assoc >> 1;
 279                }
 280                break;
 281        default:
 282                return;
 283        }
 284
 285        eax->split.is_self_initializing = 1;
 286        eax->split.type = types[leaf];
 287        eax->split.level = levels[leaf];
 288        eax->split.num_threads_sharing = 0;
 289        eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
 290
 291
 292        if (assoc == 0xffff)
 293                eax->split.is_fully_associative = 1;
 294        ebx->split.coherency_line_size = line_size - 1;
 295        ebx->split.ways_of_associativity = assoc - 1;
 296        ebx->split.physical_line_partition = lines_per_tag - 1;
 297        ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
 298                (ebx->split.ways_of_associativity + 1) - 1;
 299}
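/*
 * Worked example (illustrative, not tied to any particular CPU): a
 * 512 KB, 16-way L2 with 64-byte lines is emulated above as
 * ways_of_associativity = 15, coherency_line_size = 63 and
 * number_of_sets = 512 * 1024 / 64 / 16 - 1 = 511, i.e. the same
 * "minus one" encoding a native CPUID(4) leaf would use.
 */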
 300
 301#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
 302
 303/*
 304 * L3 cache descriptors
 305 */
 306static void amd_calc_l3_indices(struct amd_northbridge *nb)
 307{
 308        struct amd_l3_cache *l3 = &nb->l3_cache;
 309        unsigned int sc0, sc1, sc2, sc3;
 310        u32 val = 0;
 311
 312        pci_read_config_dword(nb->misc, 0x1C4, &val);
 313
 314        /* calculate subcache sizes */
 315        l3->subcaches[0] = sc0 = !(val & BIT(0));
 316        l3->subcaches[1] = sc1 = !(val & BIT(4));
 317
 318        if (boot_cpu_data.x86 == 0x15) {
 319                l3->subcaches[0] = sc0 += !(val & BIT(1));
 320                l3->subcaches[1] = sc1 += !(val & BIT(5));
 321        }
 322
 323        l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
 324        l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
 325
 326        l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 327}
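/*
 * Worked example (illustrative): on a family 0x15 part with no subcache
 * disable bits set, every subcache counts as 2, so
 * indices = (2 << 10) - 1 = 2047.
 */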
 328
  329/*
  330 * Check whether a slot used for disabling an L3 index is occupied.
  331 * @nb: AMD northbridge descriptor for the node containing the L3 cache
  332 * @slot: slot number (0..1)
  333 *
  334 * @return: the disabled index if the slot is in use, a negative value if it is free.
  335 */
 336static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
 337{
 338        unsigned int reg = 0;
 339
 340        pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
 341
 342        /* check whether this slot is activated already */
 343        if (reg & (3UL << 30))
 344                return reg & 0xfff;
 345
 346        return -1;
 347}
 348
 349static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
 350                                  unsigned int slot)
 351{
 352        int index;
 353        struct amd_northbridge *nb = this_leaf->priv;
 354
 355        index = amd_get_l3_disable_slot(nb, slot);
 356        if (index >= 0)
 357                return sprintf(buf, "%d\n", index);
 358
 359        return sprintf(buf, "FREE\n");
 360}
 361
 362#define SHOW_CACHE_DISABLE(slot)                                        \
 363static ssize_t                                                          \
 364cache_disable_##slot##_show(struct device *dev,                         \
 365                            struct device_attribute *attr, char *buf)   \
 366{                                                                       \
 367        struct cacheinfo *this_leaf = dev_get_drvdata(dev);             \
 368        return show_cache_disable(this_leaf, buf, slot);                \
 369}
 370SHOW_CACHE_DISABLE(0)
 371SHOW_CACHE_DISABLE(1)
 372
 373static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
 374                                 unsigned slot, unsigned long idx)
 375{
 376        int i;
 377
 378        idx |= BIT(30);
 379
 380        /*
 381         *  disable index in all 4 subcaches
 382         */
 383        for (i = 0; i < 4; i++) {
 384                u32 reg = idx | (i << 20);
 385
 386                if (!nb->l3_cache.subcaches[i])
 387                        continue;
 388
 389                pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
 390
  391                /*
  392                 * We need to WBINVD on a core on the node containing the L3
  393                 * cache whose indices we are disabling, so a simple wbinvd()
  394                 * on the current CPU is not sufficient.
  395                 */
 396                wbinvd_on_cpu(cpu);
 397
 398                reg |= BIT(31);
 399                pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
 400        }
 401}
 402
  403/*
  404 * Disable an L3 cache index by using a disable-slot.
  405 *
  406 * @nb:    AMD northbridge descriptor for the node containing the L3 cache
  407 * @cpu:   a CPU on the node containing the L3 cache
  408 * @slot:  slot number (0..1)
  409 * @index: index to disable
  410 *
  411 * @return: 0 on success, a negative error code on failure
  412 */
 413static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
 414                            unsigned slot, unsigned long index)
 415{
 416        int ret = 0;
 417
 418        /*  check if @slot is already used or the index is already disabled */
 419        ret = amd_get_l3_disable_slot(nb, slot);
 420        if (ret >= 0)
 421                return -EEXIST;
 422
 423        if (index > nb->l3_cache.indices)
 424                return -EINVAL;
 425
 426        /* check whether the other slot has disabled the same index already */
 427        if (index == amd_get_l3_disable_slot(nb, !slot))
 428                return -EEXIST;
 429
 430        amd_l3_disable_index(nb, cpu, slot, index);
 431
 432        return 0;
 433}
 434
 435static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
 436                                   const char *buf, size_t count,
 437                                   unsigned int slot)
 438{
 439        unsigned long val = 0;
 440        int cpu, err = 0;
 441        struct amd_northbridge *nb = this_leaf->priv;
 442
 443        if (!capable(CAP_SYS_ADMIN))
 444                return -EPERM;
 445
 446        cpu = cpumask_first(&this_leaf->shared_cpu_map);
 447
 448        if (kstrtoul(buf, 10, &val) < 0)
 449                return -EINVAL;
 450
 451        err = amd_set_l3_disable_slot(nb, cpu, slot, val);
 452        if (err) {
 453                if (err == -EEXIST)
 454                        pr_warn("L3 slot %d in use/index already disabled!\n",
 455                                   slot);
 456                return err;
 457        }
 458        return count;
 459}
 460
 461#define STORE_CACHE_DISABLE(slot)                                       \
 462static ssize_t                                                          \
 463cache_disable_##slot##_store(struct device *dev,                        \
 464                             struct device_attribute *attr,             \
 465                             const char *buf, size_t count)             \
 466{                                                                       \
 467        struct cacheinfo *this_leaf = dev_get_drvdata(dev);             \
 468        return store_cache_disable(this_leaf, buf, count, slot);        \
 469}
 470STORE_CACHE_DISABLE(0)
 471STORE_CACHE_DISABLE(1)
 472
 473static ssize_t subcaches_show(struct device *dev,
 474                              struct device_attribute *attr, char *buf)
 475{
 476        struct cacheinfo *this_leaf = dev_get_drvdata(dev);
 477        int cpu = cpumask_first(&this_leaf->shared_cpu_map);
 478
 479        return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
 480}
 481
 482static ssize_t subcaches_store(struct device *dev,
 483                               struct device_attribute *attr,
 484                               const char *buf, size_t count)
 485{
 486        struct cacheinfo *this_leaf = dev_get_drvdata(dev);
 487        int cpu = cpumask_first(&this_leaf->shared_cpu_map);
 488        unsigned long val;
 489
 490        if (!capable(CAP_SYS_ADMIN))
 491                return -EPERM;
 492
 493        if (kstrtoul(buf, 16, &val) < 0)
 494                return -EINVAL;
 495
 496        if (amd_set_subcaches(cpu, val))
 497                return -EINVAL;
 498
 499        return count;
 500}
 501
 502static DEVICE_ATTR_RW(cache_disable_0);
 503static DEVICE_ATTR_RW(cache_disable_1);
 504static DEVICE_ATTR_RW(subcaches);
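/*
 * Usage sketch (illustrative, assuming index3 is the L3 leaf): these
 * attributes show up under the cacheinfo sysfs directory of the L3
 * leaf, typically /sys/devices/system/cpu/cpuX/cache/index3/, e.g.:
 *
 *   echo 42 > .../cache/index3/cache_disable_0   # disable L3 index 42
 *   cat .../cache/index3/subcaches               # subcache mask (hex)
 */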
 505
 506static umode_t
 507cache_private_attrs_is_visible(struct kobject *kobj,
 508                               struct attribute *attr, int unused)
 509{
 510        struct device *dev = kobj_to_dev(kobj);
 511        struct cacheinfo *this_leaf = dev_get_drvdata(dev);
 512        umode_t mode = attr->mode;
 513
 514        if (!this_leaf->priv)
 515                return 0;
 516
 517        if ((attr == &dev_attr_subcaches.attr) &&
 518            amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 519                return mode;
 520
 521        if ((attr == &dev_attr_cache_disable_0.attr ||
 522             attr == &dev_attr_cache_disable_1.attr) &&
 523            amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 524                return mode;
 525
 526        return 0;
 527}
 528
 529static struct attribute_group cache_private_group = {
 530        .is_visible = cache_private_attrs_is_visible,
 531};
 532
 533static void init_amd_l3_attrs(void)
 534{
 535        int n = 1;
 536        static struct attribute **amd_l3_attrs;
 537
 538        if (amd_l3_attrs) /* already initialized */
 539                return;
 540
 541        if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 542                n += 2;
 543        if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 544                n += 1;
 545
 546        amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
 547        if (!amd_l3_attrs)
 548                return;
 549
 550        n = 0;
 551        if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
 552                amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
 553                amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
 554        }
 555        if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 556                amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
 557
 558        cache_private_group.attrs = amd_l3_attrs;
 559}
 560
 561const struct attribute_group *
 562cache_get_priv_group(struct cacheinfo *this_leaf)
 563{
 564        struct amd_northbridge *nb = this_leaf->priv;
 565
 566        if (this_leaf->level < 3 || !nb)
 567                return NULL;
 568
 569        if (nb && nb->l3_cache.indices)
 570                init_amd_l3_attrs();
 571
 572        return &cache_private_group;
 573}
 574
 575static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
 576{
 577        int node;
 578
 579        /* only for L3, and not in virtualized environments */
 580        if (index < 3)
 581                return;
 582
 583        node = amd_get_nb_id(smp_processor_id());
 584        this_leaf->nb = node_to_amd_nb(node);
 585        if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
 586                amd_calc_l3_indices(this_leaf->nb);
 587}
 588#else
 589#define amd_init_l3_cache(x, y)
 590#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
 591
 592static int
 593cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
 594{
 595        union _cpuid4_leaf_eax  eax;
 596        union _cpuid4_leaf_ebx  ebx;
 597        union _cpuid4_leaf_ecx  ecx;
 598        unsigned                edx;
 599
 600        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
 601                if (boot_cpu_has(X86_FEATURE_TOPOEXT))
 602                        cpuid_count(0x8000001d, index, &eax.full,
 603                                    &ebx.full, &ecx.full, &edx);
 604                else
 605                        amd_cpuid4(index, &eax, &ebx, &ecx);
 606                amd_init_l3_cache(this_leaf, index);
 607        } else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
 608                cpuid_count(0x8000001d, index, &eax.full,
 609                            &ebx.full, &ecx.full, &edx);
 610                amd_init_l3_cache(this_leaf, index);
 611        } else {
 612                cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
 613        }
 614
 615        if (eax.split.type == CTYPE_NULL)
 616                return -EIO; /* better error ? */
 617
 618        this_leaf->eax = eax;
 619        this_leaf->ebx = ebx;
 620        this_leaf->ecx = ecx;
 621        this_leaf->size = (ecx.split.number_of_sets          + 1) *
 622                          (ebx.split.coherency_line_size     + 1) *
 623                          (ebx.split.physical_line_partition + 1) *
 624                          (ebx.split.ways_of_associativity   + 1);
 625        return 0;
 626}
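/*
 * Worked example (illustrative): a leaf reporting 64-byte lines
 * (coherency_line_size = 63), one line per tag, 8 ways
 * (ways_of_associativity = 7) and 512 sets (number_of_sets = 511)
 * gives size = 512 * 64 * 1 * 8 = 256 KB.
 */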
 627
 628static int find_num_cache_leaves(struct cpuinfo_x86 *c)
 629{
 630        unsigned int            eax, ebx, ecx, edx, op;
 631        union _cpuid4_leaf_eax  cache_eax;
 632        int                     i = -1;
 633
 634        if (c->x86_vendor == X86_VENDOR_AMD ||
 635            c->x86_vendor == X86_VENDOR_HYGON)
 636                op = 0x8000001d;
 637        else
 638                op = 4;
 639
 640        do {
 641                ++i;
 642                /* Do cpuid(op) loop to find out num_cache_leaves */
 643                cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
 644                cache_eax.full = eax;
 645        } while (cache_eax.split.type != CTYPE_NULL);
 646        return i;
 647}
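/*
 * Example (illustrative): a CPU exposing L1d, L1i, L2 and L3 leaves
 * reports CTYPE_NULL at index 4, so the loop above returns 4.
 */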
 648
 649void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
 650{
 651        /*
 652         * We may have multiple LLCs if L3 caches exist, so check if we
 653         * have an L3 cache by looking at the L3 cache CPUID leaf.
 654         */
 655        if (!cpuid_edx(0x80000006))
 656                return;
 657
 658        if (c->x86 < 0x17) {
 659                /* LLC is at the node level. */
 660                per_cpu(cpu_llc_id, cpu) = node_id;
 661        } else if (c->x86 == 0x17 &&
 662                   c->x86_model >= 0 && c->x86_model <= 0x1F) {
 663                /*
 664                 * LLC is at the core complex level.
 665                 * Core complex ID is ApicId[3] for these processors.
 666                 */
 667                per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
 668        } else {
 669                /*
 670                 * LLC ID is calculated from the number of threads sharing the
 671                 * cache.
  672                 */
 673                u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
 674                u32 llc_index = find_num_cache_leaves(c) - 1;
 675
 676                cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
 677                if (eax)
 678                        num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
 679
 680                if (num_sharing_cache) {
 681                        int bits = get_count_order(num_sharing_cache);
 682
 683                        per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
 684                }
 685        }
 686}
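/*
 * Worked example (illustrative): if the last cache leaf reports
 * EAX[25:14] = 15, i.e. 16 threads share the LLC, then
 * bits = get_count_order(16) = 4 and cpu_llc_id = apicid >> 4.
 */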
 687
 688void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
 689{
 690        /*
 691         * We may have multiple LLCs if L3 caches exist, so check if we
 692         * have an L3 cache by looking at the L3 cache CPUID leaf.
 693         */
 694        if (!cpuid_edx(0x80000006))
 695                return;
 696
 697        /*
 698         * LLC is at the core complex level.
 699         * Core complex ID is ApicId[3] for these processors.
 700         */
 701        per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
 702}
 703
 704void init_amd_cacheinfo(struct cpuinfo_x86 *c)
 705{
 706
 707        if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
 708                num_cache_leaves = find_num_cache_leaves(c);
 709        } else if (c->extended_cpuid_level >= 0x80000006) {
 710                if (cpuid_edx(0x80000006) & 0xf000)
 711                        num_cache_leaves = 4;
 712                else
 713                        num_cache_leaves = 3;
 714        }
 715}
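/*
 * Note (assumption about the CPUID layout): EDX[15:12] of leaf
 * 0x80000006 is the L3 associativity field, so the 0xf000 check above
 * treats a non-zero value as "an L3 exists", hence four cache leaves
 * instead of three.
 */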
 716
 717void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
 718{
 719        num_cache_leaves = find_num_cache_leaves(c);
 720}
 721
 722void init_intel_cacheinfo(struct cpuinfo_x86 *c)
 723{
 724        /* Cache sizes */
 725        unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
 726        unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
 727        unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
 728        unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
 729#ifdef CONFIG_SMP
 730        unsigned int cpu = c->cpu_index;
 731#endif
 732
 733        if (c->cpuid_level > 3) {
 734                static int is_initialized;
 735
 736                if (is_initialized == 0) {
 737                        /* Init num_cache_leaves from boot CPU */
 738                        num_cache_leaves = find_num_cache_leaves(c);
 739                        is_initialized++;
 740                }
 741
  742                /*
  743                 * Whenever possible, use the cpuid(4) deterministic cache
  744                 * parameters leaf to find the cache details.
  745                 */
 746                for (i = 0; i < num_cache_leaves; i++) {
 747                        struct _cpuid4_info_regs this_leaf = {};
 748                        int retval;
 749
 750                        retval = cpuid4_cache_lookup_regs(i, &this_leaf);
 751                        if (retval < 0)
 752                                continue;
 753
 754                        switch (this_leaf.eax.split.level) {
 755                        case 1:
 756                                if (this_leaf.eax.split.type == CTYPE_DATA)
 757                                        new_l1d = this_leaf.size/1024;
 758                                else if (this_leaf.eax.split.type == CTYPE_INST)
 759                                        new_l1i = this_leaf.size/1024;
 760                                break;
 761                        case 2:
 762                                new_l2 = this_leaf.size/1024;
 763                                num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 764                                index_msb = get_count_order(num_threads_sharing);
 765                                l2_id = c->apicid & ~((1 << index_msb) - 1);
 766                                break;
 767                        case 3:
 768                                new_l3 = this_leaf.size/1024;
 769                                num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 770                                index_msb = get_count_order(num_threads_sharing);
 771                                l3_id = c->apicid & ~((1 << index_msb) - 1);
 772                                break;
 773                        default:
 774                                break;
 775                        }
 776                }
 777        }
  778        /*
  779         * Don't use cpuid(2) if cpuid(4) is supported. For P4, we still use
  780         * cpuid(2) for the trace cache.
  781         */
 782        if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
 783                /* supports eax=2  call */
 784                int j, n;
 785                unsigned int regs[4];
 786                unsigned char *dp = (unsigned char *)regs;
 787                int only_trace = 0;
 788
 789                if (num_cache_leaves != 0 && c->x86 == 15)
 790                        only_trace = 1;
 791
 792                /* Number of times to iterate */
 793                n = cpuid_eax(2) & 0xFF;
 794
 795                for (i = 0 ; i < n ; i++) {
 796                        cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
 797
 798                        /* If bit 31 is set, this is an unknown format */
 799                        for (j = 0 ; j < 3 ; j++)
 800                                if (regs[j] & (1 << 31))
 801                                        regs[j] = 0;
 802
 803                        /* Byte 0 is level count, not a descriptor */
 804                        for (j = 1 ; j < 16 ; j++) {
 805                                unsigned char des = dp[j];
 806                                unsigned char k = 0;
 807
 808                                /* look up this descriptor in the table */
 809                                while (cache_table[k].descriptor != 0) {
 810                                        if (cache_table[k].descriptor == des) {
 811                                                if (only_trace && cache_table[k].cache_type != LVL_TRACE)
 812                                                        break;
 813                                                switch (cache_table[k].cache_type) {
 814                                                case LVL_1_INST:
 815                                                        l1i += cache_table[k].size;
 816                                                        break;
 817                                                case LVL_1_DATA:
 818                                                        l1d += cache_table[k].size;
 819                                                        break;
 820                                                case LVL_2:
 821                                                        l2 += cache_table[k].size;
 822                                                        break;
 823                                                case LVL_3:
 824                                                        l3 += cache_table[k].size;
 825                                                        break;
 826                                                case LVL_TRACE:
 827                                                        trace += cache_table[k].size;
 828                                                        break;
 829                                                }
 830
 831                                                break;
 832                                        }
 833
 834                                        k++;
 835                                }
 836                        }
 837                }
 838        }
 839
 840        if (new_l1d)
 841                l1d = new_l1d;
 842
 843        if (new_l1i)
 844                l1i = new_l1i;
 845
 846        if (new_l2) {
 847                l2 = new_l2;
 848#ifdef CONFIG_SMP
 849                per_cpu(cpu_llc_id, cpu) = l2_id;
 850#endif
 851        }
 852
 853        if (new_l3) {
 854                l3 = new_l3;
 855#ifdef CONFIG_SMP
 856                per_cpu(cpu_llc_id, cpu) = l3_id;
 857#endif
 858        }
 859
 860#ifdef CONFIG_SMP
 861        /*
  862         * If cpu_llc_id is not yet set, this means cpuid_level < 4, which in
  863         * turn means that the only possibility is SMT (as indicated in
 864         * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
 865         * that SMT shares all caches, we can unconditionally set cpu_llc_id to
 866         * c->phys_proc_id.
 867         */
 868        if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
 869                per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
 870#endif
 871
 872        c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
 873
 874        if (!l2)
 875                cpu_detect_cache_sizes(c);
 876}
 877
 878static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
 879                                    struct _cpuid4_info_regs *base)
 880{
 881        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 882        struct cacheinfo *this_leaf;
 883        int i, sibling;
 884
 885        /*
 886         * For L3, always use the pre-calculated cpu_llc_shared_mask
 887         * to derive shared_cpu_map.
 888         */
 889        if (index == 3) {
 890                for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
 891                        this_cpu_ci = get_cpu_cacheinfo(i);
 892                        if (!this_cpu_ci->info_list)
 893                                continue;
 894                        this_leaf = this_cpu_ci->info_list + index;
 895                        for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
 896                                if (!cpu_online(sibling))
 897                                        continue;
 898                                cpumask_set_cpu(sibling,
 899                                                &this_leaf->shared_cpu_map);
 900                        }
 901                }
 902        } else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
 903                unsigned int apicid, nshared, first, last;
 904
 905                nshared = base->eax.split.num_threads_sharing + 1;
 906                apicid = cpu_data(cpu).apicid;
 907                first = apicid - (apicid % nshared);
 908                last = first + nshared - 1;
 909
 910                for_each_online_cpu(i) {
 911                        this_cpu_ci = get_cpu_cacheinfo(i);
 912                        if (!this_cpu_ci->info_list)
 913                                continue;
 914
 915                        apicid = cpu_data(i).apicid;
 916                        if ((apicid < first) || (apicid > last))
 917                                continue;
 918
 919                        this_leaf = this_cpu_ci->info_list + index;
 920
 921                        for_each_online_cpu(sibling) {
 922                                apicid = cpu_data(sibling).apicid;
 923                                if ((apicid < first) || (apicid > last))
 924                                        continue;
 925                                cpumask_set_cpu(sibling,
 926                                                &this_leaf->shared_cpu_map);
 927                        }
 928                }
 929        } else
 930                return 0;
 931
 932        return 1;
 933}
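/*
 * Worked example (illustrative): with nshared = 4 and apicid = 13,
 * the leaf is shared by APIC IDs 12..15, so only online CPUs in that
 * window end up in shared_cpu_map.
 */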
 934
 935static void __cache_cpumap_setup(unsigned int cpu, int index,
 936                                 struct _cpuid4_info_regs *base)
 937{
 938        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 939        struct cacheinfo *this_leaf, *sibling_leaf;
 940        unsigned long num_threads_sharing;
 941        int index_msb, i;
 942        struct cpuinfo_x86 *c = &cpu_data(cpu);
 943
 944        if (c->x86_vendor == X86_VENDOR_AMD ||
 945            c->x86_vendor == X86_VENDOR_HYGON) {
 946                if (__cache_amd_cpumap_setup(cpu, index, base))
 947                        return;
 948        }
 949
 950        this_leaf = this_cpu_ci->info_list + index;
 951        num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
 952
 953        cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
 954        if (num_threads_sharing == 1)
 955                return;
 956
 957        index_msb = get_count_order(num_threads_sharing);
 958
 959        for_each_online_cpu(i)
 960                if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
 961                        struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
 962
 963                        if (i == cpu || !sib_cpu_ci->info_list)
  964                                continue; /* skip itself or CPUs with no cacheinfo */
 965                        sibling_leaf = sib_cpu_ci->info_list + index;
 966                        cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
 967                        cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
 968                }
 969}
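/*
 * Worked example (illustrative): for a leaf shared by two SMT siblings
 * the CPUID field reports 1, so num_threads_sharing = 2, index_msb = 1,
 * and CPUs whose APIC IDs differ only in bit 0 mark each other in
 * shared_cpu_map.
 */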
 970
 971static void ci_leaf_init(struct cacheinfo *this_leaf,
 972                         struct _cpuid4_info_regs *base)
 973{
 974        this_leaf->id = base->id;
 975        this_leaf->attributes = CACHE_ID;
 976        this_leaf->level = base->eax.split.level;
 977        this_leaf->type = cache_type_map[base->eax.split.type];
 978        this_leaf->coherency_line_size =
 979                                base->ebx.split.coherency_line_size + 1;
 980        this_leaf->ways_of_associativity =
 981                                base->ebx.split.ways_of_associativity + 1;
 982        this_leaf->size = base->size;
 983        this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
 984        this_leaf->physical_line_partition =
 985                                base->ebx.split.physical_line_partition + 1;
 986        this_leaf->priv = base->nb;
 987}
 988
 989static int __init_cache_level(unsigned int cpu)
 990{
 991        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 992
 993        if (!num_cache_leaves)
 994                return -ENOENT;
 995        if (!this_cpu_ci)
 996                return -EINVAL;
 997        this_cpu_ci->num_levels = 3;
 998        this_cpu_ci->num_leaves = num_cache_leaves;
 999        return 0;
1000}
1001
 1002/*
 1003 * The maximum number of threads sharing the cache comes from CPUID.4:EAX[25:14],
 1004 * with ECX selecting the cache index. Right-shift the APIC ID by that number's
 1005 * order to get the cache id for this cache node.
 1006 */
1007static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
1008{
1009        struct cpuinfo_x86 *c = &cpu_data(cpu);
1010        unsigned long num_threads_sharing;
1011        int index_msb;
1012
1013        num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
1014        index_msb = get_count_order(num_threads_sharing);
1015        id4_regs->id = c->apicid >> index_msb;
1016}
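/*
 * Example (illustrative): with the CPUID field reporting 1 (two threads
 * sharing), index_msb = 1 and both SMT siblings get the same cache id,
 * apicid >> 1.
 */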
1017
1018static int __populate_cache_leaves(unsigned int cpu)
1019{
1020        unsigned int idx, ret;
1021        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
1022        struct cacheinfo *this_leaf = this_cpu_ci->info_list;
1023        struct _cpuid4_info_regs id4_regs = {};
1024
1025        for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
1026                ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
1027                if (ret)
1028                        return ret;
1029                get_cache_id(cpu, &id4_regs);
1030                ci_leaf_init(this_leaf++, &id4_regs);
1031                __cache_cpumap_setup(cpu, idx, &id4_regs);
1032        }
1033        this_cpu_ci->cpu_map_populated = true;
1034
1035        return 0;
1036}
1037
1038DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
1039DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
1040