linux/arch/x86/kernel/cpu/cacheinfo.c
// SPDX-License-Identifier: GPL-2.0
/*
 *      Routines to identify caches on Intel CPU.
 *
 *      Changes:
 *      Venkatesh Pallipadi     : Adding cache identification through cpuid(4)
 *      Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
 *      Andi Kleen / Andreas Herrmann   : CPUID4 emulation on AMD.
 */

#include <linux/slab.h>
#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/capability.h>
#include <linux/sysfs.h>
#include <linux/pci.h>

#include <asm/cpufeature.h>
#include <asm/amd_nb.h>
#include <asm/smp.h>

#include "cpu.h"

#define LVL_1_INST      1
#define LVL_1_DATA      2
#define LVL_2           3
#define LVL_3           4
#define LVL_TRACE       5

struct _cache_table {
        unsigned char descriptor;
        char cache_type;
        short size;
};

#define MB(x)   ((x) * 1024)

/* All the cache descriptor types we care about (no TLB or
   trace cache entries) */

static const struct _cache_table cache_table[] =
{
        { 0x06, LVL_1_INST, 8 },        /* 4-way set assoc, 32 byte line size */
        { 0x08, LVL_1_INST, 16 },       /* 4-way set assoc, 32 byte line size */
        { 0x09, LVL_1_INST, 32 },       /* 4-way set assoc, 64 byte line size */
        { 0x0a, LVL_1_DATA, 8 },        /* 2 way set assoc, 32 byte line size */
        { 0x0c, LVL_1_DATA, 16 },       /* 4-way set assoc, 32 byte line size */
        { 0x0d, LVL_1_DATA, 16 },       /* 4-way set assoc, 64 byte line size */
        { 0x0e, LVL_1_DATA, 24 },       /* 6-way set assoc, 64 byte line size */
        { 0x21, LVL_2,      256 },      /* 8-way set assoc, 64 byte line size */
        { 0x22, LVL_3,      512 },      /* 4-way set assoc, sectored cache, 64 byte line size */
        { 0x23, LVL_3,      MB(1) },    /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x25, LVL_3,      MB(2) },    /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x29, LVL_3,      MB(4) },    /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x2c, LVL_1_DATA, 32 },       /* 8-way set assoc, 64 byte line size */
        { 0x30, LVL_1_INST, 32 },       /* 8-way set assoc, 64 byte line size */
        { 0x39, LVL_2,      128 },      /* 4-way set assoc, sectored cache, 64 byte line size */
        { 0x3a, LVL_2,      192 },      /* 6-way set assoc, sectored cache, 64 byte line size */
        { 0x3b, LVL_2,      128 },      /* 2-way set assoc, sectored cache, 64 byte line size */
        { 0x3c, LVL_2,      256 },      /* 4-way set assoc, sectored cache, 64 byte line size */
        { 0x3d, LVL_2,      384 },      /* 6-way set assoc, sectored cache, 64 byte line size */
        { 0x3e, LVL_2,      512 },      /* 4-way set assoc, sectored cache, 64 byte line size */
        { 0x3f, LVL_2,      256 },      /* 2-way set assoc, 64 byte line size */
        { 0x41, LVL_2,      128 },      /* 4-way set assoc, 32 byte line size */
        { 0x42, LVL_2,      256 },      /* 4-way set assoc, 32 byte line size */
        { 0x43, LVL_2,      512 },      /* 4-way set assoc, 32 byte line size */
        { 0x44, LVL_2,      MB(1) },    /* 4-way set assoc, 32 byte line size */
        { 0x45, LVL_2,      MB(2) },    /* 4-way set assoc, 32 byte line size */
        { 0x46, LVL_3,      MB(4) },    /* 4-way set assoc, 64 byte line size */
        { 0x47, LVL_3,      MB(8) },    /* 8-way set assoc, 64 byte line size */
        { 0x48, LVL_2,      MB(3) },    /* 12-way set assoc, 64 byte line size */
        { 0x49, LVL_3,      MB(4) },    /* 16-way set assoc, 64 byte line size */
        { 0x4a, LVL_3,      MB(6) },    /* 12-way set assoc, 64 byte line size */
        { 0x4b, LVL_3,      MB(8) },    /* 16-way set assoc, 64 byte line size */
        { 0x4c, LVL_3,      MB(12) },   /* 12-way set assoc, 64 byte line size */
        { 0x4d, LVL_3,      MB(16) },   /* 16-way set assoc, 64 byte line size */
        { 0x4e, LVL_2,      MB(6) },    /* 24-way set assoc, 64 byte line size */
        { 0x60, LVL_1_DATA, 16 },       /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x66, LVL_1_DATA, 8 },        /* 4-way set assoc, sectored cache, 64 byte line size */
        { 0x67, LVL_1_DATA, 16 },       /* 4-way set assoc, sectored cache, 64 byte line size */
        { 0x68, LVL_1_DATA, 32 },       /* 4-way set assoc, sectored cache, 64 byte line size */
        { 0x70, LVL_TRACE,  12 },       /* 8-way set assoc */
        { 0x71, LVL_TRACE,  16 },       /* 8-way set assoc */
        { 0x72, LVL_TRACE,  32 },       /* 8-way set assoc */
        { 0x73, LVL_TRACE,  64 },       /* 8-way set assoc */
        { 0x78, LVL_2,      MB(1) },    /* 4-way set assoc, 64 byte line size */
        { 0x79, LVL_2,      128 },      /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x7a, LVL_2,      256 },      /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x7b, LVL_2,      512 },      /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x7c, LVL_2,      MB(1) },    /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x7d, LVL_2,      MB(2) },    /* 8-way set assoc, 64 byte line size */
        { 0x7f, LVL_2,      512 },      /* 2-way set assoc, 64 byte line size */
        { 0x80, LVL_2,      512 },      /* 8-way set assoc, 64 byte line size */
        { 0x82, LVL_2,      256 },      /* 8-way set assoc, 32 byte line size */
        { 0x83, LVL_2,      512 },      /* 8-way set assoc, 32 byte line size */
        { 0x84, LVL_2,      MB(1) },    /* 8-way set assoc, 32 byte line size */
        { 0x85, LVL_2,      MB(2) },    /* 8-way set assoc, 32 byte line size */
        { 0x86, LVL_2,      512 },      /* 4-way set assoc, 64 byte line size */
        { 0x87, LVL_2,      MB(1) },    /* 8-way set assoc, 64 byte line size */
        { 0xd0, LVL_3,      512 },      /* 4-way set assoc, 64 byte line size */
        { 0xd1, LVL_3,      MB(1) },    /* 4-way set assoc, 64 byte line size */
        { 0xd2, LVL_3,      MB(2) },    /* 4-way set assoc, 64 byte line size */
        { 0xd6, LVL_3,      MB(1) },    /* 8-way set assoc, 64 byte line size */
        { 0xd7, LVL_3,      MB(2) },    /* 8-way set assoc, 64 byte line size */
        { 0xd8, LVL_3,      MB(4) },    /* 12-way set assoc, 64 byte line size */
        { 0xdc, LVL_3,      MB(2) },    /* 12-way set assoc, 64 byte line size */
        { 0xdd, LVL_3,      MB(4) },    /* 12-way set assoc, 64 byte line size */
        { 0xde, LVL_3,      MB(8) },    /* 12-way set assoc, 64 byte line size */
        { 0xe2, LVL_3,      MB(2) },    /* 16-way set assoc, 64 byte line size */
        { 0xe3, LVL_3,      MB(4) },    /* 16-way set assoc, 64 byte line size */
        { 0xe4, LVL_3,      MB(8) },    /* 16-way set assoc, 64 byte line size */
        { 0xea, LVL_3,      MB(12) },   /* 24-way set assoc, 64 byte line size */
        { 0xeb, LVL_3,      MB(18) },   /* 24-way set assoc, 64 byte line size */
        { 0xec, LVL_3,      MB(24) },   /* 24-way set assoc, 64 byte line size */
        { 0x00, 0, 0}
};
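
/*
 * Example of how cache_table[] is consumed: if CPUID leaf 2 returns the
 * descriptor 0x2c, the lookup in init_intel_cacheinfo() below matches
 * { 0x2c, LVL_1_DATA, 32 } and adds 32 KB to the L1 data cache total.
 * All sizes are kept in KB, which is why MB(x) simply scales by 1024.
 */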


enum _cache_type {
        CTYPE_NULL = 0,
        CTYPE_DATA = 1,
        CTYPE_INST = 2,
        CTYPE_UNIFIED = 3
};

union _cpuid4_leaf_eax {
        struct {
                enum _cache_type        type:5;
                unsigned int            level:3;
                unsigned int            is_self_initializing:1;
                unsigned int            is_fully_associative:1;
                unsigned int            reserved:4;
                unsigned int            num_threads_sharing:12;
                unsigned int            num_cores_on_die:6;
        } split;
        u32 full;
};

union _cpuid4_leaf_ebx {
        struct {
                unsigned int            coherency_line_size:12;
                unsigned int            physical_line_partition:10;
                unsigned int            ways_of_associativity:10;
        } split;
        u32 full;
};

union _cpuid4_leaf_ecx {
        struct {
                unsigned int            number_of_sets:32;
        } split;
        u32 full;
};

struct _cpuid4_info_regs {
        union _cpuid4_leaf_eax eax;
        union _cpuid4_leaf_ebx ebx;
        union _cpuid4_leaf_ecx ecx;
        unsigned int id;
        unsigned long size;
        struct amd_northbridge *nb;
};

static unsigned short num_cache_leaves;

/* AMD doesn't have CPUID4. Emulate it here to report the same
   information to the user. This makes some assumptions about the machine:
   L2 not shared, no SMT etc., which currently hold on AMD CPUs.

   In theory the TLBs could be reported as a fake type (they are in "dummy").
   Maybe later */
union l1_cache {
        struct {
                unsigned line_size:8;
                unsigned lines_per_tag:8;
                unsigned assoc:8;
                unsigned size_in_kb:8;
        };
        unsigned val;
};

union l2_cache {
        struct {
                unsigned line_size:8;
                unsigned lines_per_tag:4;
                unsigned assoc:4;
                unsigned size_in_kb:16;
        };
        unsigned val;
};

union l3_cache {
        struct {
                unsigned line_size:8;
                unsigned lines_per_tag:4;
                unsigned assoc:4;
                unsigned res:2;
                unsigned size_encoded:14;
        };
        unsigned val;
};

static const unsigned short assocs[] = {
        [1] = 1,
        [2] = 2,
        [4] = 4,
        [6] = 8,
        [8] = 16,
        [0xa] = 32,
        [0xb] = 48,
        [0xc] = 64,
        [0xd] = 96,
        [0xe] = 128,
        [0xf] = 0xffff /* fully associative - no way to show this currently */
};

static const unsigned char levels[] = { 1, 1, 2, 3 };
static const unsigned char types[] = { 1, 2, 3, 3 };

static const enum cache_type cache_type_map[] = {
        [CTYPE_NULL] = CACHE_TYPE_NOCACHE,
        [CTYPE_DATA] = CACHE_TYPE_DATA,
        [CTYPE_INST] = CACHE_TYPE_INST,
        [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
};

static void
amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
                     union _cpuid4_leaf_ebx *ebx,
                     union _cpuid4_leaf_ecx *ecx)
{
        unsigned dummy;
        unsigned line_size, lines_per_tag, assoc, size_in_kb;
        union l1_cache l1i, l1d;
        union l2_cache l2;
        union l3_cache l3;
        union l1_cache *l1 = &l1d;

        eax->full = 0;
        ebx->full = 0;
        ecx->full = 0;

        cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
        cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

        switch (leaf) {
        case 1:
                l1 = &l1i;
                /* fall through */
        case 0:
                if (!l1->val)
                        return;
                assoc = assocs[l1->assoc];
                line_size = l1->line_size;
                lines_per_tag = l1->lines_per_tag;
                size_in_kb = l1->size_in_kb;
                break;
        case 2:
                if (!l2.val)
                        return;
                assoc = assocs[l2.assoc];
                line_size = l2.line_size;
                lines_per_tag = l2.lines_per_tag;
                /* cpu_data has errata corrections for K7 applied */
                size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
                break;
        case 3:
                if (!l3.val)
                        return;
                assoc = assocs[l3.assoc];
                line_size = l3.line_size;
                lines_per_tag = l3.lines_per_tag;
                size_in_kb = l3.size_encoded * 512;
                if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
                        size_in_kb = size_in_kb >> 1;
                        assoc = assoc >> 1;
                }
                break;
        default:
                return;
        }

        eax->split.is_self_initializing = 1;
        eax->split.type = types[leaf];
        eax->split.level = levels[leaf];
        eax->split.num_threads_sharing = 0;
        eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;

        if (assoc == 0xffff)
                eax->split.is_fully_associative = 1;
        ebx->split.coherency_line_size = line_size - 1;
        ebx->split.ways_of_associativity = assoc - 1;
        ebx->split.physical_line_partition = lines_per_tag - 1;
        ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
                (ebx->split.ways_of_associativity + 1) - 1;
}
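
/*
 * Worked example of the emulated encoding above (illustrative numbers, not
 * tied to a particular CPU): a 512 KB, 16-way L2 with 64-byte lines and one
 * line per tag is reported as ways_of_associativity = 15,
 * coherency_line_size = 63, physical_line_partition = 0 and
 * number_of_sets = 512 * 1024 / 64 / 16 - 1 = 511, matching the 0-based
 * convention of the real CPUID leaf 4 registers.
 */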

#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)

/*
 * L3 cache descriptors
 */
static void amd_calc_l3_indices(struct amd_northbridge *nb)
{
        struct amd_l3_cache *l3 = &nb->l3_cache;
        unsigned int sc0, sc1, sc2, sc3;
        u32 val = 0;

        pci_read_config_dword(nb->misc, 0x1C4, &val);

        /* calculate subcache sizes */
        l3->subcaches[0] = sc0 = !(val & BIT(0));
        l3->subcaches[1] = sc1 = !(val & BIT(4));

        if (boot_cpu_data.x86 == 0x15) {
                l3->subcaches[0] = sc0 += !(val & BIT(1));
                l3->subcaches[1] = sc1 += !(val & BIT(5));
        }

        l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
        l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));

        l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}
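
/*
 * For example, when the 0x1C4 config read above shows all four subcaches
 * enabled and each scN evaluates to 1, the result is
 * l3->indices = (1 << 10) - 1 = 1023 disable-able indices; larger subcache
 * counts scale that limit accordingly.
 */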

/*
 * check whether a slot used for disabling an L3 index is occupied.
 * @nb: amd_northbridge descriptor holding the L3 cache
 * @slot: slot number (0..1)
 *
 * @returns: the disabled index if used or negative value if slot free.
 */
static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
{
        unsigned int reg = 0;

        pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);

        /* check whether this slot is activated already */
        if (reg & (3UL << 30))
                return reg & 0xfff;

        return -1;
}
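
/*
 * Layout of the slot register read above: bits 31:30 are set once a slot has
 * been armed by amd_l3_disable_index() below, and the low twelve bits hold
 * the disabled index, which is why a free slot reads back as -1 here.
 */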

static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
                                  unsigned int slot)
{
        int index;
        struct amd_northbridge *nb = this_leaf->priv;

        index = amd_get_l3_disable_slot(nb, slot);
        if (index >= 0)
                return sprintf(buf, "%d\n", index);

        return sprintf(buf, "FREE\n");
}

#define SHOW_CACHE_DISABLE(slot)                                        \
static ssize_t                                                          \
cache_disable_##slot##_show(struct device *dev,                         \
                            struct device_attribute *attr, char *buf)   \
{                                                                       \
        struct cacheinfo *this_leaf = dev_get_drvdata(dev);             \
        return show_cache_disable(this_leaf, buf, slot);                \
}
SHOW_CACHE_DISABLE(0)
SHOW_CACHE_DISABLE(1)

static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
                                 unsigned slot, unsigned long idx)
{
        int i;

        idx |= BIT(30);

        /*
         *  disable index in all 4 subcaches
         */
        for (i = 0; i < 4; i++) {
                u32 reg = idx | (i << 20);

                if (!nb->l3_cache.subcaches[i])
                        continue;

                pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);

                /*
                 * We need to WBINVD on a core on the node containing the L3
                 * cache whose indices we disable, therefore a simple wbinvd()
                 * is not sufficient.
                 */
                wbinvd_on_cpu(cpu);

                reg |= BIT(31);
                pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
        }
}

/*
 * disable an L3 cache index by using a disable-slot
 *
 * @nb:    amd_northbridge descriptor holding the L3 cache
 * @cpu:   A CPU on the node containing the L3 cache
 * @slot:  slot number (0..1)
 * @index: index to disable
 *
 * @return: 0 on success, error status on failure
 */
static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
                            unsigned slot, unsigned long index)
{
        int ret = 0;

        /*  check if @slot is already used or the index is already disabled */
        ret = amd_get_l3_disable_slot(nb, slot);
        if (ret >= 0)
                return -EEXIST;

        if (index > nb->l3_cache.indices)
                return -EINVAL;

        /* check whether the other slot has disabled the same index already */
        if (index == amd_get_l3_disable_slot(nb, !slot))
                return -EEXIST;

        amd_l3_disable_index(nb, cpu, slot, index);

        return 0;
}

static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
                                   const char *buf, size_t count,
                                   unsigned int slot)
{
        unsigned long val = 0;
        int cpu, err = 0;
        struct amd_northbridge *nb = this_leaf->priv;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        cpu = cpumask_first(&this_leaf->shared_cpu_map);

        if (kstrtoul(buf, 10, &val) < 0)
                return -EINVAL;

        err = amd_set_l3_disable_slot(nb, cpu, slot, val);
        if (err) {
                if (err == -EEXIST)
                        pr_warn("L3 slot %d in use/index already disabled!\n",
                                   slot);
                return err;
        }
        return count;
}

#define STORE_CACHE_DISABLE(slot)                                       \
static ssize_t                                                          \
cache_disable_##slot##_store(struct device *dev,                        \
                             struct device_attribute *attr,             \
                             const char *buf, size_t count)             \
{                                                                       \
        struct cacheinfo *this_leaf = dev_get_drvdata(dev);             \
        return store_cache_disable(this_leaf, buf, count, slot);        \
}
STORE_CACHE_DISABLE(0)
STORE_CACHE_DISABLE(1)
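
/*
 * These attributes are attached to the generic cacheinfo sysfs nodes via
 * cache_get_priv_group() below. On a typical AMD system the L3 leaf is
 * index3, so (path shown purely as an illustration)
 *
 *   echo 42 > /sys/devices/system/cpu/cpu0/cache/index3/cache_disable_0
 *
 * lands in store_cache_disable() for slot 0, and reading the same file
 * reports either the disabled index or "FREE".
 */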

static ssize_t subcaches_show(struct device *dev,
                              struct device_attribute *attr, char *buf)
{
        struct cacheinfo *this_leaf = dev_get_drvdata(dev);
        int cpu = cpumask_first(&this_leaf->shared_cpu_map);

        return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
}

static ssize_t subcaches_store(struct device *dev,
                               struct device_attribute *attr,
                               const char *buf, size_t count)
{
        struct cacheinfo *this_leaf = dev_get_drvdata(dev);
        int cpu = cpumask_first(&this_leaf->shared_cpu_map);
        unsigned long val;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        if (kstrtoul(buf, 16, &val) < 0)
                return -EINVAL;

        if (amd_set_subcaches(cpu, val))
                return -EINVAL;

        return count;
}

static DEVICE_ATTR_RW(cache_disable_0);
static DEVICE_ATTR_RW(cache_disable_1);
static DEVICE_ATTR_RW(subcaches);

static umode_t
cache_private_attrs_is_visible(struct kobject *kobj,
                               struct attribute *attr, int unused)
{
        struct device *dev = kobj_to_dev(kobj);
        struct cacheinfo *this_leaf = dev_get_drvdata(dev);
        umode_t mode = attr->mode;

        if (!this_leaf->priv)
                return 0;

        if ((attr == &dev_attr_subcaches.attr) &&
            amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
                return mode;

        if ((attr == &dev_attr_cache_disable_0.attr ||
             attr == &dev_attr_cache_disable_1.attr) &&
            amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
                return mode;

        return 0;
}

static struct attribute_group cache_private_group = {
        .is_visible = cache_private_attrs_is_visible,
};

static void init_amd_l3_attrs(void)
{
        int n = 1;
        static struct attribute **amd_l3_attrs;

        if (amd_l3_attrs) /* already initialized */
                return;

        if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
                n += 2;
        if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
                n += 1;

        amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
        if (!amd_l3_attrs)
                return;

        n = 0;
        if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
                amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
                amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
        }
        if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
                amd_l3_attrs[n++] = &dev_attr_subcaches.attr;

        cache_private_group.attrs = amd_l3_attrs;
}

const struct attribute_group *
cache_get_priv_group(struct cacheinfo *this_leaf)
{
        struct amd_northbridge *nb = this_leaf->priv;

        if (this_leaf->level < 3 || !nb)
                return NULL;

        if (nb && nb->l3_cache.indices)
                init_amd_l3_attrs();

        return &cache_private_group;
}

static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
{
        int node;

        /* only for L3, and not in virtualized environments */
        if (index < 3)
                return;

        node = amd_get_nb_id(smp_processor_id());
        this_leaf->nb = node_to_amd_nb(node);
        if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
                amd_calc_l3_indices(this_leaf->nb);
}
#else
#define amd_init_l3_cache(x, y)
#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */

static int
cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
{
        union _cpuid4_leaf_eax  eax;
        union _cpuid4_leaf_ebx  ebx;
        union _cpuid4_leaf_ecx  ecx;
        unsigned                edx;

        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
                if (boot_cpu_has(X86_FEATURE_TOPOEXT))
                        cpuid_count(0x8000001d, index, &eax.full,
                                    &ebx.full, &ecx.full, &edx);
                else
                        amd_cpuid4(index, &eax, &ebx, &ecx);
                amd_init_l3_cache(this_leaf, index);
        } else {
                cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
        }

        if (eax.split.type == CTYPE_NULL)
                return -EIO; /* better error ? */

        this_leaf->eax = eax;
        this_leaf->ebx = ebx;
        this_leaf->ecx = ecx;
        this_leaf->size = (ecx.split.number_of_sets          + 1) *
                          (ebx.split.coherency_line_size     + 1) *
                          (ebx.split.physical_line_partition + 1) *
                          (ebx.split.ways_of_associativity   + 1);
        return 0;
}
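
/*
 * Example of the size computation above: a leaf reporting 64-byte lines
 * (coherency_line_size = 63), 8 ways (ways_of_associativity = 7), one line
 * partition and 1024 sets (number_of_sets = 1023) gives
 * 1024 * 64 * 1 * 8 = 524288 bytes, i.e. a 512 KB cache.
 */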

static int find_num_cache_leaves(struct cpuinfo_x86 *c)
{
        unsigned int            eax, ebx, ecx, edx, op;
        union _cpuid4_leaf_eax  cache_eax;
        int                     i = -1;

        if (c->x86_vendor == X86_VENDOR_AMD)
                op = 0x8000001d;
        else
                op = 4;

        do {
                ++i;
                /* Do cpuid(op) loop to find out num_cache_leaves */
                cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
                cache_eax.full = eax;
        } while (cache_eax.split.type != CTYPE_NULL);
        return i;
}
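
/*
 * E.g. a CPU enumerating separate L1d and L1i caches plus an L2 and an L3
 * returns a valid type for indices 0..3 and CTYPE_NULL for index 4, so the
 * loop above reports four leaves.
 */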

void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
{
        /*
         * We may have multiple LLCs if L3 caches exist, so check if we
         * have an L3 cache by looking at the L3 cache CPUID leaf.
         */
        if (!cpuid_edx(0x80000006))
                return;

        if (c->x86 < 0x17) {
                /* LLC is at the node level. */
                per_cpu(cpu_llc_id, cpu) = node_id;
        } else if (c->x86 == 0x17 &&
                   c->x86_model >= 0 && c->x86_model <= 0x1F) {
                /*
                 * LLC is at the core complex level.
                 * Core complex ID is ApicId[3] for these processors.
                 */
                per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
        } else {
                /*
                 * LLC ID is calculated from the number of threads sharing the
                 * cache.
                 */
                u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
                u32 llc_index = find_num_cache_leaves(c) - 1;

                cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
                if (eax)
                        num_sharing_cache = ((eax >> 14) & 0xfff) + 1;

                if (num_sharing_cache) {
                        int bits = get_count_order(num_sharing_cache);

                        per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
                }
        }
}
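
/*
 * For the family 0x17 models handled above, apicid >> 3 assigns one LLC ID
 * per group of eight APIC IDs (one core complex). The generic branch arrives
 * at the same grouping when eight threads share the L3:
 * get_count_order(8) = 3, so the APIC ID is again shifted right by three.
 */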

void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
        if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
                num_cache_leaves = find_num_cache_leaves(c);
        } else if (c->extended_cpuid_level >= 0x80000006) {
                if (cpuid_edx(0x80000006) & 0xf000)
                        num_cache_leaves = 4;
                else
                        num_cache_leaves = 3;
        }
}
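
/*
 * The 0xf000 mask above tests the L3 associativity field of CPUID
 * 0x80000006 EDX: when it is non-zero an L3 exists and the legacy path
 * emulates four leaves (L1d, L1i, L2, L3), otherwise only three.
 */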

void init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
        /* Cache sizes */
        unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
        unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
        unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
        unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
#ifdef CONFIG_SMP
        unsigned int cpu = c->cpu_index;
#endif

        if (c->cpuid_level > 3) {
                static int is_initialized;

                if (is_initialized == 0) {
                        /* Init num_cache_leaves from boot CPU */
                        num_cache_leaves = find_num_cache_leaves(c);
                        is_initialized++;
                }

                /*
                 * Whenever possible use cpuid(4), deterministic cache
                 * parameters cpuid leaf to find the cache details
                 */
                for (i = 0; i < num_cache_leaves; i++) {
                        struct _cpuid4_info_regs this_leaf = {};
                        int retval;

                        retval = cpuid4_cache_lookup_regs(i, &this_leaf);
                        if (retval < 0)
                                continue;

                        switch (this_leaf.eax.split.level) {
                        case 1:
                                if (this_leaf.eax.split.type == CTYPE_DATA)
                                        new_l1d = this_leaf.size/1024;
                                else if (this_leaf.eax.split.type == CTYPE_INST)
                                        new_l1i = this_leaf.size/1024;
                                break;
                        case 2:
                                new_l2 = this_leaf.size/1024;
                                num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
                                index_msb = get_count_order(num_threads_sharing);
                                l2_id = c->apicid & ~((1 << index_msb) - 1);
                                break;
                        case 3:
                                new_l3 = this_leaf.size/1024;
                                num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
                                index_msb = get_count_order(num_threads_sharing);
                                l3_id = c->apicid & ~((1 << index_msb) - 1);
                                break;
                        default:
                                break;
                        }
                }
        }
        /*
         * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
         * trace cache
         */
        if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
                /* supports eax=2  call */
                int j, n;
                unsigned int regs[4];
                unsigned char *dp = (unsigned char *)regs;
                int only_trace = 0;

                if (num_cache_leaves != 0 && c->x86 == 15)
                        only_trace = 1;

                /* Number of times to iterate */
                n = cpuid_eax(2) & 0xFF;

                for (i = 0 ; i < n ; i++) {
                        cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

                        /* If bit 31 is set, this is an unknown format */
                        for (j = 0 ; j < 3 ; j++)
                                if (regs[j] & (1 << 31))
                                        regs[j] = 0;

                        /* Byte 0 is level count, not a descriptor */
                        for (j = 1 ; j < 16 ; j++) {
                                unsigned char des = dp[j];
                                unsigned char k = 0;

                                /* look up this descriptor in the table */
                                while (cache_table[k].descriptor != 0) {
                                        if (cache_table[k].descriptor == des) {
                                                if (only_trace && cache_table[k].cache_type != LVL_TRACE)
                                                        break;
                                                switch (cache_table[k].cache_type) {
                                                case LVL_1_INST:
                                                        l1i += cache_table[k].size;
                                                        break;
                                                case LVL_1_DATA:
                                                        l1d += cache_table[k].size;
                                                        break;
                                                case LVL_2:
                                                        l2 += cache_table[k].size;
                                                        break;
                                                case LVL_3:
                                                        l3 += cache_table[k].size;
                                                        break;
                                                case LVL_TRACE:
                                                        trace += cache_table[k].size;
                                                        break;
                                                }

                                                break;
                                        }

                                        k++;
                                }
                        }
                }
        }

        if (new_l1d)
                l1d = new_l1d;

        if (new_l1i)
                l1i = new_l1i;

        if (new_l2) {
                l2 = new_l2;
#ifdef CONFIG_SMP
                per_cpu(cpu_llc_id, cpu) = l2_id;
#endif
        }

        if (new_l3) {
                l3 = new_l3;
#ifdef CONFIG_SMP
                per_cpu(cpu_llc_id, cpu) = l3_id;
#endif
        }

#ifdef CONFIG_SMP
        /*
         * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
         * turn means that the only possibility is SMT (as indicated in
         * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
         * that SMT shares all caches, we can unconditionally set cpu_llc_id to
         * c->phys_proc_id.
         */
        if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
                per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
#endif

        c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

        if (!l2)
                cpu_detect_cache_sizes(c);
}

static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
                                    struct _cpuid4_info_regs *base)
{
        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
        struct cacheinfo *this_leaf;
        int i, sibling;

        /*
         * For L3, always use the pre-calculated cpu_llc_shared_mask
         * to derive shared_cpu_map.
         */
        if (index == 3) {
                for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
                        this_cpu_ci = get_cpu_cacheinfo(i);
                        if (!this_cpu_ci->info_list)
                                continue;
                        this_leaf = this_cpu_ci->info_list + index;
                        for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
                                if (!cpu_online(sibling))
                                        continue;
                                cpumask_set_cpu(sibling,
                                                &this_leaf->shared_cpu_map);
                        }
                }
        } else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
                unsigned int apicid, nshared, first, last;

                nshared = base->eax.split.num_threads_sharing + 1;
                apicid = cpu_data(cpu).apicid;
                first = apicid - (apicid % nshared);
                last = first + nshared - 1;

                for_each_online_cpu(i) {
                        this_cpu_ci = get_cpu_cacheinfo(i);
                        if (!this_cpu_ci->info_list)
                                continue;

                        apicid = cpu_data(i).apicid;
                        if ((apicid < first) || (apicid > last))
                                continue;

                        this_leaf = this_cpu_ci->info_list + index;

                        for_each_online_cpu(sibling) {
                                apicid = cpu_data(sibling).apicid;
                                if ((apicid < first) || (apicid > last))
                                        continue;
                                cpumask_set_cpu(sibling,
                                                &this_leaf->shared_cpu_map);
                        }
                }
        } else
                return 0;

        return 1;
}
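
/*
 * Illustration of the TOPOEXT window above: with num_threads_sharing = 3
 * (nshared = 4) and apicid = 5, first = 4 and last = 7, so the leaf is
 * marked shared by every online CPU whose APIC ID falls within [4, 7].
 */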

static void __cache_cpumap_setup(unsigned int cpu, int index,
                                 struct _cpuid4_info_regs *base)
{
        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
        struct cacheinfo *this_leaf, *sibling_leaf;
        unsigned long num_threads_sharing;
        int index_msb, i;
        struct cpuinfo_x86 *c = &cpu_data(cpu);

        if (c->x86_vendor == X86_VENDOR_AMD) {
                if (__cache_amd_cpumap_setup(cpu, index, base))
                        return;
        }

        this_leaf = this_cpu_ci->info_list + index;
        num_threads_sharing = 1 + base->eax.split.num_threads_sharing;

        cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
        if (num_threads_sharing == 1)
                return;

        index_msb = get_count_order(num_threads_sharing);

        for_each_online_cpu(i)
                if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
                        struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);

                        if (i == cpu || !sib_cpu_ci->info_list)
                                continue; /* skip if itself or no cacheinfo */
                        sibling_leaf = sib_cpu_ci->info_list + index;
                        cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
                        cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
                }
}

static void ci_leaf_init(struct cacheinfo *this_leaf,
                         struct _cpuid4_info_regs *base)
{
        this_leaf->id = base->id;
        this_leaf->attributes = CACHE_ID;
        this_leaf->level = base->eax.split.level;
        this_leaf->type = cache_type_map[base->eax.split.type];
        this_leaf->coherency_line_size =
                                base->ebx.split.coherency_line_size + 1;
        this_leaf->ways_of_associativity =
                                base->ebx.split.ways_of_associativity + 1;
        this_leaf->size = base->size;
        this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
        this_leaf->physical_line_partition =
                                base->ebx.split.physical_line_partition + 1;
        this_leaf->priv = base->nb;
}

static int __init_cache_level(unsigned int cpu)
{
        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);

        if (!num_cache_leaves)
                return -ENOENT;
        if (!this_cpu_ci)
                return -EINVAL;
        this_cpu_ci->num_levels = 3;
        this_cpu_ci->num_leaves = num_cache_leaves;
        return 0;
}

/*
 * The max shared threads number comes from CPUID.4:EAX[25-14] with input
 * ECX as cache index. Then right shift apicid by the number's order to get
 * cache id for this cache node.
 */
static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
{
        struct cpuinfo_x86 *c = &cpu_data(cpu);
        unsigned long num_threads_sharing;
        int index_msb;

        num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
        index_msb = get_count_order(num_threads_sharing);
        id4_regs->id = c->apicid >> index_msb;
}
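
/*
 * Example: a leaf shared by eight threads has num_threads_sharing = 7, so
 * get_count_order(8) = 3 and the cache id becomes apicid >> 3, giving all
 * eight sharing threads the same id.
 */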

static int __populate_cache_leaves(unsigned int cpu)
{
        unsigned int idx, ret;
        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
        struct cacheinfo *this_leaf = this_cpu_ci->info_list;
        struct _cpuid4_info_regs id4_regs = {};

        for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
                ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
                if (ret)
                        return ret;
                get_cache_id(cpu, &id4_regs);
                ci_leaf_init(this_leaf++, &id4_regs);
                __cache_cpumap_setup(cpu, idx, &id4_regs);
        }
        this_cpu_ci->cpu_map_populated = true;

        return 0;
}

DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)