linux/arch/x86/kernel/cpu/intel_cacheinfo.c
/*
 *      Routines to identify caches on Intel CPU.
 *
 *      Changes:
 *      Venkatesh Pallipadi     : Adding cache identification through cpuid(4)
 *      Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
 *      Andi Kleen / Andreas Herrmann   : CPUID4 emulation on AMD.
 */

#include <linux/slab.h>
#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/sysfs.h>
#include <linux/pci.h>

#include <asm/cpufeature.h>
#include <asm/amd_nb.h>
#include <asm/smp.h>

#define LVL_1_INST      1
#define LVL_1_DATA      2
#define LVL_2           3
#define LVL_3           4
#define LVL_TRACE       5

struct _cache_table {
        unsigned char descriptor;
        char cache_type;
        short size;
};

#define MB(x)   ((x) * 1024)
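
/*
 * Note: sizes in cache_table below are in KB (MB() above only scales to
 * KB); the LVL_TRACE entries count K-uops of the P4 trace cache instead.
 */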

/* All the cache descriptor types we care about (no TLB entries;
   trace cache descriptors are included for the P4 path below) */

static const struct _cache_table cache_table[] =
{
        { 0x06, LVL_1_INST, 8 },        /* 4-way set assoc, 32 byte line size */
        { 0x08, LVL_1_INST, 16 },       /* 4-way set assoc, 32 byte line size */
        { 0x09, LVL_1_INST, 32 },       /* 4-way set assoc, 64 byte line size */
        { 0x0a, LVL_1_DATA, 8 },        /* 2 way set assoc, 32 byte line size */
        { 0x0c, LVL_1_DATA, 16 },       /* 4-way set assoc, 32 byte line size */
        { 0x0d, LVL_1_DATA, 16 },       /* 4-way set assoc, 64 byte line size */
        { 0x0e, LVL_1_DATA, 24 },       /* 6-way set assoc, 64 byte line size */
        { 0x21, LVL_2,      256 },      /* 8-way set assoc, 64 byte line size */
        { 0x22, LVL_3,      512 },      /* 4-way set assoc, sectored cache, 64 byte line size */
        { 0x23, LVL_3,      MB(1) },    /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x25, LVL_3,      MB(2) },    /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x29, LVL_3,      MB(4) },    /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x2c, LVL_1_DATA, 32 },       /* 8-way set assoc, 64 byte line size */
        { 0x30, LVL_1_INST, 32 },       /* 8-way set assoc, 64 byte line size */
        { 0x39, LVL_2,      128 },      /* 4-way set assoc, sectored cache, 64 byte line size */
        { 0x3a, LVL_2,      192 },      /* 6-way set assoc, sectored cache, 64 byte line size */
        { 0x3b, LVL_2,      128 },      /* 2-way set assoc, sectored cache, 64 byte line size */
        { 0x3c, LVL_2,      256 },      /* 4-way set assoc, sectored cache, 64 byte line size */
        { 0x3d, LVL_2,      384 },      /* 6-way set assoc, sectored cache, 64 byte line size */
        { 0x3e, LVL_2,      512 },      /* 4-way set assoc, sectored cache, 64 byte line size */
        { 0x3f, LVL_2,      256 },      /* 2-way set assoc, 64 byte line size */
        { 0x41, LVL_2,      128 },      /* 4-way set assoc, 32 byte line size */
        { 0x42, LVL_2,      256 },      /* 4-way set assoc, 32 byte line size */
        { 0x43, LVL_2,      512 },      /* 4-way set assoc, 32 byte line size */
        { 0x44, LVL_2,      MB(1) },    /* 4-way set assoc, 32 byte line size */
        { 0x45, LVL_2,      MB(2) },    /* 4-way set assoc, 32 byte line size */
        { 0x46, LVL_3,      MB(4) },    /* 4-way set assoc, 64 byte line size */
        { 0x47, LVL_3,      MB(8) },    /* 8-way set assoc, 64 byte line size */
        { 0x48, LVL_2,      MB(3) },    /* 12-way set assoc, 64 byte line size */
        { 0x49, LVL_3,      MB(4) },    /* 16-way set assoc, 64 byte line size */
        { 0x4a, LVL_3,      MB(6) },    /* 12-way set assoc, 64 byte line size */
        { 0x4b, LVL_3,      MB(8) },    /* 16-way set assoc, 64 byte line size */
        { 0x4c, LVL_3,      MB(12) },   /* 12-way set assoc, 64 byte line size */
        { 0x4d, LVL_3,      MB(16) },   /* 16-way set assoc, 64 byte line size */
        { 0x4e, LVL_2,      MB(6) },    /* 24-way set assoc, 64 byte line size */
        { 0x60, LVL_1_DATA, 16 },       /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x66, LVL_1_DATA, 8 },        /* 4-way set assoc, sectored cache, 64 byte line size */
        { 0x67, LVL_1_DATA, 16 },       /* 4-way set assoc, sectored cache, 64 byte line size */
        { 0x68, LVL_1_DATA, 32 },       /* 4-way set assoc, sectored cache, 64 byte line size */
        { 0x70, LVL_TRACE,  12 },       /* 8-way set assoc */
        { 0x71, LVL_TRACE,  16 },       /* 8-way set assoc */
        { 0x72, LVL_TRACE,  32 },       /* 8-way set assoc */
        { 0x73, LVL_TRACE,  64 },       /* 8-way set assoc */
        { 0x78, LVL_2,      MB(1) },    /* 4-way set assoc, 64 byte line size */
        { 0x79, LVL_2,      128 },      /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x7a, LVL_2,      256 },      /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x7b, LVL_2,      512 },      /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x7c, LVL_2,      MB(1) },    /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x7d, LVL_2,      MB(2) },    /* 8-way set assoc, 64 byte line size */
        { 0x7f, LVL_2,      512 },      /* 2-way set assoc, 64 byte line size */
        { 0x80, LVL_2,      512 },      /* 8-way set assoc, 64 byte line size */
        { 0x82, LVL_2,      256 },      /* 8-way set assoc, 32 byte line size */
        { 0x83, LVL_2,      512 },      /* 8-way set assoc, 32 byte line size */
        { 0x84, LVL_2,      MB(1) },    /* 8-way set assoc, 32 byte line size */
        { 0x85, LVL_2,      MB(2) },    /* 8-way set assoc, 32 byte line size */
        { 0x86, LVL_2,      512 },      /* 4-way set assoc, 64 byte line size */
        { 0x87, LVL_2,      MB(1) },    /* 8-way set assoc, 64 byte line size */
        { 0xd0, LVL_3,      512 },      /* 4-way set assoc, 64 byte line size */
        { 0xd1, LVL_3,      MB(1) },    /* 4-way set assoc, 64 byte line size */
        { 0xd2, LVL_3,      MB(2) },    /* 4-way set assoc, 64 byte line size */
        { 0xd6, LVL_3,      MB(1) },    /* 8-way set assoc, 64 byte line size */
        { 0xd7, LVL_3,      MB(2) },    /* 8-way set assoc, 64 byte line size */
        { 0xd8, LVL_3,      MB(4) },    /* 12-way set assoc, 64 byte line size */
        { 0xdc, LVL_3,      MB(2) },    /* 12-way set assoc, 64 byte line size */
        { 0xdd, LVL_3,      MB(4) },    /* 12-way set assoc, 64 byte line size */
        { 0xde, LVL_3,      MB(8) },    /* 12-way set assoc, 64 byte line size */
        { 0xe2, LVL_3,      MB(2) },    /* 16-way set assoc, 64 byte line size */
        { 0xe3, LVL_3,      MB(4) },    /* 16-way set assoc, 64 byte line size */
        { 0xe4, LVL_3,      MB(8) },    /* 16-way set assoc, 64 byte line size */
        { 0xea, LVL_3,      MB(12) },   /* 24-way set assoc, 64 byte line size */
        { 0xeb, LVL_3,      MB(18) },   /* 24-way set assoc, 64 byte line size */
        { 0xec, LVL_3,      MB(24) },   /* 24-way set assoc, 64 byte line size */
        { 0x00, 0, 0}
};
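
/*
 * The descriptor values above come from Intel's CPUID leaf 0x2
 * documentation (SDM vol. 2A, CPUID instruction reference).
 */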

enum _cache_type {
        CTYPE_NULL = 0,
        CTYPE_DATA = 1,
        CTYPE_INST = 2,
        CTYPE_UNIFIED = 3
};

union _cpuid4_leaf_eax {
        struct {
                enum _cache_type        type:5;
                unsigned int            level:3;
                unsigned int            is_self_initializing:1;
                unsigned int            is_fully_associative:1;
                unsigned int            reserved:4;
                unsigned int            num_threads_sharing:12;
                unsigned int            num_cores_on_die:6;
        } split;
        u32 full;
};

union _cpuid4_leaf_ebx {
        struct {
                unsigned int            coherency_line_size:12;
                unsigned int            physical_line_partition:10;
                unsigned int            ways_of_associativity:10;
        } split;
        u32 full;
};

union _cpuid4_leaf_ecx {
        struct {
                unsigned int            number_of_sets:32;
        } split;
        u32 full;
};

struct _cpuid4_info_regs {
        union _cpuid4_leaf_eax eax;
        union _cpuid4_leaf_ebx ebx;
        union _cpuid4_leaf_ecx ecx;
        unsigned long size;
        struct amd_northbridge *nb;
};

static unsigned short num_cache_leaves;

/*
 * AMD doesn't have CPUID4. Emulate it here to report the same
 * information to the user. This makes some assumptions about the machine:
 * L2 not shared, no SMT etc. That is currently true on AMD CPUs.
 *
 * In theory the TLBs could be reported as fake type (they are in "dummy").
 * Maybe later.
 */
union l1_cache {
        struct {
                unsigned line_size:8;
                unsigned lines_per_tag:8;
                unsigned assoc:8;
                unsigned size_in_kb:8;
        };
        unsigned val;
};

union l2_cache {
        struct {
                unsigned line_size:8;
                unsigned lines_per_tag:4;
                unsigned assoc:4;
                unsigned size_in_kb:16;
        };
        unsigned val;
};

union l3_cache {
        struct {
                unsigned line_size:8;
                unsigned lines_per_tag:4;
                unsigned assoc:4;
                unsigned res:2;
                unsigned size_encoded:14;
        };
        unsigned val;
};

static const unsigned short assocs[] = {
        [1] = 1,
        [2] = 2,
        [4] = 4,
        [6] = 8,
        [8] = 16,
        [0xa] = 32,
        [0xb] = 48,
        [0xc] = 64,
        [0xd] = 96,
        [0xe] = 128,
        [0xf] = 0xffff /* fully associative - no way to show this currently */
};

static const unsigned char levels[] = { 1, 1, 2, 3 };
static const unsigned char types[] = { 1, 2, 3, 3 };
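
/*
 * levels[]/types[] translate the emulated leaf number (0 = L1d, 1 = L1i,
 * 2 = L2, 3 = L3) into the CPUID(4) level and type fields, where type
 * 1 = data, 2 = instruction and 3 = unified.
 */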

static const enum cache_type cache_type_map[] = {
        [CTYPE_NULL] = CACHE_TYPE_NOCACHE,
        [CTYPE_DATA] = CACHE_TYPE_DATA,
        [CTYPE_INST] = CACHE_TYPE_INST,
        [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
};

static void
amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
                     union _cpuid4_leaf_ebx *ebx,
                     union _cpuid4_leaf_ecx *ecx)
{
        unsigned dummy;
        unsigned line_size, lines_per_tag, assoc, size_in_kb;
        union l1_cache l1i, l1d;
        union l2_cache l2;
        union l3_cache l3;
        union l1_cache *l1 = &l1d;

        eax->full = 0;
        ebx->full = 0;
        ecx->full = 0;

        cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
        cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

        switch (leaf) {
        case 1:
                l1 = &l1i;
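                /* fall through - leaf 1 (L1i) reuses the L1 handling below */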
        case 0:
                if (!l1->val)
                        return;
                assoc = assocs[l1->assoc];
                line_size = l1->line_size;
                lines_per_tag = l1->lines_per_tag;
                size_in_kb = l1->size_in_kb;
                break;
        case 2:
                if (!l2.val)
                        return;
                assoc = assocs[l2.assoc];
                line_size = l2.line_size;
                lines_per_tag = l2.lines_per_tag;
                /* cpu_data has errata corrections for K7 applied */
                size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
                break;
        case 3:
                if (!l3.val)
                        return;
                assoc = assocs[l3.assoc];
                line_size = l3.line_size;
                lines_per_tag = l3.lines_per_tag;
                size_in_kb = l3.size_encoded * 512;
                if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
                        size_in_kb = size_in_kb >> 1;
                        assoc = assoc >> 1;
                }
                break;
        default:
                return;
        }

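        /*
         * CPUID(4) encodes line size, associativity and set count as
         * (value - 1), hence the "- 1" adjustments below.
         */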
        eax->split.is_self_initializing = 1;
        eax->split.type = types[leaf];
        eax->split.level = levels[leaf];
        eax->split.num_threads_sharing = 0;
        eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;

        if (assoc == 0xffff)
                eax->split.is_fully_associative = 1;
        ebx->split.coherency_line_size = line_size - 1;
        ebx->split.ways_of_associativity = assoc - 1;
        ebx->split.physical_line_partition = lines_per_tag - 1;
        ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
                (ebx->split.ways_of_associativity + 1) - 1;
}

#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)

/*
 * L3 cache descriptors
 */
static void amd_calc_l3_indices(struct amd_northbridge *nb)
{
        struct amd_l3_cache *l3 = &nb->l3_cache;
        unsigned int sc0, sc1, sc2, sc3;
        u32 val = 0;

        pci_read_config_dword(nb->misc, 0x1C4, &val);

        /* calculate subcache sizes */
        l3->subcaches[0] = sc0 = !(val & BIT(0));
        l3->subcaches[1] = sc1 = !(val & BIT(4));

        if (boot_cpu_data.x86 == 0x15) {
                l3->subcaches[0] = sc0 += !(val & BIT(1));
                l3->subcaches[1] = sc1 += !(val & BIT(5));
        }

        l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
        l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));

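        /*
         * The usable index range is derived from the largest subcache:
         * each subcache appears to contribute 1024 indices (hence << 10).
         */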
        l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}

/*
 * check whether a slot used for disabling an L3 index is occupied.
 * @nb:   northbridge descriptor containing the L3 cache
 * @slot: slot number (0..1)
 *
 * Return: the disabled index if used or a negative value if the slot is free.
 */
static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
{
        unsigned int reg = 0;

        pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);

        /* check whether this slot is activated already */
        if (reg & (3UL << 30))
                return reg & 0xfff;

        return -1;
}

static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
                                  unsigned int slot)
{
        int index;
        struct amd_northbridge *nb = this_leaf->priv;

        index = amd_get_l3_disable_slot(nb, slot);
        if (index >= 0)
                return sprintf(buf, "%d\n", index);

        return sprintf(buf, "FREE\n");
}

#define SHOW_CACHE_DISABLE(slot)                                        \
static ssize_t                                                          \
cache_disable_##slot##_show(struct device *dev,                         \
                            struct device_attribute *attr, char *buf)   \
{                                                                       \
        struct cacheinfo *this_leaf = dev_get_drvdata(dev);             \
        return show_cache_disable(this_leaf, buf, slot);                \
}
SHOW_CACHE_DISABLE(0)
SHOW_CACHE_DISABLE(1)
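
/*
 * The macro above expands to cache_disable_0_show()/cache_disable_1_show(),
 * which DEVICE_ATTR_RW() further below wires into the sysfs attributes.
 */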

static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
                                 unsigned slot, unsigned long idx)
{
        int i;

        idx |= BIT(30);

        /*
         *  disable index in all 4 subcaches
         */
        for (i = 0; i < 4; i++) {
                u32 reg = idx | (i << 20);

                if (!nb->l3_cache.subcaches[i])
                        continue;

                pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);

                /*
                 * We need to WBINVD on a core on the node containing the
                 * L3 cache whose indices we disable; therefore a simple
                 * wbinvd() is not sufficient.
                 */
                wbinvd_on_cpu(cpu);

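                /*
                 * Arm the disable only after the flush: bits 31:30 are
                 * what amd_get_l3_disable_slot() checks to consider the
                 * slot in use.
                 */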
                reg |= BIT(31);
                pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
        }
}

/*
 * disable a L3 cache index by using a disable-slot
 *
 * @nb:    northbridge descriptor containing the L3 cache
 * @cpu:   A CPU on the node containing the L3 cache
 * @slot:  slot number (0..1)
 * @index: index to disable
 *
 * Return: 0 on success, error status on failure
 */
static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
                            unsigned slot, unsigned long index)
{
        int ret = 0;

        /* check if @slot is already used or the index is already disabled */
        ret = amd_get_l3_disable_slot(nb, slot);
        if (ret >= 0)
                return -EEXIST;

        if (index > nb->l3_cache.indices)
                return -EINVAL;

        /* check whether the other slot has disabled the same index already */
        if (index == amd_get_l3_disable_slot(nb, !slot))
                return -EEXIST;

        amd_l3_disable_index(nb, cpu, slot, index);

        return 0;
}

static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
                                   const char *buf, size_t count,
                                   unsigned int slot)
{
        unsigned long val = 0;
        int cpu, err = 0;
        struct amd_northbridge *nb = this_leaf->priv;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        cpu = cpumask_first(&this_leaf->shared_cpu_map);

        if (kstrtoul(buf, 10, &val) < 0)
                return -EINVAL;

        err = amd_set_l3_disable_slot(nb, cpu, slot, val);
        if (err) {
                if (err == -EEXIST)
                        pr_warn("L3 slot %d in use/index already disabled!\n",
                                   slot);
                return err;
        }
        return count;
}

#define STORE_CACHE_DISABLE(slot)                                       \
static ssize_t                                                          \
cache_disable_##slot##_store(struct device *dev,                        \
                             struct device_attribute *attr,             \
                             const char *buf, size_t count)             \
{                                                                       \
        struct cacheinfo *this_leaf = dev_get_drvdata(dev);             \
        return store_cache_disable(this_leaf, buf, count, slot);        \
}
STORE_CACHE_DISABLE(0)
STORE_CACHE_DISABLE(1)

static ssize_t subcaches_show(struct device *dev,
                              struct device_attribute *attr, char *buf)
{
        struct cacheinfo *this_leaf = dev_get_drvdata(dev);
        int cpu = cpumask_first(&this_leaf->shared_cpu_map);

        return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
}

static ssize_t subcaches_store(struct device *dev,
                               struct device_attribute *attr,
                               const char *buf, size_t count)
{
        struct cacheinfo *this_leaf = dev_get_drvdata(dev);
        int cpu = cpumask_first(&this_leaf->shared_cpu_map);
        unsigned long val;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        if (kstrtoul(buf, 16, &val) < 0)
                return -EINVAL;

        if (amd_set_subcaches(cpu, val))
                return -EINVAL;

        return count;
}

static DEVICE_ATTR_RW(cache_disable_0);
static DEVICE_ATTR_RW(cache_disable_1);
static DEVICE_ATTR_RW(subcaches);
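
/*
 * DEVICE_ATTR_RW(name) declares dev_attr_<name> with 0644 permissions,
 * bound to the <name>_show()/<name>_store() handlers defined above.
 */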

static umode_t
cache_private_attrs_is_visible(struct kobject *kobj,
                               struct attribute *attr, int unused)
{
        struct device *dev = kobj_to_dev(kobj);
        struct cacheinfo *this_leaf = dev_get_drvdata(dev);
        umode_t mode = attr->mode;

        if (!this_leaf->priv)
                return 0;

        if ((attr == &dev_attr_subcaches.attr) &&
            amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
                return mode;

        if ((attr == &dev_attr_cache_disable_0.attr ||
             attr == &dev_attr_cache_disable_1.attr) &&
            amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
                return mode;

        return 0;
}

static struct attribute_group cache_private_group = {
        .is_visible = cache_private_attrs_is_visible,
};

static void init_amd_l3_attrs(void)
{
        int n = 1;
        static struct attribute **amd_l3_attrs;

        if (amd_l3_attrs) /* already initialized */
                return;

        if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
                n += 2;
        if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
                n += 1;

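        /*
         * n starts at 1 to leave room for the terminating NULL entry;
         * kcalloc() zeroes the array, which provides that terminator.
         */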
        amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
        if (!amd_l3_attrs)
                return;

        n = 0;
        if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
                amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
                amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
        }
        if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
                amd_l3_attrs[n++] = &dev_attr_subcaches.attr;

        cache_private_group.attrs = amd_l3_attrs;
}

const struct attribute_group *
cache_get_priv_group(struct cacheinfo *this_leaf)
{
        struct amd_northbridge *nb = this_leaf->priv;

        if (this_leaf->level < 3 || !nb)
                return NULL;

        if (nb->l3_cache.indices)
                init_amd_l3_attrs();

        return &cache_private_group;
}

static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
{
        int node;

        /* only for L3, and not in virtualized environments */
        if (index < 3)
                return;

        node = amd_get_nb_id(smp_processor_id());
        this_leaf->nb = node_to_amd_nb(node);
        if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
                amd_calc_l3_indices(this_leaf->nb);
}
#else
#define amd_init_l3_cache(x, y)
#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */

static int
cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
{
        union _cpuid4_leaf_eax  eax;
        union _cpuid4_leaf_ebx  ebx;
        union _cpuid4_leaf_ecx  ecx;
        unsigned                edx;

        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
                if (boot_cpu_has(X86_FEATURE_TOPOEXT))
                        cpuid_count(0x8000001d, index, &eax.full,
                                    &ebx.full, &ecx.full, &edx);
                else
                        amd_cpuid4(index, &eax, &ebx, &ecx);
                amd_init_l3_cache(this_leaf, index);
        } else {
                cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
        }

        if (eax.split.type == CTYPE_NULL)
                return -EIO; /* better error ? */

        this_leaf->eax = eax;
        this_leaf->ebx = ebx;
        this_leaf->ecx = ecx;
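        /*
         * CPUID(4) reports sets, line size, partitions and ways each as
         * (value - 1); the total cache size is their product.
         */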
        this_leaf->size = (ecx.split.number_of_sets          + 1) *
                          (ebx.split.coherency_line_size     + 1) *
                          (ebx.split.physical_line_partition + 1) *
                          (ebx.split.ways_of_associativity   + 1);
        return 0;
}

static int find_num_cache_leaves(struct cpuinfo_x86 *c)
{
        unsigned int            eax, ebx, ecx, edx, op;
        union _cpuid4_leaf_eax  cache_eax;
        int                     i = -1;

        if (c->x86_vendor == X86_VENDOR_AMD)
                op = 0x8000001d;
        else
                op = 4;

        do {
                ++i;
                /* Do cpuid(op) loop to find out num_cache_leaves */
                cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
                cache_eax.full = eax;
        } while (cache_eax.split.type != CTYPE_NULL);
        return i;
}

void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
        if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
                num_cache_leaves = find_num_cache_leaves(c);
        } else if (c->extended_cpuid_level >= 0x80000006) {
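                /*
                 * EDX[15:12] of CPUID 0x80000006 holds the L3
                 * associativity; a nonzero field means an L3 exists,
                 * i.e. a fourth cache leaf.
                 */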
                if (cpuid_edx(0x80000006) & 0xf000)
                        num_cache_leaves = 4;
                else
                        num_cache_leaves = 3;
        }
}

unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
        /* Cache sizes */
        unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
        unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
        unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
        unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
#ifdef CONFIG_SMP
        unsigned int cpu = c->cpu_index;
#endif

        if (c->cpuid_level > 3) {
                static int is_initialized;

                if (is_initialized == 0) {
                        /* Init num_cache_leaves from boot CPU */
                        num_cache_leaves = find_num_cache_leaves(c);
                        is_initialized++;
                }

                /*
                 * Whenever possible use cpuid(4), deterministic cache
                 * parameters cpuid leaf to find the cache details
                 */
                for (i = 0; i < num_cache_leaves; i++) {
                        struct _cpuid4_info_regs this_leaf = {};
                        int retval;

                        retval = cpuid4_cache_lookup_regs(i, &this_leaf);
                        if (retval < 0)
                                continue;

                        switch (this_leaf.eax.split.level) {
                        case 1:
                                if (this_leaf.eax.split.type == CTYPE_DATA)
                                        new_l1d = this_leaf.size/1024;
                                else if (this_leaf.eax.split.type == CTYPE_INST)
                                        new_l1i = this_leaf.size/1024;
                                break;
                        case 2:
                                new_l2 = this_leaf.size/1024;
                                num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
                                index_msb = get_count_order(num_threads_sharing);
                                l2_id = c->apicid & ~((1 << index_msb) - 1);
                                break;
                        case 3:
                                new_l3 = this_leaf.size/1024;
                                num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
                                index_msb = get_count_order(num_threads_sharing);
                                l3_id = c->apicid & ~((1 << index_msb) - 1);
                                break;
                        default:
                                break;
                        }
                }
        }
        /*
         * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
         * trace cache
         */
        if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
                /* supports eax=2 call */
                int j, n;
                unsigned int regs[4];
                unsigned char *dp = (unsigned char *)regs;
                int only_trace = 0;

                if (num_cache_leaves != 0 && c->x86 == 15)
                        only_trace = 1;

                /* Number of times to iterate */
                n = cpuid_eax(2) & 0xFF;

                for (i = 0 ; i < n ; i++) {
                        cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

                        /* If bit 31 is set, this is an unknown format */
                        for (j = 0 ; j < 3 ; j++)
                                if (regs[j] & (1 << 31))
                                        regs[j] = 0;

                        /* Byte 0 is level count, not a descriptor */
                        for (j = 1 ; j < 16 ; j++) {
                                unsigned char des = dp[j];
                                unsigned char k = 0;

                                /* look up this descriptor in the table */
                                while (cache_table[k].descriptor != 0) {
                                        if (cache_table[k].descriptor == des) {
                                                if (only_trace && cache_table[k].cache_type != LVL_TRACE)
                                                        break;
                                                switch (cache_table[k].cache_type) {
                                                case LVL_1_INST:
                                                        l1i += cache_table[k].size;
                                                        break;
                                                case LVL_1_DATA:
                                                        l1d += cache_table[k].size;
                                                        break;
                                                case LVL_2:
                                                        l2 += cache_table[k].size;
                                                        break;
                                                case LVL_3:
                                                        l3 += cache_table[k].size;
                                                        break;
                                                case LVL_TRACE:
                                                        trace += cache_table[k].size;
                                                        break;
                                                }

                                                break;
                                        }

                                        k++;
                                }
                        }
                }
        }

        if (new_l1d)
                l1d = new_l1d;

        if (new_l1i)
                l1i = new_l1i;

        if (new_l2) {
                l2 = new_l2;
#ifdef CONFIG_SMP
                per_cpu(cpu_llc_id, cpu) = l2_id;
#endif
        }

        if (new_l3) {
                l3 = new_l3;
#ifdef CONFIG_SMP
                per_cpu(cpu_llc_id, cpu) = l3_id;
#endif
        }

#ifdef CONFIG_SMP
        /*
         * If cpu_llc_id is not yet set, this means cpuid_level < 4, which in
         * turn means that the only possibility is SMT (as indicated in
         * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
         * that SMT shares all caches, we can unconditionally set cpu_llc_id to
         * c->phys_proc_id.
         */
        if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
                per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
#endif

        c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

        return l2;
}

static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
                                    struct _cpuid4_info_regs *base)
{
        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
        struct cacheinfo *this_leaf;
        int i, sibling;

        if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
                unsigned int apicid, nshared, first, last;

                this_leaf = this_cpu_ci->info_list + index;
                nshared = base->eax.split.num_threads_sharing + 1;
                apicid = cpu_data(cpu).apicid;
                first = apicid - (apicid % nshared);
                last = first + nshared - 1;
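
                /*
                 * CPUs whose APIC IDs fall within [first, last] share
                 * this cache leaf.
                 */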
                for_each_online_cpu(i) {
                        this_cpu_ci = get_cpu_cacheinfo(i);
                        if (!this_cpu_ci->info_list)
                                continue;

                        apicid = cpu_data(i).apicid;
                        if ((apicid < first) || (apicid > last))
                                continue;

                        this_leaf = this_cpu_ci->info_list + index;

                        for_each_online_cpu(sibling) {
                                apicid = cpu_data(sibling).apicid;
                                if ((apicid < first) || (apicid > last))
                                        continue;
                                cpumask_set_cpu(sibling,
                                                &this_leaf->shared_cpu_map);
                        }
                }
        } else if (index == 3) {
                for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
                        this_cpu_ci = get_cpu_cacheinfo(i);
                        if (!this_cpu_ci->info_list)
                                continue;
                        this_leaf = this_cpu_ci->info_list + index;
                        for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
                                if (!cpu_online(sibling))
                                        continue;
                                cpumask_set_cpu(sibling,
                                                &this_leaf->shared_cpu_map);
                        }
                }
        } else {
                return 0;
        }

        return 1;
}

static void __cache_cpumap_setup(unsigned int cpu, int index,
                                 struct _cpuid4_info_regs *base)
{
        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
        struct cacheinfo *this_leaf, *sibling_leaf;
        unsigned long num_threads_sharing;
        int index_msb, i;
        struct cpuinfo_x86 *c = &cpu_data(cpu);

        if (c->x86_vendor == X86_VENDOR_AMD) {
                if (__cache_amd_cpumap_setup(cpu, index, base))
                        return;
        }

        this_leaf = this_cpu_ci->info_list + index;
        num_threads_sharing = 1 + base->eax.split.num_threads_sharing;

        cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
        if (num_threads_sharing == 1)
                return;

        index_msb = get_count_order(num_threads_sharing);
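
        /*
         * CPUs sharing this cache have identical APIC IDs above the
         * low index_msb bits.
         */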
        for_each_online_cpu(i)
                if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
                        struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);

                        if (i == cpu || !sib_cpu_ci->info_list)
                                continue; /* skip if itself or no cacheinfo */
                        sibling_leaf = sib_cpu_ci->info_list + index;
                        cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
                        cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
                }
}

static void ci_leaf_init(struct cacheinfo *this_leaf,
                         struct _cpuid4_info_regs *base)
{
        this_leaf->level = base->eax.split.level;
        this_leaf->type = cache_type_map[base->eax.split.type];
        this_leaf->coherency_line_size =
                                base->ebx.split.coherency_line_size + 1;
        this_leaf->ways_of_associativity =
                                base->ebx.split.ways_of_associativity + 1;
        this_leaf->size = base->size;
        this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
        this_leaf->physical_line_partition =
                                base->ebx.split.physical_line_partition + 1;
        this_leaf->priv = base->nb;
}

static int __init_cache_level(unsigned int cpu)
{
        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);

        if (!num_cache_leaves)
                return -ENOENT;
        if (!this_cpu_ci)
                return -EINVAL;
        this_cpu_ci->num_levels = 3;
        this_cpu_ci->num_leaves = num_cache_leaves;
        return 0;
}

static int __populate_cache_leaves(unsigned int cpu)
{
        unsigned int idx;
        /* signed: cpuid4_cache_lookup_regs() returns negative errno values */
        int ret;
        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
        struct cacheinfo *this_leaf = this_cpu_ci->info_list;
        struct _cpuid4_info_regs id4_regs = {};

        for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
                ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
                if (ret)
                        return ret;
                ci_leaf_init(this_leaf++, &id4_regs);
                __cache_cpumap_setup(cpu, idx, &id4_regs);
        }
        return 0;
}

DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
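
/*
 * DEFINE_SMP_CALL_CACHE_FUNCTION() (linux/cacheinfo.h) wraps the __-prefixed
 * helpers above so they run on the target CPU via smp_call_function_single(),
 * since the CPUID-based probing must execute on the CPU being described.
 */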