linux/arch/x86/kernel/cpu/intel_cacheinfo.c
/*
 *      Routines to identify caches on Intel CPU.
 *
 *      Changes:
 *      Venkatesh Pallipadi     : Adding cache identification through cpuid(4)
 *      Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
 *      Andi Kleen / Andreas Herrmann   : CPUID4 emulation on AMD.
 */

#include <linux/init.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/pci.h>

#include <asm/processor.h>
#include <linux/smp.h>
#include <asm/amd_nb.h>
#include <asm/smp.h>

#define LVL_1_INST      1
#define LVL_1_DATA      2
#define LVL_2           3
#define LVL_3           4
#define LVL_TRACE       5

struct _cache_table {
        unsigned char descriptor;
        char cache_type;
        short size;
};

#define MB(x)   ((x) * 1024)

/* All the cache descriptor types we care about (no TLB entries;
   trace cache descriptors are included for the P4) */

static const struct _cache_table __cpuinitconst cache_table[] =
{
        { 0x06, LVL_1_INST, 8 },        /* 4-way set assoc, 32 byte line size */
        { 0x08, LVL_1_INST, 16 },       /* 4-way set assoc, 32 byte line size */
        { 0x09, LVL_1_INST, 32 },       /* 4-way set assoc, 64 byte line size */
        { 0x0a, LVL_1_DATA, 8 },        /* 2 way set assoc, 32 byte line size */
        { 0x0c, LVL_1_DATA, 16 },       /* 4-way set assoc, 32 byte line size */
        { 0x0d, LVL_1_DATA, 16 },       /* 4-way set assoc, 64 byte line size */
        { 0x0e, LVL_1_DATA, 24 },       /* 6-way set assoc, 64 byte line size */
        { 0x21, LVL_2,      256 },      /* 8-way set assoc, 64 byte line size */
        { 0x22, LVL_3,      512 },      /* 4-way set assoc, sectored cache, 64 byte line size */
        { 0x23, LVL_3,      MB(1) },    /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x25, LVL_3,      MB(2) },    /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x29, LVL_3,      MB(4) },    /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x2c, LVL_1_DATA, 32 },       /* 8-way set assoc, 64 byte line size */
        { 0x30, LVL_1_INST, 32 },       /* 8-way set assoc, 64 byte line size */
        { 0x39, LVL_2,      128 },      /* 4-way set assoc, sectored cache, 64 byte line size */
        { 0x3a, LVL_2,      192 },      /* 6-way set assoc, sectored cache, 64 byte line size */
        { 0x3b, LVL_2,      128 },      /* 2-way set assoc, sectored cache, 64 byte line size */
        { 0x3c, LVL_2,      256 },      /* 4-way set assoc, sectored cache, 64 byte line size */
        { 0x3d, LVL_2,      384 },      /* 6-way set assoc, sectored cache, 64 byte line size */
        { 0x3e, LVL_2,      512 },      /* 4-way set assoc, sectored cache, 64 byte line size */
        { 0x3f, LVL_2,      256 },      /* 2-way set assoc, 64 byte line size */
        { 0x41, LVL_2,      128 },      /* 4-way set assoc, 32 byte line size */
        { 0x42, LVL_2,      256 },      /* 4-way set assoc, 32 byte line size */
        { 0x43, LVL_2,      512 },      /* 4-way set assoc, 32 byte line size */
        { 0x44, LVL_2,      MB(1) },    /* 4-way set assoc, 32 byte line size */
        { 0x45, LVL_2,      MB(2) },    /* 4-way set assoc, 32 byte line size */
        { 0x46, LVL_3,      MB(4) },    /* 4-way set assoc, 64 byte line size */
        { 0x47, LVL_3,      MB(8) },    /* 8-way set assoc, 64 byte line size */
        { 0x48, LVL_2,      MB(3) },    /* 12-way set assoc, 64 byte line size */
        { 0x49, LVL_3,      MB(4) },    /* 16-way set assoc, 64 byte line size */
        { 0x4a, LVL_3,      MB(6) },    /* 12-way set assoc, 64 byte line size */
        { 0x4b, LVL_3,      MB(8) },    /* 16-way set assoc, 64 byte line size */
        { 0x4c, LVL_3,      MB(12) },   /* 12-way set assoc, 64 byte line size */
        { 0x4d, LVL_3,      MB(16) },   /* 16-way set assoc, 64 byte line size */
        { 0x4e, LVL_2,      MB(6) },    /* 24-way set assoc, 64 byte line size */
        { 0x60, LVL_1_DATA, 16 },       /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x66, LVL_1_DATA, 8 },        /* 4-way set assoc, sectored cache, 64 byte line size */
        { 0x67, LVL_1_DATA, 16 },       /* 4-way set assoc, sectored cache, 64 byte line size */
        { 0x68, LVL_1_DATA, 32 },       /* 4-way set assoc, sectored cache, 64 byte line size */
        { 0x70, LVL_TRACE,  12 },       /* 8-way set assoc */
        { 0x71, LVL_TRACE,  16 },       /* 8-way set assoc */
        { 0x72, LVL_TRACE,  32 },       /* 8-way set assoc */
        { 0x73, LVL_TRACE,  64 },       /* 8-way set assoc */
        { 0x78, LVL_2,      MB(1) },    /* 4-way set assoc, 64 byte line size */
        { 0x79, LVL_2,      128 },      /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x7a, LVL_2,      256 },      /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x7b, LVL_2,      512 },      /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x7c, LVL_2,      MB(1) },    /* 8-way set assoc, sectored cache, 64 byte line size */
        { 0x7d, LVL_2,      MB(2) },    /* 8-way set assoc, 64 byte line size */
        { 0x7f, LVL_2,      512 },      /* 2-way set assoc, 64 byte line size */
        { 0x80, LVL_2,      512 },      /* 8-way set assoc, 64 byte line size */
        { 0x82, LVL_2,      256 },      /* 8-way set assoc, 32 byte line size */
        { 0x83, LVL_2,      512 },      /* 8-way set assoc, 32 byte line size */
        { 0x84, LVL_2,      MB(1) },    /* 8-way set assoc, 32 byte line size */
        { 0x85, LVL_2,      MB(2) },    /* 8-way set assoc, 32 byte line size */
        { 0x86, LVL_2,      512 },      /* 4-way set assoc, 64 byte line size */
        { 0x87, LVL_2,      MB(1) },    /* 8-way set assoc, 64 byte line size */
        { 0xd0, LVL_3,      512 },      /* 4-way set assoc, 64 byte line size */
        { 0xd1, LVL_3,      MB(1) },    /* 4-way set assoc, 64 byte line size */
        { 0xd2, LVL_3,      MB(2) },    /* 4-way set assoc, 64 byte line size */
        { 0xd6, LVL_3,      MB(1) },    /* 8-way set assoc, 64 byte line size */
        { 0xd7, LVL_3,      MB(2) },    /* 8-way set assoc, 64 byte line size */
        { 0xd8, LVL_3,      MB(4) },    /* 12-way set assoc, 64 byte line size */
        { 0xdc, LVL_3,      MB(2) },    /* 12-way set assoc, 64 byte line size */
        { 0xdd, LVL_3,      MB(4) },    /* 12-way set assoc, 64 byte line size */
        { 0xde, LVL_3,      MB(8) },    /* 12-way set assoc, 64 byte line size */
        { 0xe2, LVL_3,      MB(2) },    /* 16-way set assoc, 64 byte line size */
        { 0xe3, LVL_3,      MB(4) },    /* 16-way set assoc, 64 byte line size */
        { 0xe4, LVL_3,      MB(8) },    /* 16-way set assoc, 64 byte line size */
        { 0xea, LVL_3,      MB(12) },   /* 24-way set assoc, 64 byte line size */
        { 0xeb, LVL_3,      MB(18) },   /* 24-way set assoc, 64 byte line size */
        { 0xec, LVL_3,      MB(24) },   /* 24-way set assoc, 64 byte line size */
        { 0x00, 0, 0}
};
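
/*
 * Illustrative mapping, read straight from the table above: a CPUID
 * leaf 2 descriptor byte of 0x2c denotes a 32 KB L1 data cache, and
 * 0x49 a 4 MB L3.  init_intel_cacheinfo() below walks this table for
 * each descriptor byte that cpuid(2) returns.
 */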

enum _cache_type {
        CACHE_TYPE_NULL = 0,
        CACHE_TYPE_DATA = 1,
        CACHE_TYPE_INST = 2,
        CACHE_TYPE_UNIFIED = 3
};

union _cpuid4_leaf_eax {
        struct {
                enum _cache_type        type:5;
                unsigned int            level:3;
                unsigned int            is_self_initializing:1;
                unsigned int            is_fully_associative:1;
                unsigned int            reserved:4;
                unsigned int            num_threads_sharing:12;
                unsigned int            num_cores_on_die:6;
        } split;
        u32 full;
};

union _cpuid4_leaf_ebx {
        struct {
                unsigned int            coherency_line_size:12;
                unsigned int            physical_line_partition:10;
                unsigned int            ways_of_associativity:10;
        } split;
        u32 full;
};

union _cpuid4_leaf_ecx {
        struct {
                unsigned int            number_of_sets:32;
        } split;
        u32 full;
};
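
/*
 * These unions decompose the EAX/EBX/ECX output of CPUID leaf 4
 * ("deterministic cache parameters").  The EBX fields and
 * number_of_sets are reported minus one, so a cache's total size is
 * (ways + 1) * (partitions + 1) * (line_size + 1) * (sets + 1) bytes,
 * which is exactly what cpuid4_cache_lookup_regs() computes below.
 */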

struct amd_l3_cache {
        struct   amd_northbridge *nb;
        unsigned indices;
        u8       subcaches[4];
};

struct _cpuid4_info {
        union _cpuid4_leaf_eax eax;
        union _cpuid4_leaf_ebx ebx;
        union _cpuid4_leaf_ecx ecx;
        unsigned long size;
        struct amd_l3_cache *l3;
        DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
};

/* subset of above _cpuid4_info w/o shared_cpu_map */
struct _cpuid4_info_regs {
        union _cpuid4_leaf_eax eax;
        union _cpuid4_leaf_ebx ebx;
        union _cpuid4_leaf_ecx ecx;
        unsigned long size;
        struct amd_l3_cache *l3;
};

unsigned short                  num_cache_leaves;

/* AMD doesn't have CPUID4. Emulate it here to report the same
   information to the user.  This makes some assumptions about the machine:
   L2 not shared, no SMT etc. that are currently true on AMD CPUs.

   In theory the TLBs could be reported as fake type (they are in "dummy").
   Maybe later */
union l1_cache {
        struct {
                unsigned line_size:8;
                unsigned lines_per_tag:8;
                unsigned assoc:8;
                unsigned size_in_kb:8;
        };
        unsigned val;
};

union l2_cache {
        struct {
                unsigned line_size:8;
                unsigned lines_per_tag:4;
                unsigned assoc:4;
                unsigned size_in_kb:16;
        };
        unsigned val;
};

union l3_cache {
        struct {
                unsigned line_size:8;
                unsigned lines_per_tag:4;
                unsigned assoc:4;
                unsigned res:2;
                unsigned size_encoded:14;
        };
        unsigned val;
};
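
/*
 * The three unions above follow the register layouts that the code below
 * relies on for AMD's extended CPUID leaves: cpuid(0x80000005) returns
 * the L1 data and instruction cache descriptors in ECX and EDX, and
 * cpuid(0x80000006) returns the L2 and L3 descriptors in ECX and EDX.
 */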

static const unsigned short __cpuinitconst assocs[] = {
        [1] = 1,
        [2] = 2,
        [4] = 4,
        [6] = 8,
        [8] = 16,
        [0xa] = 32,
        [0xb] = 48,
        [0xc] = 64,
        [0xd] = 96,
        [0xe] = 128,
        [0xf] = 0xffff /* fully associative - no way to show this currently */
};

static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 };

static void __cpuinit
amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
                     union _cpuid4_leaf_ebx *ebx,
                     union _cpuid4_leaf_ecx *ecx)
{
        unsigned dummy;
        unsigned line_size, lines_per_tag, assoc, size_in_kb;
        union l1_cache l1i, l1d;
        union l2_cache l2;
        union l3_cache l3;
        union l1_cache *l1 = &l1d;

        eax->full = 0;
        ebx->full = 0;
        ecx->full = 0;

        cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
        cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

        switch (leaf) {
        case 1:
                l1 = &l1i;
                /* fall through */
        case 0:
                if (!l1->val)
                        return;
                assoc = assocs[l1->assoc];
                line_size = l1->line_size;
                lines_per_tag = l1->lines_per_tag;
                size_in_kb = l1->size_in_kb;
                break;
        case 2:
                if (!l2.val)
                        return;
                assoc = assocs[l2.assoc];
                line_size = l2.line_size;
                lines_per_tag = l2.lines_per_tag;
                /* cpu_data has errata corrections for K7 applied */
                size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
                break;
        case 3:
                if (!l3.val)
                        return;
                assoc = assocs[l3.assoc];
                line_size = l3.line_size;
                lines_per_tag = l3.lines_per_tag;
                size_in_kb = l3.size_encoded * 512;
                if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
                        size_in_kb = size_in_kb >> 1;
                        assoc = assoc >> 1;
                }
                break;
        default:
                return;
        }

        eax->split.is_self_initializing = 1;
        eax->split.type = types[leaf];
        eax->split.level = levels[leaf];
        eax->split.num_threads_sharing = 0;
        eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;

        if (assoc == 0xffff)
                eax->split.is_fully_associative = 1;
        ebx->split.coherency_line_size = line_size - 1;
        ebx->split.ways_of_associativity = assoc - 1;
        ebx->split.physical_line_partition = lines_per_tag - 1;
        ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
                (ebx->split.ways_of_associativity + 1) - 1;
}

struct _cache_attr {
        struct attribute attr;
        ssize_t (*show)(struct _cpuid4_info *, char *);
        ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
};

#ifdef CONFIG_AMD_NB

/*
 * L3 cache descriptors
 */
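/*
 * A note on amd_calc_l3_indices() below, under the assumption the code
 * itself encodes: in the northbridge register at offset 0x1C4, a set bit
 * appears to mark a subcache portion as unavailable, so !(val & BIT(n))
 * counts the portions each of the four subcaches still has.  The number
 * of usable indices is then derived from the largest subcache.
 */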
static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
{
        unsigned int sc0, sc1, sc2, sc3;
        u32 val = 0;

        pci_read_config_dword(l3->nb->misc, 0x1C4, &val);

        /* calculate subcache sizes */
        l3->subcaches[0] = sc0 = !(val & BIT(0));
        l3->subcaches[1] = sc1 = !(val & BIT(4));
        l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
        l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));

        l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}

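/*
 * Set up the amd_l3_cache descriptor for this leaf: one descriptor is
 * allocated per northbridge node, lazily on first use, and every L3
 * leaf on that node then points at it.
 */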
static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
                                        int index)
{
        static struct amd_l3_cache *__cpuinitdata l3_caches;
        int node;

        /* only for L3, and not in virtualized environments */
        if (index < 3 || amd_nb_num() == 0)
                return;

        /*
         * Strictly speaking, the amount in @size below is leaked since it is
         * never freed but this is done only on shutdown so it doesn't matter.
         */
        if (!l3_caches) {
                int size = amd_nb_num() * sizeof(struct amd_l3_cache);

                l3_caches = kzalloc(size, GFP_ATOMIC);
                if (!l3_caches)
                        return;
        }

        node = amd_get_nb_id(smp_processor_id());

        if (!l3_caches[node].nb) {
                l3_caches[node].nb = node_to_amd_nb(node);
                amd_calc_l3_indices(&l3_caches[node]);
        }

        this_leaf->l3 = &l3_caches[node];
}

/*
 * check whether a slot used for disabling an L3 index is occupied.
 * @l3: L3 cache descriptor
 * @slot: slot number (0..1)
 *
 * @returns: the disabled index if used or negative value if slot free.
 */
int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot)
{
        unsigned int reg = 0;

        pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, &reg);

        /* check whether this slot is activated already */
        if (reg & (3UL << 30))
                return reg & 0xfff;

        return -1;
}

static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
                                  unsigned int slot)
{
        int index;

        if (!this_leaf->l3 ||
            !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
                return -EINVAL;

        index = amd_get_l3_disable_slot(this_leaf->l3, slot);
        if (index >= 0)
                return sprintf(buf, "%d\n", index);

        return sprintf(buf, "FREE\n");
}

#define SHOW_CACHE_DISABLE(slot)                                        \
static ssize_t                                                          \
show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf)    \
{                                                                       \
        return show_cache_disable(this_leaf, buf, slot);                \
}
SHOW_CACHE_DISABLE(0)
SHOW_CACHE_DISABLE(1)

static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
                                 unsigned slot, unsigned long idx)
{
        int i;

        idx |= BIT(30);

        /*
         * disable index in all 4 subcaches
         */
        for (i = 0; i < 4; i++) {
                u32 reg = idx | (i << 20);

                if (!l3->subcaches[i])
                        continue;

                pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);

                /*
                 * We need to WBINVD on a core on the node containing the L3
                 * cache whose indices we are disabling, therefore a simple
                 * wbinvd() is not sufficient.
                 */
                wbinvd_on_cpu(cpu);

                reg |= BIT(31);
                pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
        }
}

/*
 * disable a L3 cache index by using a disable-slot
 *
 * @l3:    L3 cache descriptor
 * @cpu:   A CPU on the node containing the L3 cache
 * @slot:  slot number (0..1)
 * @index: index to disable
 *
 * @return: 0 on success, error status on failure
 */
int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot,
                            unsigned long index)
{
        int ret = 0;

#define SUBCACHE_MASK   (3UL << 20)
#define SUBCACHE_INDEX  0xfff

        /*
         * check whether this slot is already used or
         * the index is already disabled
         */
        ret = amd_get_l3_disable_slot(l3, slot);
        if (ret >= 0)
                return -EEXIST;

        /*
         * check whether the other slot has disabled the
         * same index already
         */
        if (index == amd_get_l3_disable_slot(l3, !slot))
                return -EEXIST;

        /* do not allow writes outside of allowed bits */
        if ((index & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) ||
            ((index & SUBCACHE_INDEX) > l3->indices))
                return -EINVAL;

        amd_l3_disable_index(l3, cpu, slot, index);

        return 0;
}

static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
                                  const char *buf, size_t count,
                                  unsigned int slot)
{
        unsigned long val = 0;
        int cpu, err = 0;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        if (!this_leaf->l3 ||
            !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
                return -EINVAL;

        cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));

        if (strict_strtoul(buf, 10, &val) < 0)
                return -EINVAL;

        err = amd_set_l3_disable_slot(this_leaf->l3, cpu, slot, val);
        if (err) {
                if (err == -EEXIST)
                        printk(KERN_WARNING "L3 disable slot %d in use!\n",
                                            slot);
                return err;
        }
        return count;
}

#define STORE_CACHE_DISABLE(slot)                                       \
static ssize_t                                                          \
store_cache_disable_##slot(struct _cpuid4_info *this_leaf,              \
                           const char *buf, size_t count)               \
{                                                                       \
        return store_cache_disable(this_leaf, buf, count, slot);        \
}
STORE_CACHE_DISABLE(0)
STORE_CACHE_DISABLE(1)

static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
                show_cache_disable_0, store_cache_disable_0);
static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
                show_cache_disable_1, store_cache_disable_1);
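
/*
 * Illustrative use from user space (the paths follow the kobject layout
 * created in cache_add_dev() below; index3 is the L3 leaf):
 *
 *   # cat /sys/devices/system/cpu/cpu0/cache/index3/cache_disable_0
 *   FREE
 *   # echo 42 > /sys/devices/system/cpu/cpu0/cache/index3/cache_disable_0
 *
 * Writing disables the given L3 cache index through the chosen slot;
 * reading shows the currently disabled index, or "FREE" if the slot
 * is unused.
 */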

#else   /* CONFIG_AMD_NB */
#define amd_init_l3_cache(x, y)
#endif /* CONFIG_AMD_NB */

static int
__cpuinit cpuid4_cache_lookup_regs(int index,
                                   struct _cpuid4_info_regs *this_leaf)
{
        union _cpuid4_leaf_eax  eax;
        union _cpuid4_leaf_ebx  ebx;
        union _cpuid4_leaf_ecx  ecx;
        unsigned                edx;

        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
                amd_cpuid4(index, &eax, &ebx, &ecx);
                amd_init_l3_cache(this_leaf, index);
        } else {
                cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
        }

        if (eax.split.type == CACHE_TYPE_NULL)
                return -EIO; /* better error ? */

        this_leaf->eax = eax;
        this_leaf->ebx = ebx;
        this_leaf->ecx = ecx;
        this_leaf->size = (ecx.split.number_of_sets          + 1) *
                          (ebx.split.coherency_line_size     + 1) *
                          (ebx.split.physical_line_partition + 1) *
                          (ebx.split.ways_of_associativity   + 1);
        return 0;
}

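/*
 * Worked example of the size computation above: a leaf reporting 7 for
 * ways, 0 for partitions, 63 for line size and 63 for sets describes an
 * 8-way cache with 64-byte lines and 64 sets, i.e.
 * (63 + 1) * (63 + 1) * (0 + 1) * (7 + 1) = 32768 bytes = 32 KB.
 */

/*
 * Count subleaves of CPUID leaf 4 until a NULL cache type terminates
 * the list; the boot CPU does this once to set num_cache_leaves.
 */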
static int __cpuinit find_num_cache_leaves(void)
{
        unsigned int            eax, ebx, ecx, edx;
        union _cpuid4_leaf_eax  cache_eax;
        int                     i = -1;

        do {
                ++i;
                /* Do cpuid(4) loop to find out num_cache_leaves */
                cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
                cache_eax.full = eax;
        } while (cache_eax.split.type != CACHE_TYPE_NULL);
        return i;
}

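/*
 * Fill in c->x86_cache_size and the per-CPU LLC id.  Deterministic
 * cpuid(4) data is preferred when the CPU supports it; the descriptor
 * based cpuid(2) path below is kept as a fallback and, on P4, is still
 * consulted for the trace cache.  Returns the L2 size in KB.
 */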
unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
        /* Cache sizes */
        unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
        unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
        unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
        unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
#ifdef CONFIG_X86_HT
        unsigned int cpu = c->cpu_index;
#endif

        if (c->cpuid_level > 3) {
                static int is_initialized;

                if (is_initialized == 0) {
                        /* Init num_cache_leaves from boot CPU */
                        num_cache_leaves = find_num_cache_leaves();
                        is_initialized++;
                }

                /*
                 * Whenever possible use cpuid(4), the deterministic cache
                 * parameters leaf, to find the cache details
                 */
                for (i = 0; i < num_cache_leaves; i++) {
                        struct _cpuid4_info_regs this_leaf;
                        int retval;

                        retval = cpuid4_cache_lookup_regs(i, &this_leaf);
                        if (retval >= 0) {
                                switch (this_leaf.eax.split.level) {
                                case 1:
                                        if (this_leaf.eax.split.type ==
                                                        CACHE_TYPE_DATA)
                                                new_l1d = this_leaf.size/1024;
                                        else if (this_leaf.eax.split.type ==
                                                        CACHE_TYPE_INST)
                                                new_l1i = this_leaf.size/1024;
                                        break;
                                case 2:
                                        new_l2 = this_leaf.size/1024;
                                        num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
                                        index_msb = get_count_order(num_threads_sharing);
                                        l2_id = c->apicid >> index_msb;
                                        break;
                                case 3:
                                        new_l3 = this_leaf.size/1024;
                                        num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
                                        index_msb = get_count_order(
                                                        num_threads_sharing);
                                        l3_id = c->apicid >> index_msb;
                                        break;
                                default:
                                        break;
                                }
                        }
                }
        }
        /*
         * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
         * trace cache
         */
        if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
                /* supports eax=2  call */
                int j, n;
                unsigned int regs[4];
                unsigned char *dp = (unsigned char *)regs;
                int only_trace = 0;

                if (num_cache_leaves != 0 && c->x86 == 15)
                        only_trace = 1;

                /* Number of times to iterate */
                n = cpuid_eax(2) & 0xFF;

                for (i = 0 ; i < n ; i++) {
                        cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

                        /* If bit 31 is set, this is an unknown format */
                        for (j = 0 ; j < 3 ; j++)
                                if (regs[j] & (1 << 31))
                                        regs[j] = 0;

                        /* Byte 0 is level count, not a descriptor */
                        for (j = 1 ; j < 16 ; j++) {
                                unsigned char des = dp[j];
                                unsigned char k = 0;

                                /* look up this descriptor in the table */
                                while (cache_table[k].descriptor != 0) {
                                        if (cache_table[k].descriptor == des) {
                                                if (only_trace && cache_table[k].cache_type != LVL_TRACE)
                                                        break;
                                                switch (cache_table[k].cache_type) {
                                                case LVL_1_INST:
                                                        l1i += cache_table[k].size;
                                                        break;
                                                case LVL_1_DATA:
                                                        l1d += cache_table[k].size;
                                                        break;
                                                case LVL_2:
                                                        l2 += cache_table[k].size;
                                                        break;
                                                case LVL_3:
                                                        l3 += cache_table[k].size;
                                                        break;
                                                case LVL_TRACE:
                                                        trace += cache_table[k].size;
                                                        break;
                                                }

                                                break;
                                        }

                                        k++;
                                }
                        }
                }
        }

        if (new_l1d)
                l1d = new_l1d;

        if (new_l1i)
                l1i = new_l1i;

        if (new_l2) {
                l2 = new_l2;
#ifdef CONFIG_X86_HT
                per_cpu(cpu_llc_id, cpu) = l2_id;
#endif
        }

        if (new_l3) {
                l3 = new_l3;
#ifdef CONFIG_X86_HT
                per_cpu(cpu_llc_id, cpu) = l3_id;
#endif
        }

        c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

        return l2;
}

#ifdef CONFIG_SYSFS

/* pointer to _cpuid4_info array (for each cache leaf) */
static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
#define CPUID4_INFO_IDX(x, y)   (&((per_cpu(ici_cpuid4_info, x))[y]))

#ifdef CONFIG_SMP
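/*
 * Build this leaf's shared_cpu_map: CPUs share a cache level when their
 * APIC ids agree once shifted right by log2(num_threads_sharing).  For
 * the AMD L3 (index 3) the precomputed llc_shared_map is used instead,
 * since the emulated CPUID leaf 4 carries no sharing information
 * (amd_cpuid4() sets num_threads_sharing to 0).
 */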
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
{
        struct _cpuid4_info     *this_leaf, *sibling_leaf;
        unsigned long num_threads_sharing;
        int index_msb, i, sibling;
        struct cpuinfo_x86 *c = &cpu_data(cpu);

        if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
                for_each_cpu(i, c->llc_shared_map) {
                        if (!per_cpu(ici_cpuid4_info, i))
                                continue;
                        this_leaf = CPUID4_INFO_IDX(i, index);
                        for_each_cpu(sibling, c->llc_shared_map) {
                                if (!cpu_online(sibling))
                                        continue;
                                set_bit(sibling, this_leaf->shared_cpu_map);
                        }
                }
                return;
        }
        this_leaf = CPUID4_INFO_IDX(cpu, index);
        num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;

        if (num_threads_sharing == 1)
                cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
        else {
                index_msb = get_count_order(num_threads_sharing);

                for_each_online_cpu(i) {
                        if (cpu_data(i).apicid >> index_msb ==
                            c->apicid >> index_msb) {
                                cpumask_set_cpu(i,
                                        to_cpumask(this_leaf->shared_cpu_map));
                                if (i != cpu && per_cpu(ici_cpuid4_info, i)) {
                                        sibling_leaf =
                                                CPUID4_INFO_IDX(i, index);
                                        cpumask_set_cpu(cpu, to_cpumask(
                                                sibling_leaf->shared_cpu_map));
                                }
                        }
                }
        }
}
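/*
 * Inverse of the above: drop @cpu from the shared_cpu_map of every
 * sibling leaf before its _cpuid4_info goes away.
 */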
static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
{
        struct _cpuid4_info     *this_leaf, *sibling_leaf;
        int sibling;

        this_leaf = CPUID4_INFO_IDX(cpu, index);
        for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
                sibling_leaf = CPUID4_INFO_IDX(sibling, index);
                cpumask_clear_cpu(cpu,
                                  to_cpumask(sibling_leaf->shared_cpu_map));
        }
}
#else
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
{
}

static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
{
}
#endif

static void __cpuinit free_cache_attributes(unsigned int cpu)
{
        int i;

        for (i = 0; i < num_cache_leaves; i++)
                cache_remove_shared_cpu_map(cpu, i);

        kfree(per_cpu(ici_cpuid4_info, cpu)->l3);
        kfree(per_cpu(ici_cpuid4_info, cpu));
        per_cpu(ici_cpuid4_info, cpu) = NULL;
}

static int
__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
{
        struct _cpuid4_info_regs *leaf_regs =
                (struct _cpuid4_info_regs *)this_leaf;

        return cpuid4_cache_lookup_regs(index, leaf_regs);
}

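/*
 * Runs on the CPU being probed (see the smp_call_function_single() call
 * in detect_cache_attributes() below): CPUID must execute on that CPU
 * for the leaves to describe its own caches.
 */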
static void __cpuinit get_cpu_leaves(void *_retval)
{
        int j, *retval = _retval, cpu = smp_processor_id();

        /* Do cpuid and store the results */
        for (j = 0; j < num_cache_leaves; j++) {
                struct _cpuid4_info *this_leaf;
                this_leaf = CPUID4_INFO_IDX(cpu, j);
                *retval = cpuid4_cache_lookup(j, this_leaf);
                if (unlikely(*retval < 0)) {
                        int i;

                        for (i = 0; i < j; i++)
                                cache_remove_shared_cpu_map(cpu, i);
                        break;
                }
                cache_shared_cpu_map_setup(cpu, j);
        }
}

static int __cpuinit detect_cache_attributes(unsigned int cpu)
{
        int                     retval;

        if (num_cache_leaves == 0)
                return -ENOENT;

        per_cpu(ici_cpuid4_info, cpu) = kzalloc(
            sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
        if (per_cpu(ici_cpuid4_info, cpu) == NULL)
                return -ENOMEM;

        smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
        if (retval) {
                kfree(per_cpu(ici_cpuid4_info, cpu));
                per_cpu(ici_cpuid4_info, cpu) = NULL;
        }

        return retval;
}

#include <linux/kobject.h>
#include <linux/sysfs.h>

extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */

/* pointer to kobject for cpuX/cache */
static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);

struct _index_kobject {
        struct kobject kobj;
        unsigned int cpu;
        unsigned short index;
};

/* pointer to array of kobjects for cpuX/cache/indexY */
static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
#define INDEX_KOBJECT_PTR(x, y)         (&((per_cpu(ici_index_kobject, x))[y]))

#define show_one_plus(file_name, object, val)                           \
static ssize_t show_##file_name                                         \
                        (struct _cpuid4_info *this_leaf, char *buf)     \
{                                                                       \
        return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
}

show_one_plus(level, eax.split.level, 0);
show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
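/* The "+ 1" above undoes CPUID's minus-one encoding of these fields. */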

static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
{
        return sprintf(buf, "%luK\n", this_leaf->size / 1024);
}

static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
                                        int type, char *buf)
{
        ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
        int n = 0;

        if (len > 1) {
                const struct cpumask *mask;

                mask = to_cpumask(this_leaf->shared_cpu_map);
                n = type ?
                        cpulist_scnprintf(buf, len-2, mask) :
                        cpumask_scnprintf(buf, len-2, mask);
                buf[n++] = '\n';
                buf[n] = '\0';
        }
        return n;
}

static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf)
{
        return show_shared_cpu_map_func(leaf, 0, buf);
}

static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf)
{
        return show_shared_cpu_map_func(leaf, 1, buf);
}

static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
{
        switch (this_leaf->eax.split.type) {
        case CACHE_TYPE_DATA:
                return sprintf(buf, "Data\n");
        case CACHE_TYPE_INST:
                return sprintf(buf, "Instruction\n");
        case CACHE_TYPE_UNIFIED:
                return sprintf(buf, "Unified\n");
        default:
                return sprintf(buf, "Unknown\n");
        }
}

#define to_object(k)    container_of(k, struct _index_kobject, kobj)
#define to_attr(a)      container_of(a, struct _cache_attr, attr)

#define define_one_ro(_name) \
static struct _cache_attr _name = \
        __ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(level);
define_one_ro(type);
define_one_ro(coherency_line_size);
define_one_ro(physical_line_partition);
define_one_ro(ways_of_associativity);
define_one_ro(number_of_sets);
define_one_ro(size);
define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_list);

static struct attribute *default_attrs[] = {
        &type.attr,
        &level.attr,
        &coherency_line_size.attr,
        &physical_line_partition.attr,
        &ways_of_associativity.attr,
        &number_of_sets.attr,
        &size.attr,
        &shared_cpu_map.attr,
        &shared_cpu_list.attr,
        NULL
};

#ifdef CONFIG_AMD_NB
static struct attribute ** __cpuinit amd_l3_attrs(void)
{
        static struct attribute **attrs;
        int n;

        if (attrs)
                return attrs;

        n = ARRAY_SIZE(default_attrs);

        if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
                n += 2;

        attrs = kzalloc(n * sizeof(struct attribute *), GFP_KERNEL);
        if (attrs == NULL)
                return attrs = default_attrs;

        for (n = 0; default_attrs[n]; n++)
                attrs[n] = default_attrs[n];

        if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
                attrs[n++] = &cache_disable_0.attr;
                attrs[n++] = &cache_disable_1.attr;
        }

        return attrs;
}
#endif

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
        struct _cache_attr *fattr = to_attr(attr);
        struct _index_kobject *this_leaf = to_object(kobj);
        ssize_t ret;

        ret = fattr->show ?
                fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
                        buf) :
                0;
        return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
                     const char *buf, size_t count)
{
        struct _cache_attr *fattr = to_attr(attr);
        struct _index_kobject *this_leaf = to_object(kobj);
        ssize_t ret;

        ret = fattr->store ?
                fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
                        buf, count) :
                0;
        return ret;
}

static const struct sysfs_ops sysfs_ops = {
        .show   = show,
        .store  = store,
};

static struct kobj_type ktype_cache = {
        .sysfs_ops      = &sysfs_ops,
        .default_attrs  = default_attrs,
};

static struct kobj_type ktype_percpu_entry = {
        .sysfs_ops      = &sysfs_ops,
};

static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
{
        kfree(per_cpu(ici_cache_kobject, cpu));
        kfree(per_cpu(ici_index_kobject, cpu));
        per_cpu(ici_cache_kobject, cpu) = NULL;
        per_cpu(ici_index_kobject, cpu) = NULL;
        free_cache_attributes(cpu);
}

static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
{
        int err;

        if (num_cache_leaves == 0)
                return -ENOENT;

        err = detect_cache_attributes(cpu);
        if (err)
                return err;

        /* Allocate all required memory */
        per_cpu(ici_cache_kobject, cpu) =
                kzalloc(sizeof(struct kobject), GFP_KERNEL);
        if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
                goto err_out;

        per_cpu(ici_index_kobject, cpu) = kzalloc(
            sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
        if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
                goto err_out;

        return 0;

err_out:
        cpuid4_cache_sysfs_exit(cpu);
        return -ENOMEM;
}

static DECLARE_BITMAP(cache_dev_map, NR_CPUS);

/* Add/Remove cache interface for CPU device */
static int __cpuinit cache_add_dev(struct sys_device *sys_dev)
{
        unsigned int cpu = sys_dev->id;
        unsigned long i, j;
        struct _index_kobject *this_object;
        struct _cpuid4_info   *this_leaf;
        int retval;

        retval = cpuid4_cache_sysfs_init(cpu);
        if (unlikely(retval < 0))
                return retval;

        retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
                                      &ktype_percpu_entry,
                                      &sys_dev->kobj, "%s", "cache");
        if (retval < 0) {
                cpuid4_cache_sysfs_exit(cpu);
                return retval;
        }

        for (i = 0; i < num_cache_leaves; i++) {
                this_object = INDEX_KOBJECT_PTR(cpu, i);
                this_object->cpu = cpu;
                this_object->index = i;

                this_leaf = CPUID4_INFO_IDX(cpu, i);

                ktype_cache.default_attrs = default_attrs;
#ifdef CONFIG_AMD_NB
                if (this_leaf->l3)
                        ktype_cache.default_attrs = amd_l3_attrs();
#endif
                retval = kobject_init_and_add(&(this_object->kobj),
                                              &ktype_cache,
                                              per_cpu(ici_cache_kobject, cpu),
                                              "index%1lu", i);
                if (unlikely(retval)) {
                        for (j = 0; j < i; j++)
                                kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
                        kobject_put(per_cpu(ici_cache_kobject, cpu));
                        cpuid4_cache_sysfs_exit(cpu);
                        return retval;
                }
                kobject_uevent(&(this_object->kobj), KOBJ_ADD);
        }
        cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));

        kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
        return 0;
}

static void __cpuinit cache_remove_dev(struct sys_device *sys_dev)
{
        unsigned int cpu = sys_dev->id;
        unsigned long i;

        if (per_cpu(ici_cpuid4_info, cpu) == NULL)
                return;
        if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
                return;
        cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));

        for (i = 0; i < num_cache_leaves; i++)
                kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
        kobject_put(per_cpu(ici_cache_kobject, cpu));
        cpuid4_cache_sysfs_exit(cpu);
}

static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
                                        unsigned long action, void *hcpu)
{
        unsigned int cpu = (unsigned long)hcpu;
        struct sys_device *sys_dev;

        sys_dev = get_cpu_sysdev(cpu);
        switch (action) {
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                cache_add_dev(sys_dev);
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
                cache_remove_dev(sys_dev);
                break;
        }
        return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
        .notifier_call = cacheinfo_cpu_callback,
};

static int __cpuinit cache_sysfs_init(void)
{
        int i;

        if (num_cache_leaves == 0)
                return 0;

        for_each_online_cpu(i) {
                int err;
                struct sys_device *sys_dev = get_cpu_sysdev(i);

                err = cache_add_dev(sys_dev);
                if (err)
                        return err;
        }
        register_hotcpu_notifier(&cacheinfo_cpu_notifier);
        return 0;
}

device_initcall(cache_sysfs_init);

#endif /* CONFIG_SYSFS */