linux/arch/x86/events/amd/uncore.c
/*
 * Copyright (C) 2013 Advanced Micro Devices, Inc.
 *
 * Author: Jacob Shin <jacob.shin@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>

#include <asm/cpufeature.h>
#include <asm/perf_event.h>
#include <asm/msr.h>

#define NUM_COUNTERS_NB         4
#define NUM_COUNTERS_L2         4
#define MAX_COUNTERS            NUM_COUNTERS_NB

#define RDPMC_BASE_NB           6
#define RDPMC_BASE_L2           10

#define COUNTER_SHIFT           16

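/*
 * A single amd_uncore instance is shared by every core that sits behind the
 * same northbridge or L2 cache.  One designated CPU (->cpu) owns the
 * instance and is the only one that programs the counters; ->refcnt counts
 * how many online CPUs currently reference it, and ->free_when_cpu_online
 * parks a redundant allocation until the CPU_ONLINE phase can free it.
 */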
struct amd_uncore {
        int id;
        int refcnt;
        int cpu;
        int num_counters;
        int rdpmc_base;
        u32 msr_base;
        cpumask_t *active_mask;
        struct pmu *pmu;
        struct perf_event *events[MAX_COUNTERS];
        struct amd_uncore *free_when_cpu_online;
};

static struct amd_uncore * __percpu *amd_uncore_nb;
static struct amd_uncore * __percpu *amd_uncore_l2;

static struct pmu amd_nb_pmu;
static struct pmu amd_l2_pmu;

static cpumask_t amd_nb_active_mask;
static cpumask_t amd_l2_active_mask;

static bool is_nb_event(struct perf_event *event)
{
        return event->pmu->type == amd_nb_pmu.type;
}

static bool is_l2_event(struct perf_event *event)
{
        return event->pmu->type == amd_l2_pmu.type;
}

static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
{
        if (is_nb_event(event) && amd_uncore_nb)
                return *per_cpu_ptr(amd_uncore_nb, event->cpu);
        else if (is_l2_event(event) && amd_uncore_l2)
                return *per_cpu_ptr(amd_uncore_l2, event->cpu);

        return NULL;
}

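/*
 * Shifting both the previous and the new raw value left by COUNTER_SHIFT
 * before subtracting, then shifting the difference back down, discards the
 * top 16 bits, so a wrap of the (48-bit) hardware counter still produces a
 * correct, sign-extended delta.
 */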
static void amd_uncore_read(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        u64 prev, new;
        s64 delta;

        /*
         * since we do not enable counter overflow interrupts,
         * we do not have to worry about prev_count changing on us
         */

        prev = local64_read(&hwc->prev_count);
        rdpmcl(hwc->event_base_rdpmc, new);
        local64_set(&hwc->prev_count, new);
        delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
        delta >>= COUNTER_SHIFT;
        local64_add(delta, &event->count);
}

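/*
 * Starting a counter reloads the saved count into the PERF_CTR MSR (when
 * PERF_EF_RELOAD is set) and then writes the config with
 * ARCH_PERFMON_EVENTSEL_ENABLE to the matching PERF_CTL MSR.  Stopping
 * writes the config back without the enable bit and, on PERF_EF_UPDATE,
 * folds the final count into the event.
 */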
static void amd_uncore_start(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;

        if (flags & PERF_EF_RELOAD)
                wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));

        hwc->state = 0;
        wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
        perf_event_update_userpage(event);
}

static void amd_uncore_stop(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;

        wrmsrl(hwc->config_base, hwc->config);
        hwc->state |= PERF_HES_STOPPED;

        if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
                amd_uncore_read(event);
                hwc->state |= PERF_HES_UPTODATE;
        }
}

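/*
 * Counter slots in the shared events[] array are claimed with cmpxchg(), so
 * concurrent attempts to (de)schedule events against the same uncore cannot
 * race.  Once a slot is found, the CTL/CTR MSR pair and the RDPMC index for
 * that slot are derived from the per-uncore base values; if no slot is free
 * the event is rejected with -EBUSY.
 */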
static int amd_uncore_add(struct perf_event *event, int flags)
{
        int i;
        struct amd_uncore *uncore = event_to_amd_uncore(event);
        struct hw_perf_event *hwc = &event->hw;

        /* are we already assigned? */
        if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
                goto out;

        for (i = 0; i < uncore->num_counters; i++) {
                if (uncore->events[i] == event) {
                        hwc->idx = i;
                        goto out;
                }
        }

        /* if not, take the first available counter */
        hwc->idx = -1;
        for (i = 0; i < uncore->num_counters; i++) {
                if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
                        hwc->idx = i;
                        break;
                }
        }

out:
        if (hwc->idx == -1)
                return -EBUSY;

        hwc->config_base = uncore->msr_base + (2 * hwc->idx);
        hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
        hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
        hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

        if (flags & PERF_EF_START)
                amd_uncore_start(event, PERF_EF_RELOAD);

        return 0;
}

static void amd_uncore_del(struct perf_event *event, int flags)
{
        int i;
        struct amd_uncore *uncore = event_to_amd_uncore(event);
        struct hw_perf_event *hwc = &event->hw;

        amd_uncore_stop(event, PERF_EF_UPDATE);

        for (i = 0; i < uncore->num_counters; i++) {
                if (cmpxchg(&uncore->events[i], event, NULL) == event)
                        break;
        }

        hwc->idx = -1;
}

static int amd_uncore_event_init(struct perf_event *event)
{
        struct amd_uncore *uncore;
        struct hw_perf_event *hwc = &event->hw;

        if (event->attr.type != event->pmu->type)
                return -ENOENT;

        /*
         * NB and L2 counters (MSRs) are shared across all cores that share the
         * same NB / L2 cache. Interrupts can be directed to a single target
         * core, however, event counts generated by processes running on other
         * cores cannot be masked out. So we do not support sampling and
         * per-thread events.
         */
        if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
                return -EINVAL;

        /* NB and L2 counters do not have usr/os/guest/host bits */
        if (event->attr.exclude_user || event->attr.exclude_kernel ||
            event->attr.exclude_host || event->attr.exclude_guest)
                return -EINVAL;

        /* and we do not enable counter overflow interrupts */
        hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
        hwc->idx = -1;

        if (event->cpu < 0)
                return -EINVAL;

        uncore = event_to_amd_uncore(event);
        if (!uncore)
                return -ENODEV;

        /*
         * since request can come in to any of the shared cores, we will remap
         * to a single common cpu.
         */
        event->cpu = uncore->cpu;

        return 0;
}

static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
                                            struct device_attribute *attr,
                                            char *buf)
{
        cpumask_t *active_mask;
        struct pmu *pmu = dev_get_drvdata(dev);

        if (pmu->type == amd_nb_pmu.type)
                active_mask = &amd_nb_active_mask;
        else if (pmu->type == amd_l2_pmu.type)
                active_mask = &amd_l2_active_mask;
        else
                return 0;

        return cpumap_print_to_pagebuf(true, buf, active_mask);
}
static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);

static struct attribute *amd_uncore_attrs[] = {
        &dev_attr_cpumask.attr,
        NULL,
};

static struct attribute_group amd_uncore_attr_group = {
        .attrs = amd_uncore_attrs,
};

PMU_FORMAT_ATTR(event, "config:0-7,32-35");
PMU_FORMAT_ATTR(umask, "config:8-15");

static struct attribute *amd_uncore_format_attr[] = {
        &format_attr_event.attr,
        &format_attr_umask.attr,
        NULL,
};

static struct attribute_group amd_uncore_format_group = {
        .name = "format",
        .attrs = amd_uncore_format_attr,
};

static const struct attribute_group *amd_uncore_attr_groups[] = {
        &amd_uncore_attr_group,
        &amd_uncore_format_group,
        NULL,
};

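/*
 * The two PMUs below are registered as "amd_nb" and "amd_l2" and appear
 * under /sys/bus/event_source/devices/, so a raw uncore event can be
 * counted system-wide with something like (the event/umask values here are
 * placeholders, not real event encodings):
 *
 *   perf stat -a -e amd_nb/event=0x??,umask=0x??/ -- sleep 1
 */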
static struct pmu amd_nb_pmu = {
        .attr_groups    = amd_uncore_attr_groups,
        .name           = "amd_nb",
        .event_init     = amd_uncore_event_init,
        .add            = amd_uncore_add,
        .del            = amd_uncore_del,
        .start          = amd_uncore_start,
        .stop           = amd_uncore_stop,
        .read           = amd_uncore_read,
};

static struct pmu amd_l2_pmu = {
        .attr_groups    = amd_uncore_attr_groups,
        .name           = "amd_l2",
        .event_init     = amd_uncore_event_init,
        .add            = amd_uncore_add,
        .del            = amd_uncore_del,
        .start          = amd_uncore_start,
        .stop           = amd_uncore_stop,
        .read           = amd_uncore_read,
};

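/*
 * CPU hotplug, UP_PREPARE stage: allocate node-local amd_uncore structures
 * for the incoming CPU.  The CPU is not running yet, so the structures are
 * only filled in and published in the per-cpu pointers here; duplicates are
 * merged later in the STARTING stage and freed in the ONLINE stage.
 */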
static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
{
        return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
                        cpu_to_node(cpu));
}

static int amd_uncore_cpu_up_prepare(unsigned int cpu)
{
        struct amd_uncore *uncore_nb = NULL, *uncore_l2;

        if (amd_uncore_nb) {
                uncore_nb = amd_uncore_alloc(cpu);
                if (!uncore_nb)
                        goto fail;
                uncore_nb->cpu = cpu;
                uncore_nb->num_counters = NUM_COUNTERS_NB;
                uncore_nb->rdpmc_base = RDPMC_BASE_NB;
                uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
                uncore_nb->active_mask = &amd_nb_active_mask;
                uncore_nb->pmu = &amd_nb_pmu;
                *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
        }

        if (amd_uncore_l2) {
                uncore_l2 = amd_uncore_alloc(cpu);
                if (!uncore_l2)
                        goto fail;
                uncore_l2->cpu = cpu;
                uncore_l2->num_counters = NUM_COUNTERS_L2;
                uncore_l2->rdpmc_base = RDPMC_BASE_L2;
                uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL;
                uncore_l2->active_mask = &amd_l2_active_mask;
                uncore_l2->pmu = &amd_l2_pmu;
                *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2;
        }

        return 0;

fail:
        if (amd_uncore_nb)
                *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
        kfree(uncore_nb);
        return -ENOMEM;
}

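/*
 * If another online CPU already owns an uncore with the same ->id, adopt
 * that instance instead of our freshly allocated one and bump its reference
 * count; the now-redundant allocation is parked in ->free_when_cpu_online
 * and freed later from the CPU_ONLINE callback.
 */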
static struct amd_uncore *
amd_uncore_find_online_sibling(struct amd_uncore *this,
                               struct amd_uncore * __percpu *uncores)
{
        unsigned int cpu;
        struct amd_uncore *that;

        for_each_online_cpu(cpu) {
                that = *per_cpu_ptr(uncores, cpu);

                if (!that)
                        continue;

                if (this == that)
                        continue;

                if (this->id == that->id) {
                        that->free_when_cpu_online = this;
                        this = that;
                        break;
                }
        }

        this->refcnt++;
        return this;
}

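/*
 * CPU hotplug, STARTING stage (runs on the new CPU): work out which NB / L2
 * domain this CPU belongs to and share an uncore instance with any sibling
 * that is already online.  The NB id comes from CPUID leaf 0x8000001e
 * (ECX[7:0], the node id); the L2 id is derived from the APIC id and the
 * number of cores sharing the cache as reported by CPUID leaf 0x8000001d,
 * cache index 2 (EAX[25:14] + 1).
 */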
static void amd_uncore_cpu_starting(unsigned int cpu)
{
        unsigned int eax, ebx, ecx, edx;
        struct amd_uncore *uncore;

        if (amd_uncore_nb) {
                uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
                cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
                uncore->id = ecx & 0xff;

                uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
                *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
        }

        if (amd_uncore_l2) {
                unsigned int apicid = cpu_data(cpu).apicid;
                unsigned int nshared;

                uncore = *per_cpu_ptr(amd_uncore_l2, cpu);
                cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
                nshared = ((eax >> 14) & 0xfff) + 1;
                uncore->id = apicid - (apicid % nshared);

                uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2);
                *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
        }
}

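/*
 * CPU hotplug, ONLINE stage: free any duplicate allocation left over from
 * the STARTING stage and, if this CPU is the designated owner of the
 * uncore, mark it in the active cpumask exported through sysfs.
 */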
static void uncore_online(unsigned int cpu,
                          struct amd_uncore * __percpu *uncores)
{
        struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);

        kfree(uncore->free_when_cpu_online);
        uncore->free_when_cpu_online = NULL;

        if (cpu == uncore->cpu)
                cpumask_set_cpu(cpu, uncore->active_mask);
}

static void amd_uncore_cpu_online(unsigned int cpu)
{
        if (amd_uncore_nb)
                uncore_online(cpu, amd_uncore_nb);

        if (amd_uncore_l2)
                uncore_online(cpu, amd_uncore_l2);
}

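/*
 * CPU hotplug, DOWN_PREPARE stage: if the CPU going away is the owner of an
 * uncore, hand the role to another online CPU that shares the same instance
 * and migrate the perf context (and the active cpumask bit) over to it.
 */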
static void uncore_down_prepare(unsigned int cpu,
                                struct amd_uncore * __percpu *uncores)
{
        unsigned int i;
        struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);

        if (this->cpu != cpu)
                return;

        /* this cpu is going down, migrate to a shared sibling if possible */
        for_each_online_cpu(i) {
                struct amd_uncore *that = *per_cpu_ptr(uncores, i);

                if (cpu == i)
                        continue;

                if (this == that) {
                        perf_pmu_migrate_context(this->pmu, cpu, i);
                        cpumask_clear_cpu(cpu, that->active_mask);
                        cpumask_set_cpu(i, that->active_mask);
                        that->cpu = i;
                        break;
                }
        }
}

static void amd_uncore_cpu_down_prepare(unsigned int cpu)
{
        if (amd_uncore_nb)
                uncore_down_prepare(cpu, amd_uncore_nb);

        if (amd_uncore_l2)
                uncore_down_prepare(cpu, amd_uncore_l2);
}

static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
{
        struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);

        if (cpu == uncore->cpu)
                cpumask_clear_cpu(cpu, uncore->active_mask);

        if (!--uncore->refcnt)
                kfree(uncore);
        *per_cpu_ptr(uncores, cpu) = NULL;
}

static void amd_uncore_cpu_dead(unsigned int cpu)
{
        if (amd_uncore_nb)
                uncore_dead(cpu, amd_uncore_nb);

        if (amd_uncore_l2)
                uncore_dead(cpu, amd_uncore_l2);
}

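/*
 * Glue between the CPU hotplug notifier chain and the handlers above:
 * UP_PREPARE allocates, STARTING resolves sharing, ONLINE frees duplicates
 * and updates the cpumask, DOWN_PREPARE migrates the owner role away, and
 * UP_CANCELED/DEAD tears the per-cpu state down again.
 */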
static int
amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
                        void *hcpu)
{
        unsigned int cpu = (long)hcpu;

        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_UP_PREPARE:
                if (amd_uncore_cpu_up_prepare(cpu))
                        return notifier_from_errno(-ENOMEM);
                break;

        case CPU_STARTING:
                amd_uncore_cpu_starting(cpu);
                break;

        case CPU_ONLINE:
                amd_uncore_cpu_online(cpu);
                break;

        case CPU_DOWN_PREPARE:
                amd_uncore_cpu_down_prepare(cpu);
                break;

        case CPU_UP_CANCELED:
        case CPU_DEAD:
                amd_uncore_cpu_dead(cpu);
                break;

        default:
                break;
        }

        return NOTIFY_OK;
}

static struct notifier_block amd_uncore_cpu_notifier_block = {
        .notifier_call  = amd_uncore_cpu_notifier,
        .priority       = CPU_PRI_PERF + 1,
};

static void __init init_cpu_already_online(void *dummy)
{
        unsigned int cpu = smp_processor_id();

        amd_uncore_cpu_starting(cpu);
        amd_uncore_cpu_online(cpu);
}

static void cleanup_cpu_online(void *dummy)
{
        unsigned int cpu = smp_processor_id();

        amd_uncore_cpu_dead(cpu);
}

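/*
 * Driver init: bail out unless this is an AMD CPU with topology extensions,
 * then register an "amd_nb" and/or "amd_l2" PMU depending on which
 * PERFCTR_* feature bits are present, bring every already-online CPU up to
 * date, and finally hook into the hotplug notifier chain.
 */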
static int __init amd_uncore_init(void)
{
        unsigned int cpu, cpu2;
        int ret = -ENODEV;

        if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
                goto fail_nodev;

        if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
                goto fail_nodev;

        if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
                amd_uncore_nb = alloc_percpu(struct amd_uncore *);
                if (!amd_uncore_nb) {
                        ret = -ENOMEM;
                        goto fail_nb;
                }
                ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
                if (ret)
                        goto fail_nb;

                pr_info("perf: AMD NB counters detected\n");
                ret = 0;
        }

        if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) {
                amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
                if (!amd_uncore_l2) {
                        ret = -ENOMEM;
                        goto fail_l2;
                }
                ret = perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
                if (ret)
                        goto fail_l2;

                pr_info("perf: AMD L2I counters detected\n");
                ret = 0;
        }

        if (ret)
                goto fail_nodev;

        cpu_notifier_register_begin();

        /* init cpus already online before registering for hotplug notifier */
        for_each_online_cpu(cpu) {
                ret = amd_uncore_cpu_up_prepare(cpu);
                if (ret)
                        goto fail_online;
                smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
        }

        __register_cpu_notifier(&amd_uncore_cpu_notifier_block);
        cpu_notifier_register_done();

        return 0;

fail_online:
        for_each_online_cpu(cpu2) {
                if (cpu2 == cpu)
                        break;
                smp_call_function_single(cpu2, cleanup_cpu_online, NULL, 1);
        }
        cpu_notifier_register_done();

        /* amd_uncore_nb/l2 should have been freed by cleanup_cpu_online */
        amd_uncore_nb = amd_uncore_l2 = NULL;

        if (boot_cpu_has(X86_FEATURE_PERFCTR_L2))
                perf_pmu_unregister(&amd_l2_pmu);
fail_l2:
        if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
                perf_pmu_unregister(&amd_nb_pmu);
        if (amd_uncore_l2)
                free_percpu(amd_uncore_l2);
fail_nb:
        if (amd_uncore_nb)
                free_percpu(amd_uncore_nb);

fail_nodev:
        return ret;
}
device_initcall(amd_uncore_init);