linux/arch/x86/events/amd/uncore.c
/*
 * Copyright (C) 2013 Advanced Micro Devices, Inc.
 *
 * Author: Jacob Shin <jacob.shin@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>

#include <asm/cpufeature.h>
#include <asm/perf_event.h>
#include <asm/msr.h>

#define NUM_COUNTERS_NB         4
#define NUM_COUNTERS_L2         4
#define MAX_COUNTERS            NUM_COUNTERS_NB

#define RDPMC_BASE_NB           6
#define RDPMC_BASE_L2           10

#define COUNTER_SHIFT           16

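/*
 * Note: each NB / L2I counter is a PERF_CTL/PERF_CTR MSR pair, which is why
 * the per-counter MSR addresses computed in amd_uncore_add() step by two per
 * counter index, while RDPMC_BASE_NB/RDPMC_BASE_L2 give the first RDPMC
 * index of each counter bank. COUNTER_SHIFT reflects the 48-bit width of the
 * hardware counters (64 - 16 = 48); see the note after amd_uncore_read().
 */
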
struct amd_uncore {
        int id;
        int refcnt;
        int cpu;
        int num_counters;
        int rdpmc_base;
        u32 msr_base;
        cpumask_t *active_mask;
        struct pmu *pmu;
        struct perf_event *events[MAX_COUNTERS];
        struct amd_uncore *free_when_cpu_online;
};
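
/*
 * One amd_uncore instance is shared by every CPU sitting behind the same
 * northbridge or L2 cache: the per-cpu slots in amd_uncore_nb/amd_uncore_l2
 * (declared just below) all point at the instance owned by the designated
 * reader CPU of that group. refcnt counts the CPUs sharing it, and
 * free_when_cpu_online parks a redundant allocation until the CPU is fully
 * online (see amd_uncore_find_online_sibling() and uncore_online()).
 */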

static struct amd_uncore * __percpu *amd_uncore_nb;
static struct amd_uncore * __percpu *amd_uncore_l2;

static struct pmu amd_nb_pmu;
static struct pmu amd_l2_pmu;

static cpumask_t amd_nb_active_mask;
static cpumask_t amd_l2_active_mask;

static bool is_nb_event(struct perf_event *event)
{
        return event->pmu->type == amd_nb_pmu.type;
}

static bool is_l2_event(struct perf_event *event)
{
        return event->pmu->type == amd_l2_pmu.type;
}

static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
{
        if (is_nb_event(event) && amd_uncore_nb)
                return *per_cpu_ptr(amd_uncore_nb, event->cpu);
        else if (is_l2_event(event) && amd_uncore_l2)
                return *per_cpu_ptr(amd_uncore_l2, event->cpu);

        return NULL;
}

static void amd_uncore_read(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        u64 prev, new;
        s64 delta;

        /*
         * since we do not enable counter overflow interrupts,
         * we do not have to worry about prev_count changing on us
         */

        prev = local64_read(&hwc->prev_count);
        rdpmcl(hwc->event_base_rdpmc, new);
        local64_set(&hwc->prev_count, new);
        delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
        delta >>= COUNTER_SHIFT;
        local64_add(delta, &event->count);
}
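
/*
 * The shift dance above sign-extends the 48-bit hardware count so that a
 * counter wrap still yields a positive delta. For example, if the counter
 * wrapped from 0xffffffffffff to 0x000000000004:
 *
 *      prev << 16 = 0xffffffffffff0000
 *      new  << 16 = 0x0000000000040000
 *      delta      = 0x0000000000050000, and delta >> 16 = 5
 *
 * i.e. the five counts that actually elapsed across the wrap.
 */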

static void amd_uncore_start(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;

        if (flags & PERF_EF_RELOAD)
                wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));

        hwc->state = 0;
        wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
        perf_event_update_userpage(event);
}

static void amd_uncore_stop(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;

        wrmsrl(hwc->config_base, hwc->config);
        hwc->state |= PERF_HES_STOPPED;

        if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
                amd_uncore_read(event);
                hwc->state |= PERF_HES_UPTODATE;
        }
}

static int amd_uncore_add(struct perf_event *event, int flags)
{
        int i;
        struct amd_uncore *uncore = event_to_amd_uncore(event);
        struct hw_perf_event *hwc = &event->hw;

        /* are we already assigned? */
        if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
                goto out;

        for (i = 0; i < uncore->num_counters; i++) {
                if (uncore->events[i] == event) {
                        hwc->idx = i;
                        goto out;
                }
        }

        /* if not, take the first available counter */
        hwc->idx = -1;
        for (i = 0; i < uncore->num_counters; i++) {
                if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
                        hwc->idx = i;
                        break;
                }
        }

out:
        if (hwc->idx == -1)
                return -EBUSY;

        hwc->config_base = uncore->msr_base + (2 * hwc->idx);
        hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
        hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
        hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

        if (flags & PERF_EF_START)
                amd_uncore_start(event, PERF_EF_RELOAD);

        return 0;
}
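
/*
 * Counter slots are claimed with cmpxchg() rather than under a lock: the
 * events[] array is the shared state that matters here, and atomically
 * swinging a slot from NULL to the event both reserves the slot and
 * publishes its owner. Once an index is known, the PERF_CTL/PERF_CTR pair
 * for that slot is msr_base + 2 * idx (control) and msr_base + 2 * idx + 1
 * (counter), with the matching RDPMC index at rdpmc_base + idx.
 */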

static void amd_uncore_del(struct perf_event *event, int flags)
{
        int i;
        struct amd_uncore *uncore = event_to_amd_uncore(event);
        struct hw_perf_event *hwc = &event->hw;

        amd_uncore_stop(event, PERF_EF_UPDATE);

        for (i = 0; i < uncore->num_counters; i++) {
                if (cmpxchg(&uncore->events[i], event, NULL) == event)
                        break;
        }

        hwc->idx = -1;
}

static int amd_uncore_event_init(struct perf_event *event)
{
        struct amd_uncore *uncore;
        struct hw_perf_event *hwc = &event->hw;

        if (event->attr.type != event->pmu->type)
                return -ENOENT;

        /*
         * NB and L2 counters (MSRs) are shared across all cores that share the
         * same NB / L2 cache. Interrupts can be directed to a single target
         * core, however, event counts generated by processes running on other
         * cores cannot be masked out. So we do not support sampling and
         * per-thread events.
         */
        if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
                return -EINVAL;

        /* NB and L2 counters do not have usr/os/guest/host bits */
        if (event->attr.exclude_user || event->attr.exclude_kernel ||
            event->attr.exclude_host || event->attr.exclude_guest)
                return -EINVAL;

        /* and we do not enable counter overflow interrupts */
        hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
        hwc->idx = -1;

        if (event->cpu < 0)
                return -EINVAL;

        uncore = event_to_amd_uncore(event);
        if (!uncore)
                return -ENODEV;

        /*
         * since request can come in to any of the shared cores, we will remap
         * to a single common cpu.
         */
        event->cpu = uncore->cpu;

        return 0;
}
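
/*
 * Because event->cpu is rewritten to uncore->cpu here, every event for a
 * given northbridge or L2 complex ends up running on one designated CPU,
 * and the cpumask attribute exported below tells tools such as perf which
 * CPUs those are. A counting session opened against any CPU in the group is
 * therefore silently serviced by the designated one.
 */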

static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
                                            struct device_attribute *attr,
                                            char *buf)
{
        cpumask_t *active_mask;
        struct pmu *pmu = dev_get_drvdata(dev);

        if (pmu->type == amd_nb_pmu.type)
                active_mask = &amd_nb_active_mask;
        else if (pmu->type == amd_l2_pmu.type)
                active_mask = &amd_l2_active_mask;
        else
                return 0;

        return cpumap_print_to_pagebuf(true, buf, active_mask);
}
static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);

static struct attribute *amd_uncore_attrs[] = {
        &dev_attr_cpumask.attr,
        NULL,
};

static struct attribute_group amd_uncore_attr_group = {
        .attrs = amd_uncore_attrs,
};

PMU_FORMAT_ATTR(event, "config:0-7,32-35");
PMU_FORMAT_ATTR(umask, "config:8-15");

static struct attribute *amd_uncore_format_attr[] = {
        &format_attr_event.attr,
        &format_attr_umask.attr,
        NULL,
};

static struct attribute_group amd_uncore_format_group = {
        .name = "format",
        .attrs = amd_uncore_format_attr,
};
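
/*
 * The format strings above expose the raw layout of the NB/L2I event select
 * (bits 0-7 plus the extended bits 32-35) and unit mask (bits 8-15), so a
 * raw event can be requested from userspace roughly as:
 *
 *      perf stat -a -e 'amd_nb/event=0xNN,umask=0xMM/' -- sleep 1
 *
 * where 0xNN/0xMM are placeholders for an event code and unit mask taken
 * from the BIOS and Kernel Developer's Guide for the CPU family.
 */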

static const struct attribute_group *amd_uncore_attr_groups[] = {
        &amd_uncore_attr_group,
        &amd_uncore_format_group,
        NULL,
};

static struct pmu amd_nb_pmu = {
        .task_ctx_nr    = perf_invalid_context,
        .attr_groups    = amd_uncore_attr_groups,
        .name           = "amd_nb",
        .event_init     = amd_uncore_event_init,
        .add            = amd_uncore_add,
        .del            = amd_uncore_del,
        .start          = amd_uncore_start,
        .stop           = amd_uncore_stop,
        .read           = amd_uncore_read,
};

static struct pmu amd_l2_pmu = {
        .task_ctx_nr    = perf_invalid_context,
        .attr_groups    = amd_uncore_attr_groups,
        .name           = "amd_l2",
        .event_init     = amd_uncore_event_init,
        .add            = amd_uncore_add,
        .del            = amd_uncore_del,
        .start          = amd_uncore_start,
        .stop           = amd_uncore_stop,
        .read           = amd_uncore_read,
};

static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
{
        return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
                        cpu_to_node(cpu));
}

static int amd_uncore_cpu_up_prepare(unsigned int cpu)
{
        struct amd_uncore *uncore_nb = NULL, *uncore_l2;

        if (amd_uncore_nb) {
                uncore_nb = amd_uncore_alloc(cpu);
                if (!uncore_nb)
                        goto fail;
                uncore_nb->cpu = cpu;
                uncore_nb->num_counters = NUM_COUNTERS_NB;
                uncore_nb->rdpmc_base = RDPMC_BASE_NB;
                uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
                uncore_nb->active_mask = &amd_nb_active_mask;
                uncore_nb->pmu = &amd_nb_pmu;
                *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
        }

        if (amd_uncore_l2) {
                uncore_l2 = amd_uncore_alloc(cpu);
                if (!uncore_l2)
                        goto fail;
                uncore_l2->cpu = cpu;
                uncore_l2->num_counters = NUM_COUNTERS_L2;
                uncore_l2->rdpmc_base = RDPMC_BASE_L2;
                uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL;
                uncore_l2->active_mask = &amd_l2_active_mask;
                uncore_l2->pmu = &amd_l2_pmu;
                *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2;
        }

        return 0;

fail:
        if (amd_uncore_nb)
                *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
        kfree(uncore_nb);
        return -ENOMEM;
}

static struct amd_uncore *
amd_uncore_find_online_sibling(struct amd_uncore *this,
                               struct amd_uncore * __percpu *uncores)
{
        unsigned int cpu;
        struct amd_uncore *that;

        for_each_online_cpu(cpu) {
                that = *per_cpu_ptr(uncores, cpu);

                if (!that)
                        continue;

                if (this == that)
                        continue;

                if (this->id == that->id) {
                        that->free_when_cpu_online = this;
                        this = that;
                        break;
                }
        }

        this->refcnt++;
        return this;
}
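
/*
 * If an online CPU already owns an amd_uncore with the same id, the freshly
 * allocated instance passed in as @this is parked in free_when_cpu_online
 * and the existing instance is returned instead. The kfree() is deferred to
 * uncore_online(), presumably because this path runs from the CPU_STARTING
 * callback on the incoming CPU with interrupts disabled, and the later
 * CPU_ONLINE callback is a friendlier place to free memory.
 */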

static void amd_uncore_cpu_starting(unsigned int cpu)
{
        unsigned int eax, ebx, ecx, edx;
        struct amd_uncore *uncore;

        if (amd_uncore_nb) {
                uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
                cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
                uncore->id = ecx & 0xff;

                uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
                *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
        }

        if (amd_uncore_l2) {
                unsigned int apicid = cpu_data(cpu).apicid;
                unsigned int nshared;

                uncore = *per_cpu_ptr(amd_uncore_l2, cpu);
                cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
                nshared = ((eax >> 14) & 0xfff) + 1;
                uncore->id = apicid - (apicid % nshared);

                uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2);
                *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
        }
}
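
/*
 * The sharing ids come from the topology extensions advertised by
 * X86_FEATURE_TOPOEXT: CPUID leaf 0x8000001e returns the node id in
 * ECX[7:0], and cache-properties leaf 0x8000001d (subleaf 2, which
 * enumerates the L2 cache on these parts) reports in EAX[25:14] the number
 * of logical processors sharing that cache, minus one. Rounding the APIC id
 * down to a multiple of that count gives a stable per-L2-instance id, so
 * CPUs behind the same L2 compute the same value.
 */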

static void uncore_online(unsigned int cpu,
                          struct amd_uncore * __percpu *uncores)
{
        struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);

        kfree(uncore->free_when_cpu_online);
        uncore->free_when_cpu_online = NULL;

        if (cpu == uncore->cpu)
                cpumask_set_cpu(cpu, uncore->active_mask);
}

static void amd_uncore_cpu_online(unsigned int cpu)
{
        if (amd_uncore_nb)
                uncore_online(cpu, amd_uncore_nb);

        if (amd_uncore_l2)
                uncore_online(cpu, amd_uncore_l2);
}

static void uncore_down_prepare(unsigned int cpu,
                                struct amd_uncore * __percpu *uncores)
{
        unsigned int i;
        struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);

        if (this->cpu != cpu)
                return;

        /* this cpu is going down, migrate to a shared sibling if possible */
        for_each_online_cpu(i) {
                struct amd_uncore *that = *per_cpu_ptr(uncores, i);

                if (cpu == i)
                        continue;

                if (this == that) {
                        perf_pmu_migrate_context(this->pmu, cpu, i);
                        cpumask_clear_cpu(cpu, that->active_mask);
                        cpumask_set_cpu(i, that->active_mask);
                        that->cpu = i;
                        break;
                }
        }
}
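
/*
 * When the designated reader CPU of a group goes down, its perf context is
 * handed to any other online CPU sharing the same amd_uncore instance via
 * perf_pmu_migrate_context(), and the exported cpumask is updated to match.
 * Counting events therefore survive offlining every CPU in the group except
 * the last one.
 */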

static void amd_uncore_cpu_down_prepare(unsigned int cpu)
{
        if (amd_uncore_nb)
                uncore_down_prepare(cpu, amd_uncore_nb);

        if (amd_uncore_l2)
                uncore_down_prepare(cpu, amd_uncore_l2);
}

static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
{
        struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);

        if (cpu == uncore->cpu)
                cpumask_clear_cpu(cpu, uncore->active_mask);

        if (!--uncore->refcnt)
                kfree(uncore);
        *per_cpu_ptr(uncores, cpu) = NULL;
}

static void amd_uncore_cpu_dead(unsigned int cpu)
{
        if (amd_uncore_nb)
                uncore_dead(cpu, amd_uncore_nb);

        if (amd_uncore_l2)
                uncore_dead(cpu, amd_uncore_l2);
}

static int
amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
                        void *hcpu)
{
        unsigned int cpu = (long)hcpu;

        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_UP_PREPARE:
                if (amd_uncore_cpu_up_prepare(cpu))
                        return notifier_from_errno(-ENOMEM);
                break;

        case CPU_STARTING:
                amd_uncore_cpu_starting(cpu);
                break;

        case CPU_ONLINE:
                amd_uncore_cpu_online(cpu);
                break;

        case CPU_DOWN_PREPARE:
                amd_uncore_cpu_down_prepare(cpu);
                break;

        case CPU_UP_CANCELED:
        case CPU_DEAD:
                amd_uncore_cpu_dead(cpu);
                break;

        default:
                break;
        }

        return NOTIFY_OK;
}

static struct notifier_block amd_uncore_cpu_notifier_block = {
        .notifier_call  = amd_uncore_cpu_notifier,
        .priority       = CPU_PRI_PERF + 1,
};
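
/*
 * Hotplug life cycle of a CPU, as driven by the notifier above:
 *
 *      CPU_UP_PREPARE          allocate a tentative amd_uncore for the CPU
 *      CPU_STARTING            read the topology id, adopt an online sibling
 *      CPU_ONLINE              free the unused allocation, publish the cpumask
 *      CPU_DOWN_PREPARE        migrate the perf context to another sibling
 *      CPU_DEAD/UP_CANCELED    drop the reference, free on last user
 *
 * The block is registered at CPU_PRI_PERF + 1 so it is called ahead of the
 * core perf hotplug notifier.
 */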

static void __init init_cpu_already_online(void *dummy)
{
        unsigned int cpu = smp_processor_id();

        amd_uncore_cpu_starting(cpu);
        amd_uncore_cpu_online(cpu);
}

static void cleanup_cpu_online(void *dummy)
{
        unsigned int cpu = smp_processor_id();

        amd_uncore_cpu_dead(cpu);
}

static int __init amd_uncore_init(void)
{
        unsigned int cpu, cpu2;
        int ret = -ENODEV;

        if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
                goto fail_nodev;

        if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
                goto fail_nodev;

        if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
                amd_uncore_nb = alloc_percpu(struct amd_uncore *);
                if (!amd_uncore_nb) {
                        ret = -ENOMEM;
                        goto fail_nb;
                }
                ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
                if (ret)
                        goto fail_nb;

                pr_info("perf: AMD NB counters detected\n");
                ret = 0;
        }

        if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) {
                amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
                if (!amd_uncore_l2) {
                        ret = -ENOMEM;
                        goto fail_l2;
                }
                ret = perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
                if (ret)
                        goto fail_l2;

                pr_info("perf: AMD L2I counters detected\n");
                ret = 0;
        }

        if (ret)
                goto fail_nodev;

        cpu_notifier_register_begin();

        /* init cpus already online before registering for hotplug notifier */
        for_each_online_cpu(cpu) {
                ret = amd_uncore_cpu_up_prepare(cpu);
                if (ret)
                        goto fail_online;
                smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
        }

        __register_cpu_notifier(&amd_uncore_cpu_notifier_block);
        cpu_notifier_register_done();

        return 0;

fail_online:
        for_each_online_cpu(cpu2) {
                if (cpu2 == cpu)
                        break;
                /* undo amd_uncore_cpu_up_prepare() on the CPUs done so far */
                smp_call_function_single(cpu2, cleanup_cpu_online, NULL, 1);
        }
        cpu_notifier_register_done();

        /* amd_uncore_nb/l2 should have been freed by cleanup_cpu_online */
        amd_uncore_nb = amd_uncore_l2 = NULL;

        if (boot_cpu_has(X86_FEATURE_PERFCTR_L2))
                perf_pmu_unregister(&amd_l2_pmu);
fail_l2:
        if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
                perf_pmu_unregister(&amd_nb_pmu);
        if (amd_uncore_l2)
                free_percpu(amd_uncore_l2);
fail_nb:
        if (amd_uncore_nb)
                free_percpu(amd_uncore_nb);

fail_nodev:
        return ret;
}
device_initcall(amd_uncore_init);