linux/arch/x86/kernel/cpu/perf_event_amd_uncore.c
/*
 * Copyright (C) 2013 Advanced Micro Devices, Inc.
 *
 * Author: Jacob Shin <jacob.shin@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>

#include <asm/cpufeature.h>
#include <asm/perf_event.h>
#include <asm/msr.h>

#define NUM_COUNTERS_NB         4
#define NUM_COUNTERS_L2         4
#define MAX_COUNTERS            NUM_COUNTERS_NB

#define RDPMC_BASE_NB           6
#define RDPMC_BASE_L2           10

#define COUNTER_SHIFT           16

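/*
 * One amd_uncore instance is shared by every core that shares the same
 * northbridge or L2 cache.  Each online CPU's per-cpu slot points at the
 * instance for its domain; "cpu" is the core currently driving the counters.
 */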
struct amd_uncore {
        int id;
        int refcnt;
        int cpu;
        int num_counters;
        int rdpmc_base;
        u32 msr_base;
        cpumask_t *active_mask;
        struct pmu *pmu;
        struct perf_event *events[MAX_COUNTERS];
        struct amd_uncore *free_when_cpu_online;
};

static struct amd_uncore * __percpu *amd_uncore_nb;
static struct amd_uncore * __percpu *amd_uncore_l2;

static struct pmu amd_nb_pmu;
static struct pmu amd_l2_pmu;

static cpumask_t amd_nb_active_mask;
static cpumask_t amd_l2_active_mask;

static bool is_nb_event(struct perf_event *event)
{
        return event->pmu->type == amd_nb_pmu.type;
}

static bool is_l2_event(struct perf_event *event)
{
        return event->pmu->type == amd_l2_pmu.type;
}

static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
{
        if (is_nb_event(event) && amd_uncore_nb)
                return *per_cpu_ptr(amd_uncore_nb, event->cpu);
        else if (is_l2_event(event) && amd_uncore_l2)
                return *per_cpu_ptr(amd_uncore_l2, event->cpu);

        return NULL;
}

static void amd_uncore_read(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        u64 prev, new;
        s64 delta;

        /*
         * since we do not enable counter overflow interrupts,
         * we do not have to worry about prev_count changing on us
         */

        prev = local64_read(&hwc->prev_count);
        rdpmcl(hwc->event_base_rdpmc, new);
        local64_set(&hwc->prev_count, new);
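        /*
         * The counters are 64 - COUNTER_SHIFT = 48 bits wide; shifting both
         * values up by COUNTER_SHIFT and the difference back down discards
         * the unused high bits, so a counter wrap still yields a correct
         * delta.
         */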
        delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
        delta >>= COUNTER_SHIFT;
        local64_add(delta, &event->count);
}

static void amd_uncore_start(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;

        if (flags & PERF_EF_RELOAD)
                wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));

        hwc->state = 0;
        wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
        perf_event_update_userpage(event);
}

static void amd_uncore_stop(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;

        wrmsrl(hwc->config_base, hwc->config);
        hwc->state |= PERF_HES_STOPPED;

        if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
                amd_uncore_read(event);
                hwc->state |= PERF_HES_UPTODATE;
        }
}

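/*
 * Claim a counter slot in the shared uncore for this event and derive the
 * control/counter MSR addresses and RDPMC index from the slot number.
 * Slots are claimed with cmpxchg() so a counter is never handed to two
 * events.
 */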
static int amd_uncore_add(struct perf_event *event, int flags)
{
        int i;
        struct amd_uncore *uncore = event_to_amd_uncore(event);
        struct hw_perf_event *hwc = &event->hw;

        /* are we already assigned? */
        if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
                goto out;

        for (i = 0; i < uncore->num_counters; i++) {
                if (uncore->events[i] == event) {
                        hwc->idx = i;
                        goto out;
                }
        }

        /* if not, take the first available counter */
        hwc->idx = -1;
        for (i = 0; i < uncore->num_counters; i++) {
                if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
                        hwc->idx = i;
                        break;
                }
        }

out:
        if (hwc->idx == -1)
                return -EBUSY;

        hwc->config_base = uncore->msr_base + (2 * hwc->idx);
        hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
        hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
        hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

        if (flags & PERF_EF_START)
                amd_uncore_start(event, PERF_EF_RELOAD);

        return 0;
}

static void amd_uncore_del(struct perf_event *event, int flags)
{
        int i;
        struct amd_uncore *uncore = event_to_amd_uncore(event);
        struct hw_perf_event *hwc = &event->hw;

        amd_uncore_stop(event, PERF_EF_UPDATE);

        for (i = 0; i < uncore->num_counters; i++) {
                if (cmpxchg(&uncore->events[i], event, NULL) == event)
                        break;
        }

        hwc->idx = -1;
}

static int amd_uncore_event_init(struct perf_event *event)
{
        struct amd_uncore *uncore;
        struct hw_perf_event *hwc = &event->hw;

        if (event->attr.type != event->pmu->type)
                return -ENOENT;

        /*
         * NB and L2 counters (MSRs) are shared across all cores that share the
         * same NB / L2 cache. Interrupts can be directed to a single target
         * core, however, event counts generated by processes running on other
         * cores cannot be masked out. So we do not support sampling and
         * per-thread events.
         */
        if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
                return -EINVAL;

        /* NB and L2 counters do not have usr/os/guest/host bits */
        if (event->attr.exclude_user || event->attr.exclude_kernel ||
            event->attr.exclude_host || event->attr.exclude_guest)
                return -EINVAL;

        /* and we do not enable counter overflow interrupts */
        hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
        hwc->idx = -1;

        if (event->cpu < 0)
                return -EINVAL;

        uncore = event_to_amd_uncore(event);
        if (!uncore)
                return -ENODEV;

        /*
         * since request can come in to any of the shared cores, we will remap
         * to a single common cpu.
         */
        event->cpu = uncore->cpu;

        return 0;
}

static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
                                            struct device_attribute *attr,
                                            char *buf)
{
        cpumask_t *active_mask;
        struct pmu *pmu = dev_get_drvdata(dev);

        if (pmu->type == amd_nb_pmu.type)
                active_mask = &amd_nb_active_mask;
        else if (pmu->type == amd_l2_pmu.type)
                active_mask = &amd_l2_active_mask;
        else
                return 0;

        return cpumap_print_to_pagebuf(true, buf, active_mask);
}
static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);

static struct attribute *amd_uncore_attrs[] = {
        &dev_attr_cpumask.attr,
        NULL,
};

static struct attribute_group amd_uncore_attr_group = {
        .attrs = amd_uncore_attrs,
};

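/*
 * Raw event layout: the event select is split between config bits 0-7 and
 * 32-35 (matching AMD64_RAW_EVENT_MASK_NB); the unit mask sits in bits 8-15.
 */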
PMU_FORMAT_ATTR(event, "config:0-7,32-35");
PMU_FORMAT_ATTR(umask, "config:8-15");

static struct attribute *amd_uncore_format_attr[] = {
        &format_attr_event.attr,
        &format_attr_umask.attr,
        NULL,
};

static struct attribute_group amd_uncore_format_group = {
        .name = "format",
        .attrs = amd_uncore_format_attr,
};

static const struct attribute_group *amd_uncore_attr_groups[] = {
        &amd_uncore_attr_group,
        &amd_uncore_format_group,
        NULL,
};

static struct pmu amd_nb_pmu = {
        .attr_groups    = amd_uncore_attr_groups,
        .name           = "amd_nb",
        .event_init     = amd_uncore_event_init,
        .add            = amd_uncore_add,
        .del            = amd_uncore_del,
        .start          = amd_uncore_start,
        .stop           = amd_uncore_stop,
        .read           = amd_uncore_read,
};

static struct pmu amd_l2_pmu = {
        .attr_groups    = amd_uncore_attr_groups,
        .name           = "amd_l2",
        .event_init     = amd_uncore_event_init,
        .add            = amd_uncore_add,
        .del            = amd_uncore_del,
        .start          = amd_uncore_start,
        .stop           = amd_uncore_stop,
        .read           = amd_uncore_read,
};

static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
{
        return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
                        cpu_to_node(cpu));
}

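/*
 * CPU_UP_PREPARE: allocate this CPU's uncore structures before it starts
 * running.  They may later be replaced by an already-online sibling's
 * instance in amd_uncore_cpu_starting().
 */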
static int amd_uncore_cpu_up_prepare(unsigned int cpu)
{
        struct amd_uncore *uncore_nb = NULL, *uncore_l2;

        if (amd_uncore_nb) {
                uncore_nb = amd_uncore_alloc(cpu);
                if (!uncore_nb)
                        goto fail;
                uncore_nb->cpu = cpu;
                uncore_nb->num_counters = NUM_COUNTERS_NB;
                uncore_nb->rdpmc_base = RDPMC_BASE_NB;
                uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
                uncore_nb->active_mask = &amd_nb_active_mask;
                uncore_nb->pmu = &amd_nb_pmu;
                *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
        }

        if (amd_uncore_l2) {
                uncore_l2 = amd_uncore_alloc(cpu);
                if (!uncore_l2)
                        goto fail;
                uncore_l2->cpu = cpu;
                uncore_l2->num_counters = NUM_COUNTERS_L2;
                uncore_l2->rdpmc_base = RDPMC_BASE_L2;
                uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL;
                uncore_l2->active_mask = &amd_l2_active_mask;
                uncore_l2->pmu = &amd_l2_pmu;
                *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2;
        }

        return 0;

fail:
        /* don't leave a stale NB pointer behind for a later CPU_DEAD callback */
        if (amd_uncore_nb)
                *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
        kfree(uncore_nb);
        return -ENOMEM;
}

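/*
 * If another online CPU already owns an uncore instance with the same id,
 * adopt that instance and stash our freshly allocated copy in
 * free_when_cpu_online so it can be released once this CPU is fully online.
 */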
static struct amd_uncore *
amd_uncore_find_online_sibling(struct amd_uncore *this,
                               struct amd_uncore * __percpu *uncores)
{
        unsigned int cpu;
        struct amd_uncore *that;

        for_each_online_cpu(cpu) {
                that = *per_cpu_ptr(uncores, cpu);

                if (!that)
                        continue;

                if (this == that)
                        continue;

                if (this->id == that->id) {
                        that->free_when_cpu_online = this;
                        this = that;
                        break;
                }
        }

        this->refcnt++;
        return this;
}

static void amd_uncore_cpu_starting(unsigned int cpu)
{
        unsigned int eax, ebx, ecx, edx;
        struct amd_uncore *uncore;

        if (amd_uncore_nb) {
                uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
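                /* CPUID Fn8000_001E ECX[7:0]: NodeId identifies the NB domain */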
                cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
                uncore->id = ecx & 0xff;

                uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
                *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
        }

        if (amd_uncore_l2) {
                unsigned int apicid = cpu_data(cpu).apicid;
                unsigned int nshared;

                uncore = *per_cpu_ptr(amd_uncore_l2, cpu);
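                /*
                 * CPUID Fn8000_001D, ECX=2 (L2): EAX[25:14] is the number of
                 * cores sharing the cache minus one; the id of the group is
                 * the APIC id of its first core.
                 */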
                cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
                nshared = ((eax >> 14) & 0xfff) + 1;
                uncore->id = apicid - (apicid % nshared);

                uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2);
                *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
        }
}

static void uncore_online(unsigned int cpu,
                          struct amd_uncore * __percpu *uncores)
{
        struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);

        kfree(uncore->free_when_cpu_online);
        uncore->free_when_cpu_online = NULL;

        if (cpu == uncore->cpu)
                cpumask_set_cpu(cpu, uncore->active_mask);
}

static void amd_uncore_cpu_online(unsigned int cpu)
{
        if (amd_uncore_nb)
                uncore_online(cpu, amd_uncore_nb);

        if (amd_uncore_l2)
                uncore_online(cpu, amd_uncore_l2);
}

static void uncore_down_prepare(unsigned int cpu,
                                struct amd_uncore * __percpu *uncores)
{
        unsigned int i;
        struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);

        if (this->cpu != cpu)
                return;

        /* this cpu is going down, migrate to a shared sibling if possible */
        for_each_online_cpu(i) {
                struct amd_uncore *that = *per_cpu_ptr(uncores, i);

                if (cpu == i)
                        continue;

                if (this == that) {
                        perf_pmu_migrate_context(this->pmu, cpu, i);
                        cpumask_clear_cpu(cpu, that->active_mask);
                        cpumask_set_cpu(i, that->active_mask);
                        that->cpu = i;
                        break;
                }
        }
}

static void amd_uncore_cpu_down_prepare(unsigned int cpu)
{
        if (amd_uncore_nb)
                uncore_down_prepare(cpu, amd_uncore_nb);

        if (amd_uncore_l2)
                uncore_down_prepare(cpu, amd_uncore_l2);
}

static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
{
        struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);

        if (cpu == uncore->cpu)
                cpumask_clear_cpu(cpu, uncore->active_mask);

        if (!--uncore->refcnt)
                kfree(uncore);
        *per_cpu_ptr(uncores, cpu) = NULL;
}

static void amd_uncore_cpu_dead(unsigned int cpu)
{
        if (amd_uncore_nb)
                uncore_dead(cpu, amd_uncore_nb);

        if (amd_uncore_l2)
                uncore_dead(cpu, amd_uncore_l2);
}

static int
amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
                        void *hcpu)
{
        unsigned int cpu = (long)hcpu;

        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_UP_PREPARE:
                if (amd_uncore_cpu_up_prepare(cpu))
                        return notifier_from_errno(-ENOMEM);
                break;

        case CPU_STARTING:
                amd_uncore_cpu_starting(cpu);
                break;

        case CPU_ONLINE:
                amd_uncore_cpu_online(cpu);
                break;

        case CPU_DOWN_PREPARE:
                amd_uncore_cpu_down_prepare(cpu);
                break;

        case CPU_UP_CANCELED:
        case CPU_DEAD:
                amd_uncore_cpu_dead(cpu);
                break;

        default:
                break;
        }

        return NOTIFY_OK;
}

static struct notifier_block amd_uncore_cpu_notifier_block = {
        .notifier_call  = amd_uncore_cpu_notifier,
        .priority       = CPU_PRI_PERF + 1,
};

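/*
 * Stand-ins for the STARTING/ONLINE (and, on failure, DEAD) notifier
 * callbacks, run via smp_call_function_single() on CPUs that were already
 * online when the driver loaded.
 */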
static void __init init_cpu_already_online(void *dummy)
{
        unsigned int cpu = smp_processor_id();

        amd_uncore_cpu_starting(cpu);
        amd_uncore_cpu_online(cpu);
}

static void cleanup_cpu_online(void *dummy)
{
        unsigned int cpu = smp_processor_id();

        amd_uncore_cpu_dead(cpu);
}

static int __init amd_uncore_init(void)
{
        unsigned int cpu, cpu2;
        int ret = -ENODEV;

        if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
                goto fail_nodev;

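        /* CPUID Fn8000_001D/1E (topology extensions) identify the NB/L2 domains */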
        if (!cpu_has_topoext)
                goto fail_nodev;

        if (cpu_has_perfctr_nb) {
                amd_uncore_nb = alloc_percpu(struct amd_uncore *);
                if (!amd_uncore_nb) {
                        ret = -ENOMEM;
                        goto fail_nb;
                }
                ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
                if (ret)
                        goto fail_nb;

                printk(KERN_INFO "perf: AMD NB counters detected\n");
                ret = 0;
        }

        if (cpu_has_perfctr_l2) {
                amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
                if (!amd_uncore_l2) {
                        ret = -ENOMEM;
                        goto fail_l2;
                }
                ret = perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
                if (ret)
                        goto fail_l2;

                printk(KERN_INFO "perf: AMD L2I counters detected\n");
                ret = 0;
        }

        if (ret)
                goto fail_nodev;

        cpu_notifier_register_begin();

        /* init cpus already online before registering for hotplug notifier */
        for_each_online_cpu(cpu) {
                ret = amd_uncore_cpu_up_prepare(cpu);
                if (ret)
                        goto fail_online;
                smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
        }

        __register_cpu_notifier(&amd_uncore_cpu_notifier_block);
        cpu_notifier_register_done();

        return 0;

fail_online:
        for_each_online_cpu(cpu2) {
                if (cpu2 == cpu)
                        break;
                smp_call_function_single(cpu2, cleanup_cpu_online, NULL, 1);
        }
        cpu_notifier_register_done();

        /* amd_uncore_nb/l2 should have been freed by cleanup_cpu_online */
        amd_uncore_nb = amd_uncore_l2 = NULL;
        if (cpu_has_perfctr_l2)
                perf_pmu_unregister(&amd_l2_pmu);
fail_l2:
        if (cpu_has_perfctr_nb)
                perf_pmu_unregister(&amd_nb_pmu);
        if (amd_uncore_l2)
                free_percpu(amd_uncore_l2);
fail_nb:
        if (amd_uncore_nb)
                free_percpu(amd_uncore_nb);

fail_nodev:
        return ret;
}
device_initcall(amd_uncore_init);