linux/arch/x86/kernel/cpu/perf_event_amd_uncore.c
/*
 * Copyright (C) 2013 Advanced Micro Devices, Inc.
 *
 * Author: Jacob Shin <jacob.shin@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>

#include <asm/cpufeature.h>
#include <asm/perf_event.h>
#include <asm/msr.h>

#define NUM_COUNTERS_NB         4
#define NUM_COUNTERS_L2         4
#define MAX_COUNTERS            NUM_COUNTERS_NB

#define RDPMC_BASE_NB           6
#define RDPMC_BASE_L2           10

#define COUNTER_SHIFT           16

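/*
 * Counter layout, as implied by the callbacks below:
 *  - counter i uses msr_base + 2*i as its PERF_CTL MSR and
 *    msr_base + 2*i + 1 as its PERF_CTR MSR;
 *  - counter i is read with RDPMC index rdpmc_base + i;
 *  - COUNTER_SHIFT assumes 48-bit wide hardware counters: deltas are
 *    sign-extended from bit 47 in amd_uncore_read() so that counter
 *    wraparound is handled correctly.
 *
 * A single struct amd_uncore is shared by all cpus behind the same
 * northbridge or L2 cache; the per-cpu pointers below then all alias
 * the same object for those cpus.
 */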
struct amd_uncore {
        int id;
        int refcnt;
        int cpu;
        int num_counters;
        int rdpmc_base;
        u32 msr_base;
        cpumask_t *active_mask;
        struct pmu *pmu;
        struct perf_event *events[MAX_COUNTERS];
        struct amd_uncore *free_when_cpu_online;
};

static struct amd_uncore * __percpu *amd_uncore_nb;
static struct amd_uncore * __percpu *amd_uncore_l2;

static struct pmu amd_nb_pmu;
static struct pmu amd_l2_pmu;

static cpumask_t amd_nb_active_mask;
static cpumask_t amd_l2_active_mask;

static bool is_nb_event(struct perf_event *event)
{
        return event->pmu->type == amd_nb_pmu.type;
}

static bool is_l2_event(struct perf_event *event)
{
        return event->pmu->type == amd_l2_pmu.type;
}

static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
{
        if (is_nb_event(event) && amd_uncore_nb)
                return *per_cpu_ptr(amd_uncore_nb, event->cpu);
        else if (is_l2_event(event) && amd_uncore_l2)
                return *per_cpu_ptr(amd_uncore_l2, event->cpu);

        return NULL;
}

static void amd_uncore_read(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        u64 prev, new;
        s64 delta;

        /*
         * since we do not enable counter overflow interrupts,
         * we do not have to worry about prev_count changing on us
         */

        prev = local64_read(&hwc->prev_count);
        rdpmcl(hwc->event_base_rdpmc, new);
        local64_set(&hwc->prev_count, new);
        delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
        delta >>= COUNTER_SHIFT;
        local64_add(delta, &event->count);
}
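
/*
 * Example of the delta arithmetic above, assuming 48-bit counters: if
 * the counter wrapped from prev = 0xffffffffff00 to new = 0xff, then
 *
 *      (new << 16) - (prev << 16) = 0xff0000 - 0xffffffffff000000
 *                                 = 0x1ff0000        (mod 2^64)
 *      delta = 0x1ff0000 >> 16    = 0x1ff
 *
 * i.e. the 0x1ff events counted across the wrap, rather than a huge
 * negative number.
 */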

static void amd_uncore_start(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;

        if (flags & PERF_EF_RELOAD)
                wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));

        hwc->state = 0;
        wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
        perf_event_update_userpage(event);
}

static void amd_uncore_stop(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;

        wrmsrl(hwc->config_base, hwc->config);
        hwc->state |= PERF_HES_STOPPED;

        if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
                amd_uncore_read(event);
                hwc->state |= PERF_HES_UPTODATE;
        }
}

static int amd_uncore_add(struct perf_event *event, int flags)
{
        int i;
        struct amd_uncore *uncore = event_to_amd_uncore(event);
        struct hw_perf_event *hwc = &event->hw;

        /* are we already assigned? */
        if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
                goto out;

        for (i = 0; i < uncore->num_counters; i++) {
                if (uncore->events[i] == event) {
                        hwc->idx = i;
                        goto out;
                }
        }

        /* if not, take the first available counter */
        hwc->idx = -1;
        for (i = 0; i < uncore->num_counters; i++) {
                if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
                        hwc->idx = i;
                        break;
                }
        }

out:
        if (hwc->idx == -1)
                return -EBUSY;

        hwc->config_base = uncore->msr_base + (2 * hwc->idx);
        hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
        hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
        hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

        if (flags & PERF_EF_START)
                amd_uncore_start(event, PERF_EF_RELOAD);

        return 0;
}
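
/*
 * Counter slots are claimed with cmpxchg() because the events[] array
 * lives in the amd_uncore shared by all cpus behind the same
 * northbridge / L2.  For counter index i the assignment above resolves
 * to (e.g. for the NB PMU, where msr_base is MSR_F15H_NB_PERF_CTL):
 *
 *      config_base = msr_base + 2*i            PERF_CTLi
 *      event_base  = msr_base + 2*i + 1        PERF_CTRi
 *      rdpmc index = rdpmc_base + i
 */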

static void amd_uncore_del(struct perf_event *event, int flags)
{
        int i;
        struct amd_uncore *uncore = event_to_amd_uncore(event);
        struct hw_perf_event *hwc = &event->hw;

        amd_uncore_stop(event, PERF_EF_UPDATE);

        for (i = 0; i < uncore->num_counters; i++) {
                if (cmpxchg(&uncore->events[i], event, NULL) == event)
                        break;
        }

        hwc->idx = -1;
}

static int amd_uncore_event_init(struct perf_event *event)
{
        struct amd_uncore *uncore;
        struct hw_perf_event *hwc = &event->hw;

        if (event->attr.type != event->pmu->type)
                return -ENOENT;

        /*
         * NB and L2 counters (MSRs) are shared across all cores that share the
         * same NB / L2 cache. Interrupts can be directed to a single target
         * core, however, event counts generated by processes running on other
         * cores cannot be masked out. So we do not support sampling and
         * per-thread events.
         */
        if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
                return -EINVAL;

        /* NB and L2 counters do not have usr/os/guest/host bits */
        if (event->attr.exclude_user || event->attr.exclude_kernel ||
            event->attr.exclude_host || event->attr.exclude_guest)
                return -EINVAL;

        /* and we do not enable counter overflow interrupts */
        hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
        hwc->idx = -1;

        if (event->cpu < 0)
                return -EINVAL;

        uncore = event_to_amd_uncore(event);
        if (!uncore)
                return -ENODEV;

        /*
         * since request can come in to any of the shared cores, we will remap
         * to a single common cpu.
         */
        event->cpu = uncore->cpu;

        return 0;
}
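
/*
 * From the user's point of view the checks above mean that only
 * system-wide counting works.  For example (illustrative only; the
 * event/umask encodings come from the BKDG of the cpu in question):
 *
 *      perf stat -a -e amd_nb/event=0x??,umask=0x??/ -- sleep 1
 *
 * succeeds, while sampling ("perf record") or attaching the same event
 * to a single task fails with -EINVAL.
 */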

static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
                                            struct device_attribute *attr,
                                            char *buf)
{
        int n;
        cpumask_t *active_mask;
        struct pmu *pmu = dev_get_drvdata(dev);

        if (pmu->type == amd_nb_pmu.type)
                active_mask = &amd_nb_active_mask;
        else if (pmu->type == amd_l2_pmu.type)
                active_mask = &amd_l2_active_mask;
        else
                return 0;

        n = cpulist_scnprintf(buf, PAGE_SIZE - 2, active_mask);
        buf[n++] = '\n';
        buf[n] = '\0';
        return n;
}
static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);
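
/*
 * The cpumask attribute shows up in sysfs as, e.g.,
 * /sys/bus/event_source/devices/amd_nb/cpumask and lists the one cpu
 * per northbridge (or per L2 complex) on which events are actually
 * counted; the perf tool reads it to decide where to open system-wide
 * events.
 */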

static struct attribute *amd_uncore_attrs[] = {
        &dev_attr_cpumask.attr,
        NULL,
};

static struct attribute_group amd_uncore_attr_group = {
        .attrs = amd_uncore_attrs,
};

PMU_FORMAT_ATTR(event, "config:0-7,32-35");
PMU_FORMAT_ATTR(umask, "config:8-15");

static struct attribute *amd_uncore_format_attr[] = {
        &format_attr_event.attr,
        &format_attr_umask.attr,
        NULL,
};

static struct attribute_group amd_uncore_format_group = {
        .name = "format",
        .attrs = amd_uncore_format_attr,
};

static const struct attribute_group *amd_uncore_attr_groups[] = {
        &amd_uncore_attr_group,
        &amd_uncore_format_group,
        NULL,
};
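
/*
 * The format group publishes the raw config layout under
 * /sys/bus/event_source/devices/amd_nb/format/ (and likewise for
 * amd_l2), so events can be specified symbolically on the command
 * line, e.g. (values are placeholders):
 *
 *      perf stat -a -e amd_nb/event=0x??,umask=0x??/
 *
 * which the perf tool translates into attr.config bits 0-7/32-35 and
 * 8-15 as described by PMU_FORMAT_ATTR() above.
 */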

static struct pmu amd_nb_pmu = {
        .attr_groups    = amd_uncore_attr_groups,
        .name           = "amd_nb",
        .event_init     = amd_uncore_event_init,
        .add            = amd_uncore_add,
        .del            = amd_uncore_del,
        .start          = amd_uncore_start,
        .stop           = amd_uncore_stop,
        .read           = amd_uncore_read,
};

static struct pmu amd_l2_pmu = {
        .attr_groups    = amd_uncore_attr_groups,
        .name           = "amd_l2",
        .event_init     = amd_uncore_event_init,
        .add            = amd_uncore_add,
        .del            = amd_uncore_del,
        .start          = amd_uncore_start,
        .stop           = amd_uncore_stop,
        .read           = amd_uncore_read,
};

static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
{
        return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
                        cpu_to_node(cpu));
}

static void amd_uncore_cpu_up_prepare(unsigned int cpu)
{
        struct amd_uncore *uncore;

        if (amd_uncore_nb) {
                uncore = amd_uncore_alloc(cpu);
                uncore->cpu = cpu;
                uncore->num_counters = NUM_COUNTERS_NB;
                uncore->rdpmc_base = RDPMC_BASE_NB;
                uncore->msr_base = MSR_F15H_NB_PERF_CTL;
                uncore->active_mask = &amd_nb_active_mask;
                uncore->pmu = &amd_nb_pmu;
                *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
        }

        if (amd_uncore_l2) {
                uncore = amd_uncore_alloc(cpu);
                uncore->cpu = cpu;
                uncore->num_counters = NUM_COUNTERS_L2;
                uncore->rdpmc_base = RDPMC_BASE_L2;
                uncore->msr_base = MSR_F16H_L2I_PERF_CTL;
                uncore->active_mask = &amd_l2_active_mask;
                uncore->pmu = &amd_l2_pmu;
                *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
        }
}
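
/*
 * CPU_UP_PREPARE runs in process context, so a sleeping GFP_KERNEL
 * allocation is fine here, and kzalloc_node() keeps it NUMA-local to
 * the incoming cpu.  At this point every cpu gets its own amd_uncore;
 * duplicates for cpus that turn out to share an NB/L2 are folded in
 * amd_uncore_cpu_starting() below.
 */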

static struct amd_uncore *
amd_uncore_find_online_sibling(struct amd_uncore *this,
                               struct amd_uncore * __percpu *uncores)
{
        unsigned int cpu;
        struct amd_uncore *that;

        for_each_online_cpu(cpu) {
                that = *per_cpu_ptr(uncores, cpu);

                if (!that)
                        continue;

                if (this == that)
                        continue;

                if (this->id == that->id) {
                        that->free_when_cpu_online = this;
                        this = that;
                        break;
                }
        }

        this->refcnt++;
        return this;
}
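
/*
 * If an online cpu already owns an amd_uncore with the same id, adopt
 * that one and bump its refcount.  The instance allocated for the
 * incoming cpu cannot be kfree()d here because this runs from the
 * atomic CPU_STARTING path; it is parked in ->free_when_cpu_online and
 * released later in uncore_online().
 */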

static void amd_uncore_cpu_starting(unsigned int cpu)
{
        unsigned int eax, ebx, ecx, edx;
        struct amd_uncore *uncore;

        if (amd_uncore_nb) {
                uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
                cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
                uncore->id = ecx & 0xff;

                uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
                *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
        }

        if (amd_uncore_l2) {
                unsigned int apicid = cpu_data(cpu).apicid;
                unsigned int nshared;

                uncore = *per_cpu_ptr(amd_uncore_l2, cpu);
                cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
                nshared = ((eax >> 14) & 0xfff) + 1;
                uncore->id = apicid - (apicid % nshared);

                uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2);
                *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
        }
}
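
/*
 * How the shared resource is identified, per the topology extension
 * leaves used above:
 *
 *  - NB: CPUID Fn8000_001E ECX[7:0] is the node id, so all cpus on the
 *    same node share one NB uncore.
 *  - L2: CPUID Fn8000_001D, index 2 (assumed to be the L2 cache),
 *    EAX[25:14] is the number of logical cpus sharing that cache minus
 *    one; rounding the apicid down to a multiple of that count gives a
 *    common id for all siblings behind the same L2.
 */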

static void uncore_online(unsigned int cpu,
                          struct amd_uncore * __percpu *uncores)
{
        struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);

        kfree(uncore->free_when_cpu_online);
        uncore->free_when_cpu_online = NULL;

        if (cpu == uncore->cpu)
                cpumask_set_cpu(cpu, uncore->active_mask);
}

static void amd_uncore_cpu_online(unsigned int cpu)
{
        if (amd_uncore_nb)
                uncore_online(cpu, amd_uncore_nb);

        if (amd_uncore_l2)
                uncore_online(cpu, amd_uncore_l2);
}

static void uncore_down_prepare(unsigned int cpu,
                                struct amd_uncore * __percpu *uncores)
{
        unsigned int i;
        struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);

        if (this->cpu != cpu)
                return;

        /* this cpu is going down, migrate to a shared sibling if possible */
        for_each_online_cpu(i) {
                struct amd_uncore *that = *per_cpu_ptr(uncores, i);

                if (cpu == i)
                        continue;

                if (this == that) {
                        perf_pmu_migrate_context(this->pmu, cpu, i);
                        cpumask_clear_cpu(cpu, that->active_mask);
                        cpumask_set_cpu(i, that->active_mask);
                        that->cpu = i;
                        break;
                }
        }
}
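
/*
 * If the designated counting cpu of a shared uncore goes offline, pick
 * any other online cpu that aliases the same amd_uncore, move the perf
 * context (and thus all active events) there with
 * perf_pmu_migrate_context(), and update the exported cpumask so that
 * new events land on the new owner.
 */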

static void amd_uncore_cpu_down_prepare(unsigned int cpu)
{
        if (amd_uncore_nb)
                uncore_down_prepare(cpu, amd_uncore_nb);

        if (amd_uncore_l2)
                uncore_down_prepare(cpu, amd_uncore_l2);
}

static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
{
        struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);

        if (cpu == uncore->cpu)
                cpumask_clear_cpu(cpu, uncore->active_mask);

        if (!--uncore->refcnt)
                kfree(uncore);

        /* clear the dying cpu's pointer for this uncore type, not just NB */
        *per_cpu_ptr(uncores, cpu) = NULL;
}

static void amd_uncore_cpu_dead(unsigned int cpu)
{
        if (amd_uncore_nb)
                uncore_dead(cpu, amd_uncore_nb);

        if (amd_uncore_l2)
                uncore_dead(cpu, amd_uncore_l2);
}

static int
amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
                        void *hcpu)
{
        unsigned int cpu = (long)hcpu;

        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_UP_PREPARE:
                amd_uncore_cpu_up_prepare(cpu);
                break;

        case CPU_STARTING:
                amd_uncore_cpu_starting(cpu);
                break;

        case CPU_ONLINE:
                amd_uncore_cpu_online(cpu);
                break;

        case CPU_DOWN_PREPARE:
                amd_uncore_cpu_down_prepare(cpu);
                break;

        case CPU_UP_CANCELED:
        case CPU_DEAD:
                amd_uncore_cpu_dead(cpu);
                break;

        default:
                break;
        }

        return NOTIFY_OK;
}

static struct notifier_block amd_uncore_cpu_notifier_block = {
        .notifier_call  = amd_uncore_cpu_notifier,
        .priority       = CPU_PRI_PERF + 1,
};
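
/*
 * Hotplug lifecycle of an uncore, tying the callbacks above together:
 *
 *      CPU_UP_PREPARE   allocate a per-cpu amd_uncore (process context)
 *      CPU_STARTING     read the topology, fold into an existing
 *                       sibling (atomic, on the new cpu)
 *      CPU_ONLINE       free the duplicate, publish the cpu in cpumask
 *      CPU_DOWN_PREPARE migrate events away from a departing owner cpu
 *      CPU_UP_CANCELED /
 *      CPU_DEAD         drop the reference, free on last user
 *
 * The notifier is registered at CPU_PRI_PERF + 1, i.e. just ahead of
 * the core perf hotplug notifier in the chain.
 */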

static void __init init_cpu_already_online(void *dummy)
{
        unsigned int cpu = smp_processor_id();

        amd_uncore_cpu_starting(cpu);
        amd_uncore_cpu_online(cpu);
}

static int __init amd_uncore_init(void)
{
        unsigned int cpu;
        int ret = -ENODEV;

        if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
                return -ENODEV;

        if (!cpu_has_topoext)
                return -ENODEV;

        if (cpu_has_perfctr_nb) {
                amd_uncore_nb = alloc_percpu(struct amd_uncore *);
                perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);

                printk(KERN_INFO "perf: AMD NB counters detected\n");
                ret = 0;
        }

        if (cpu_has_perfctr_l2) {
                amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
                perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);

                printk(KERN_INFO "perf: AMD L2I counters detected\n");
                ret = 0;
        }

        if (ret)
                return -ENODEV;

        get_online_cpus();
        /* init cpus already online before registering for hotplug notifier */
        for_each_online_cpu(cpu) {
                amd_uncore_cpu_up_prepare(cpu);
                smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
        }

        register_cpu_notifier(&amd_uncore_cpu_notifier_block);
        put_online_cpus();

        return 0;
}
device_initcall(amd_uncore_init);
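
/*
 * Quick sanity check on a machine where this driver is active
 * (messages and PMU names as registered above; output illustrative):
 *
 *      $ dmesg | grep 'AMD NB counters'
 *      perf: AMD NB counters detected
 *      $ ls /sys/bus/event_source/devices/ | grep '^amd_'
 *      amd_l2
 *      amd_nb
 */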