linux/arch/x86/events/amd/uncore.c
/*
 * Copyright (C) 2013 Advanced Micro Devices, Inc.
 *
 * Author: Jacob Shin <jacob.shin@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>

#include <asm/cpufeature.h>
#include <asm/perf_event.h>
#include <asm/msr.h>

#define NUM_COUNTERS_NB         4
#define NUM_COUNTERS_L2         4
#define MAX_COUNTERS            NUM_COUNTERS_NB

#define RDPMC_BASE_NB           6
#define RDPMC_BASE_L2           10

#define COUNTER_SHIFT           16

static HLIST_HEAD(uncore_unused_list);

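/*
 * Per-instance uncore state, shared by all CPUs that share the same
 * northbridge or L2 cache. @cpu is the CPU designated to drive the counters,
 * @refcnt counts the CPUs referencing this instance, and @id identifies the
 * NB node or L2 sharing group the instance belongs to.
 */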
struct amd_uncore {
        int id;
        int refcnt;
        int cpu;
        int num_counters;
        int rdpmc_base;
        u32 msr_base;
        cpumask_t *active_mask;
        struct pmu *pmu;
        struct perf_event *events[MAX_COUNTERS];
        struct hlist_node node;
};

static struct amd_uncore * __percpu *amd_uncore_nb;
static struct amd_uncore * __percpu *amd_uncore_l2;

static struct pmu amd_nb_pmu;
static struct pmu amd_l2_pmu;

static cpumask_t amd_nb_active_mask;
static cpumask_t amd_l2_active_mask;

static bool is_nb_event(struct perf_event *event)
{
        return event->pmu->type == amd_nb_pmu.type;
}

static bool is_l2_event(struct perf_event *event)
{
        return event->pmu->type == amd_l2_pmu.type;
}

static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
{
        if (is_nb_event(event) && amd_uncore_nb)
                return *per_cpu_ptr(amd_uncore_nb, event->cpu);
        else if (is_l2_event(event) && amd_uncore_l2)
                return *per_cpu_ptr(amd_uncore_l2, event->cpu);

        return NULL;
}

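/*
 * Read the current counter value via RDPMC and accumulate the difference
 * from the previously saved value into event->count. The NB/L2 counters are
 * 48 bits wide; shifting by COUNTER_SHIFT discards the upper 16 bits so the
 * delta is computed modulo 2^48 and stays correct across a counter wrap.
 */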
static void amd_uncore_read(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        u64 prev, new;
        s64 delta;

        /*
         * since we do not enable counter overflow interrupts,
         * we do not have to worry about prev_count changing on us
         */

        prev = local64_read(&hwc->prev_count);
        rdpmcl(hwc->event_base_rdpmc, new);
        local64_set(&hwc->prev_count, new);
        delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
        delta >>= COUNTER_SHIFT;
        local64_add(delta, &event->count);
}

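/*
 * Start counting: optionally reload the last saved count into the counter
 * MSR, then set the enable bit in the event select MSR. Stopping clears the
 * enable bit and, if requested, folds the final count into the event.
 */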
static void amd_uncore_start(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;

        if (flags & PERF_EF_RELOAD)
                wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));

        hwc->state = 0;
        wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
        perf_event_update_userpage(event);
}

static void amd_uncore_stop(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;

        wrmsrl(hwc->config_base, hwc->config);
        hwc->state |= PERF_HES_STOPPED;

        if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
                amd_uncore_read(event);
                hwc->state |= PERF_HES_UPTODATE;
        }
}

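/*
 * Claim a counter slot for the event (cmpxchg() makes the claim atomic since
 * the slot array is shared) and derive the MSR addresses from the slot
 * index. Control and counter MSRs are interleaved: slot i uses
 * msr_base + 2 * i for control and msr_base + 2 * i + 1 for the counter.
 */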
static int amd_uncore_add(struct perf_event *event, int flags)
{
        int i;
        struct amd_uncore *uncore = event_to_amd_uncore(event);
        struct hw_perf_event *hwc = &event->hw;

        /* are we already assigned? */
        if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
                goto out;

        for (i = 0; i < uncore->num_counters; i++) {
                if (uncore->events[i] == event) {
                        hwc->idx = i;
                        goto out;
                }
        }

        /* if not, take the first available counter */
        hwc->idx = -1;
        for (i = 0; i < uncore->num_counters; i++) {
                if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
                        hwc->idx = i;
                        break;
                }
        }

out:
        if (hwc->idx == -1)
                return -EBUSY;

        hwc->config_base = uncore->msr_base + (2 * hwc->idx);
        hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
        hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
        hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

        if (flags & PERF_EF_START)
                amd_uncore_start(event, PERF_EF_RELOAD);

        return 0;
}

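/*
 * Stop the event, release its counter slot and forget the assignment.
 */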
static void amd_uncore_del(struct perf_event *event, int flags)
{
        int i;
        struct amd_uncore *uncore = event_to_amd_uncore(event);
        struct hw_perf_event *hwc = &event->hw;

        amd_uncore_stop(event, PERF_EF_UPDATE);

        for (i = 0; i < uncore->num_counters; i++) {
                if (cmpxchg(&uncore->events[i], event, NULL) == event)
                        break;
        }

        hwc->idx = -1;
}

static int amd_uncore_event_init(struct perf_event *event)
{
        struct amd_uncore *uncore;
        struct hw_perf_event *hwc = &event->hw;

        if (event->attr.type != event->pmu->type)
                return -ENOENT;

        /*
         * NB and L2 counters (MSRs) are shared across all cores that share the
         * same NB / L2 cache. Interrupts can be directed to a single target
         * core, however, event counts generated by processes running on other
         * cores cannot be masked out. So we do not support sampling and
         * per-thread events.
         */
        if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
                return -EINVAL;

        /* NB and L2 counters do not have usr/os/guest/host bits */
        if (event->attr.exclude_user || event->attr.exclude_kernel ||
            event->attr.exclude_host || event->attr.exclude_guest)
                return -EINVAL;

        /* and we do not enable counter overflow interrupts */
        hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
        hwc->idx = -1;

        if (event->cpu < 0)
                return -EINVAL;

        uncore = event_to_amd_uncore(event);
        if (!uncore)
                return -ENODEV;

        /*
         * since request can come in to any of the shared cores, we will remap
         * to a single common cpu.
         */
        event->cpu = uncore->cpu;

        return 0;
}

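/*
 * sysfs "cpumask" attribute: report the CPUs that currently own an uncore
 * instance, i.e. the single designated CPU per NB node / L2 sharing group.
 */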
static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
                                            struct device_attribute *attr,
                                            char *buf)
{
        cpumask_t *active_mask;
        struct pmu *pmu = dev_get_drvdata(dev);

        if (pmu->type == amd_nb_pmu.type)
                active_mask = &amd_nb_active_mask;
        else if (pmu->type == amd_l2_pmu.type)
                active_mask = &amd_l2_active_mask;
        else
                return 0;

        return cpumap_print_to_pagebuf(true, buf, active_mask);
}
static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);

static struct attribute *amd_uncore_attrs[] = {
        &dev_attr_cpumask.attr,
        NULL,
};

static struct attribute_group amd_uncore_attr_group = {
        .attrs = amd_uncore_attrs,
};

PMU_FORMAT_ATTR(event, "config:0-7,32-35");
PMU_FORMAT_ATTR(umask, "config:8-15");

static struct attribute *amd_uncore_format_attr[] = {
        &format_attr_event.attr,
        &format_attr_umask.attr,
        NULL,
};

static struct attribute_group amd_uncore_format_group = {
        .name = "format",
        .attrs = amd_uncore_format_attr,
};

static const struct attribute_group *amd_uncore_attr_groups[] = {
        &amd_uncore_attr_group,
        &amd_uncore_format_group,
        NULL,
};

static struct pmu amd_nb_pmu = {
        .task_ctx_nr    = perf_invalid_context,
        .attr_groups    = amd_uncore_attr_groups,
        .name           = "amd_nb",
        .event_init     = amd_uncore_event_init,
        .add            = amd_uncore_add,
        .del            = amd_uncore_del,
        .start          = amd_uncore_start,
        .stop           = amd_uncore_stop,
        .read           = amd_uncore_read,
};

static struct pmu amd_l2_pmu = {
        .task_ctx_nr    = perf_invalid_context,
        .attr_groups    = amd_uncore_attr_groups,
        .name           = "amd_l2",
        .event_init     = amd_uncore_event_init,
        .add            = amd_uncore_add,
        .del            = amd_uncore_del,
        .start          = amd_uncore_start,
        .stop           = amd_uncore_stop,
        .read           = amd_uncore_read,
};

static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
{
        return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
                        cpu_to_node(cpu));
}

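/*
 * CPU hotplug "prepare" stage: allocate per-CPU uncore structures for the
 * incoming CPU. They may be replaced by an already online sibling's
 * structure in the "starting" stage below.
 */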
static int amd_uncore_cpu_up_prepare(unsigned int cpu)
{
        struct amd_uncore *uncore_nb = NULL, *uncore_l2;

        if (amd_uncore_nb) {
                uncore_nb = amd_uncore_alloc(cpu);
                if (!uncore_nb)
                        goto fail;
                uncore_nb->cpu = cpu;
                uncore_nb->num_counters = NUM_COUNTERS_NB;
                uncore_nb->rdpmc_base = RDPMC_BASE_NB;
                uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
                uncore_nb->active_mask = &amd_nb_active_mask;
                uncore_nb->pmu = &amd_nb_pmu;
                uncore_nb->id = -1;
                *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
        }

        if (amd_uncore_l2) {
                uncore_l2 = amd_uncore_alloc(cpu);
                if (!uncore_l2)
                        goto fail;
                uncore_l2->cpu = cpu;
                uncore_l2->num_counters = NUM_COUNTERS_L2;
                uncore_l2->rdpmc_base = RDPMC_BASE_L2;
                uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL;
                uncore_l2->active_mask = &amd_l2_active_mask;
                uncore_l2->pmu = &amd_l2_pmu;
                uncore_l2->id = -1;
                *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2;
        }

        return 0;

fail:
        if (amd_uncore_nb)
                *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
        kfree(uncore_nb);
        return -ENOMEM;
}

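/*
 * Look for an already online CPU whose uncore reports the same id. If one is
 * found, the freshly allocated structure is parked on uncore_unused_list and
 * the sibling's structure is reused; either way the refcount is bumped.
 */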
static struct amd_uncore *
amd_uncore_find_online_sibling(struct amd_uncore *this,
                               struct amd_uncore * __percpu *uncores)
{
        unsigned int cpu;
        struct amd_uncore *that;

        for_each_online_cpu(cpu) {
                that = *per_cpu_ptr(uncores, cpu);

                if (!that)
                        continue;

                if (this == that)
                        continue;

                if (this->id == that->id) {
                        hlist_add_head(&this->node, &uncore_unused_list);
                        this = that;
                        break;
                }
        }

        this->refcnt++;
        return this;
}

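/*
 * CPU hotplug "starting" stage, run on the new CPU: determine which uncore
 * instance it belongs to. CPUID leaf 0x8000001e reports the node id in
 * ECX[7:0] for the NB counters; CPUID leaf 0x8000001d, index 2, reports in
 * EAX[25:14] the number of logical CPUs sharing the L2 cache, from which the
 * lowest APIC id in the sharing group is derived as the L2 id.
 */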
static int amd_uncore_cpu_starting(unsigned int cpu)
{
        unsigned int eax, ebx, ecx, edx;
        struct amd_uncore *uncore;

        if (amd_uncore_nb) {
                uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
                cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
                uncore->id = ecx & 0xff;

                uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
                *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
        }

        if (amd_uncore_l2) {
                unsigned int apicid = cpu_data(cpu).apicid;
                unsigned int nshared;

                uncore = *per_cpu_ptr(amd_uncore_l2, cpu);
                cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
                nshared = ((eax >> 14) & 0xfff) + 1;
                uncore->id = apicid - (apicid % nshared);

                uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2);
                *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
        }

        return 0;
}

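/*
 * Free the redundant structures that amd_uncore_find_online_sibling() parked
 * on uncore_unused_list.
 */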
static void uncore_clean_online(void)
{
        struct amd_uncore *uncore;
        struct hlist_node *n;

        hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) {
                hlist_del(&uncore->node);
                kfree(uncore);
        }
}

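/*
 * CPU hotplug "online" stage: drop leftover duplicates and, if this CPU is
 * the designated owner of its uncore instance, mark it in the active mask
 * exported through the "cpumask" sysfs attribute.
 */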
static void uncore_online(unsigned int cpu,
                          struct amd_uncore * __percpu *uncores)
{
        struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);

        uncore_clean_online();

        if (cpu == uncore->cpu)
                cpumask_set_cpu(cpu, uncore->active_mask);
}

static int amd_uncore_cpu_online(unsigned int cpu)
{
        if (amd_uncore_nb)
                uncore_online(cpu, amd_uncore_nb);

        if (amd_uncore_l2)
                uncore_online(cpu, amd_uncore_l2);

        return 0;
}

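/*
 * CPU hotplug "down prepare" stage: if the outgoing CPU is the designated
 * owner of an uncore instance, hand ownership and the perf context over to
 * another online CPU that shares the same instance.
 */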
static void uncore_down_prepare(unsigned int cpu,
                                struct amd_uncore * __percpu *uncores)
{
        unsigned int i;
        struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);

        if (this->cpu != cpu)
                return;

        /* this cpu is going down, migrate to a shared sibling if possible */
        for_each_online_cpu(i) {
                struct amd_uncore *that = *per_cpu_ptr(uncores, i);

                if (cpu == i)
                        continue;

                if (this == that) {
                        perf_pmu_migrate_context(this->pmu, cpu, i);
                        cpumask_clear_cpu(cpu, that->active_mask);
                        cpumask_set_cpu(i, that->active_mask);
                        that->cpu = i;
                        break;
                }
        }
}

static int amd_uncore_cpu_down_prepare(unsigned int cpu)
{
        if (amd_uncore_nb)
                uncore_down_prepare(cpu, amd_uncore_nb);

        if (amd_uncore_l2)
                uncore_down_prepare(cpu, amd_uncore_l2);

        return 0;
}

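/*
 * CPU hotplug "dead" stage: drop the outgoing CPU's reference and free the
 * uncore structure once the last sharing CPU is gone.
 */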
static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
{
        struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);

        if (cpu == uncore->cpu)
                cpumask_clear_cpu(cpu, uncore->active_mask);

        if (!--uncore->refcnt)
                kfree(uncore);
        *per_cpu_ptr(uncores, cpu) = NULL;
}

static int amd_uncore_cpu_dead(unsigned int cpu)
{
        if (amd_uncore_nb)
                uncore_dead(cpu, amd_uncore_nb);

        if (amd_uncore_l2)
                uncore_dead(cpu, amd_uncore_l2);

        return 0;
}

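/*
 * Detect the NB and L2 uncore counters (CPUID features PERFCTR_NB and
 * PERFCTR_L2), register the corresponding PMUs and install the CPU hotplug
 * callbacks that manage the per-CPU state.
 */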
static int __init amd_uncore_init(void)
{
        int ret = -ENODEV;

        if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
                goto fail_nodev;

        if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
                goto fail_nodev;

        if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
                amd_uncore_nb = alloc_percpu(struct amd_uncore *);
                if (!amd_uncore_nb) {
                        ret = -ENOMEM;
                        goto fail_nb;
                }
                ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
                if (ret)
                        goto fail_nb;

                pr_info("perf: AMD NB counters detected\n");
                ret = 0;
        }

        if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) {
                amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
                if (!amd_uncore_l2) {
                        ret = -ENOMEM;
                        goto fail_l2;
                }
                ret = perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
                if (ret)
                        goto fail_l2;

                pr_info("perf: AMD L2I counters detected\n");
                ret = 0;
        }

        /*
         * Install callbacks. Core will call them for each online cpu.
         */
        if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
                              "perf/x86/amd/uncore:prepare",
                              amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead))
                goto fail_l2;

        if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
                              "perf/x86/amd/uncore:starting",
                              amd_uncore_cpu_starting, NULL))
                goto fail_prep;
        if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
                              "perf/x86/amd/uncore:online",
                              amd_uncore_cpu_online,
                              amd_uncore_cpu_down_prepare))
                goto fail_start;
        return 0;

fail_start:
        cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
fail_prep:
        cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
fail_l2:
        if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
                perf_pmu_unregister(&amd_nb_pmu);
        if (amd_uncore_l2)
                free_percpu(amd_uncore_l2);
fail_nb:
        if (amd_uncore_nb)
                free_percpu(amd_uncore_nb);

fail_nodev:
        return ret;
}
device_initcall(amd_uncore_init);