linux/drivers/perf/arm_pmu.c
// SPDX-License-Identifier: GPL-2.0-only
#undef DEBUG

/*
 * ARM performance counter support.
 *
 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
 * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
 *
 * This code is based on the sparc64 perf event code, which is in turn based
 * on the x86 code.
 */
#define pr_fmt(fmt) "hw perfevents: " fmt

#include <linux/bitmap.h>
#include <linux/cpumask.h>
#include <linux/cpu_pm.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/perf/arm_pmu.h>
#include <linux/slab.h>
#include <linux/sched/clock.h>
#include <linux/spinlock.h>
#include <linux/irq.h>
#include <linux/irqdesc.h>

#include <asm/irq_regs.h>

static int armpmu_count_irq_users(const int irq);

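/*
 * Per-CPU PMU interrupt management callbacks. The four implementations
 * below cover the combinations of normal IRQ vs. NMI and per-CPU
 * (percpu_devid) vs. regular interrupt lines, so the rest of the driver
 * can enable, disable and free the PMU interrupt without caring which
 * flavour was actually requested.
 */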
struct pmu_irq_ops {
        void (*enable_pmuirq)(unsigned int irq);
        void (*disable_pmuirq)(unsigned int irq);
        void (*free_pmuirq)(unsigned int irq, int cpu, void __percpu *devid);
};

static void armpmu_free_pmuirq(unsigned int irq, int cpu, void __percpu *devid)
{
        free_irq(irq, per_cpu_ptr(devid, cpu));
}

static const struct pmu_irq_ops pmuirq_ops = {
        .enable_pmuirq = enable_irq,
        .disable_pmuirq = disable_irq_nosync,
        .free_pmuirq = armpmu_free_pmuirq
};

static void armpmu_free_pmunmi(unsigned int irq, int cpu, void __percpu *devid)
{
        free_nmi(irq, per_cpu_ptr(devid, cpu));
}

static const struct pmu_irq_ops pmunmi_ops = {
        .enable_pmuirq = enable_nmi,
        .disable_pmuirq = disable_nmi_nosync,
        .free_pmuirq = armpmu_free_pmunmi
};

static void armpmu_enable_percpu_pmuirq(unsigned int irq)
{
        enable_percpu_irq(irq, IRQ_TYPE_NONE);
}

static void armpmu_free_percpu_pmuirq(unsigned int irq, int cpu,
                                      void __percpu *devid)
{
        if (armpmu_count_irq_users(irq) == 1)
                free_percpu_irq(irq, devid);
}

static const struct pmu_irq_ops percpu_pmuirq_ops = {
        .enable_pmuirq = armpmu_enable_percpu_pmuirq,
        .disable_pmuirq = disable_percpu_irq,
        .free_pmuirq = armpmu_free_percpu_pmuirq
};

static void armpmu_enable_percpu_pmunmi(unsigned int irq)
{
        if (!prepare_percpu_nmi(irq))
                enable_percpu_nmi(irq, IRQ_TYPE_NONE);
}

static void armpmu_disable_percpu_pmunmi(unsigned int irq)
{
        disable_percpu_nmi(irq);
        teardown_percpu_nmi(irq);
}

static void armpmu_free_percpu_pmunmi(unsigned int irq, int cpu,
                                      void __percpu *devid)
{
        if (armpmu_count_irq_users(irq) == 1)
                free_percpu_nmi(irq, devid);
}

static const struct pmu_irq_ops percpu_pmunmi_ops = {
        .enable_pmuirq = armpmu_enable_percpu_pmunmi,
        .disable_pmuirq = armpmu_disable_percpu_pmunmi,
        .free_pmuirq = armpmu_free_percpu_pmunmi
};

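/*
 * Per-CPU bookkeeping: the arm_pmu that owns each CPU, the IRQ number
 * requested for it, and the pmu_irq_ops used to manage that line.
 */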
static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu);
static DEFINE_PER_CPU(int, cpu_irq);
static DEFINE_PER_CPU(const struct pmu_irq_ops *, cpu_irq_ops);

static bool has_nmi;

static inline u64 arm_pmu_event_max_period(struct perf_event *event)
{
        if (event->hw.flags & ARMPMU_EVT_64BIT)
                return GENMASK_ULL(63, 0);
        else
                return GENMASK_ULL(31, 0);
}

static int
armpmu_map_cache_event(const unsigned (*cache_map)
                                      [PERF_COUNT_HW_CACHE_MAX]
                                      [PERF_COUNT_HW_CACHE_OP_MAX]
                                      [PERF_COUNT_HW_CACHE_RESULT_MAX],
                       u64 config)
{
        unsigned int cache_type, cache_op, cache_result, ret;

        cache_type = (config >>  0) & 0xff;
        if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
                return -EINVAL;

        cache_op = (config >>  8) & 0xff;
        if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
                return -EINVAL;

        cache_result = (config >> 16) & 0xff;
        if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                return -EINVAL;

        if (!cache_map)
                return -ENOENT;

        ret = (int)(*cache_map)[cache_type][cache_op][cache_result];

        if (ret == CACHE_OP_UNSUPPORTED)
                return -ENOENT;

        return ret;
}

static int
armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
{
        int mapping;

        if (config >= PERF_COUNT_HW_MAX)
                return -EINVAL;

        if (!event_map)
                return -ENOENT;

        mapping = (*event_map)[config];
        return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
}

static int
armpmu_map_raw_event(u32 raw_event_mask, u64 config)
{
        return (int)(config & raw_event_mask);
}

int
armpmu_map_event(struct perf_event *event,
                 const unsigned (*event_map)[PERF_COUNT_HW_MAX],
                 const unsigned (*cache_map)
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX],
                 u32 raw_event_mask)
{
        u64 config = event->attr.config;
        int type = event->attr.type;

        if (type == event->pmu->type)
                return armpmu_map_raw_event(raw_event_mask, config);

        switch (type) {
        case PERF_TYPE_HARDWARE:
                return armpmu_map_hw_event(event_map, config);
        case PERF_TYPE_HW_CACHE:
                return armpmu_map_cache_event(cache_map, config);
        case PERF_TYPE_RAW:
                return armpmu_map_raw_event(raw_event_mask, config);
        }

        return -ENOENT;
}

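/*
 * Program the counter with the next sampling period, clamped to half the
 * counter width so the value we write cannot be overtaken before the
 * overflow interrupt is handled. Returns 1 when a new period was started.
 */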
int armpmu_event_set_period(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;
        s64 left = local64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        u64 max_period;
        int ret = 0;

        max_period = arm_pmu_event_max_period(event);
        if (unlikely(left <= -period)) {
                left = period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }

        if (unlikely(left <= 0)) {
                left += period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }

        /*
         * Limit the maximum period to prevent the counter value
         * from overtaking the one we are about to program. In
         * effect we are reducing max_period to account for
         * interrupt latency (and we are being very conservative).
         */
        if (left > (max_period >> 1))
                left = (max_period >> 1);

        local64_set(&hwc->prev_count, (u64)-left);

        armpmu->write_counter(event, (u64)(-left) & max_period);

        perf_event_update_userpage(event);

        return ret;
}

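/*
 * Read the hardware counter and fold the delta since the last read into
 * event->count, masking with the counter width so that wraparound is
 * handled correctly. The cmpxchg loop retries if the overflow handler
 * updated prev_count concurrently.
 */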
u64 armpmu_event_update(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;
        u64 delta, prev_raw_count, new_raw_count;
        u64 max_period = arm_pmu_event_max_period(event);

again:
        prev_raw_count = local64_read(&hwc->prev_count);
        new_raw_count = armpmu->read_counter(event);

        if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                            new_raw_count) != prev_raw_count)
                goto again;

        delta = (new_raw_count - prev_raw_count) & max_period;

        local64_add(delta, &event->count);
        local64_sub(delta, &hwc->period_left);

        return new_raw_count;
}

static void
armpmu_read(struct perf_event *event)
{
        armpmu_event_update(event);
}

static void
armpmu_stop(struct perf_event *event, int flags)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;

        /*
         * ARM pmu always has to update the counter, so ignore
         * PERF_EF_UPDATE, see comments in armpmu_start().
         */
        if (!(hwc->state & PERF_HES_STOPPED)) {
                armpmu->disable(event);
                armpmu_event_update(event);
                hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
        }
}

static void armpmu_start(struct perf_event *event, int flags)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;

        /*
         * ARM pmu always has to reprogram the period, so ignore
         * PERF_EF_RELOAD, see the comment below.
         */
        if (flags & PERF_EF_RELOAD)
                WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

        hwc->state = 0;
        /*
         * Set the period again. Some counters can't be stopped, so when we
         * were stopped we simply disabled the IRQ source and the counter
         * may have been left counting. If we don't do this step then we may
         * get an interrupt too soon or *way* too late if the overflow has
         * happened since disabling.
         */
        armpmu_event_set_period(event);
        armpmu->enable(event);
}

static void
armpmu_del(struct perf_event *event, int flags)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        armpmu_stop(event, PERF_EF_UPDATE);
        hw_events->events[idx] = NULL;
        armpmu->clear_event_idx(hw_events, event);
        perf_event_update_userpage(event);
        /* Clear the allocated counter */
        hwc->idx = -1;
}

static int
armpmu_add(struct perf_event *event, int flags)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx;

        /* An event following a process won't be stopped earlier */
        if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
                return -ENOENT;

        /* If we don't have space for the counter then finish early. */
        idx = armpmu->get_event_idx(hw_events, event);
        if (idx < 0)
                return idx;

        /*
         * If there is an event in the counter we are going to use then make
         * sure it is disabled.
         */
        event->hw.idx = idx;
        armpmu->disable(event);
        hw_events->events[idx] = event;

        hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
        if (flags & PERF_EF_START)
                armpmu_start(event, PERF_EF_RELOAD);

        /* Propagate our changes to the userspace mapping. */
        perf_event_update_userpage(event);

        return 0;
}

static int
validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
               struct perf_event *event)
{
        struct arm_pmu *armpmu;

        if (is_software_event(event))
                return 1;

        /*
         * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
         * core perf code won't check that the pmu->ctx == leader->ctx
         * until after pmu->event_init(event).
         */
        if (event->pmu != pmu)
                return 0;

        if (event->state < PERF_EVENT_STATE_OFF)
                return 1;

        if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
                return 1;

        armpmu = to_arm_pmu(event->pmu);
        return armpmu->get_event_idx(hw_events, event) >= 0;
}

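/*
 * Check that all hardware events in a group can be scheduled on the PMU
 * at the same time, using a fake pmu_hw_events in which only used_mask
 * matters.
 */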
static int
validate_group(struct perf_event *event)
{
        struct perf_event *sibling, *leader = event->group_leader;
        struct pmu_hw_events fake_pmu;

        /*
         * Initialise the fake PMU. We only need to populate the
         * used_mask for the purposes of validation.
         */
        memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask));

        if (!validate_event(event->pmu, &fake_pmu, leader))
                return -EINVAL;

        for_each_sibling_event(sibling, leader) {
                if (!validate_event(event->pmu, &fake_pmu, sibling))
                        return -EINVAL;
        }

        if (!validate_event(event->pmu, &fake_pmu, event))
                return -EINVAL;

        return 0;
}

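/*
 * Common interrupt entry point: forward to the arm_pmu's handle_irq()
 * implementation and report how long the handler took, so the core perf
 * code can throttle the sampling rate if interrupts get too expensive.
 */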
static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
{
        struct arm_pmu *armpmu;
        int ret;
        u64 start_clock, finish_clock;

        /*
         * We request the IRQ with a (possibly percpu) struct arm_pmu**, but
         * the handlers expect a struct arm_pmu*. The percpu_irq framework will
         * do any necessary shifting, we just need to perform the first
         * dereference.
         */
        armpmu = *(void **)dev;
        if (WARN_ON_ONCE(!armpmu))
                return IRQ_NONE;

        start_clock = sched_clock();
        ret = armpmu->handle_irq(armpmu);
        finish_clock = sched_clock();

        perf_sample_event_took(finish_clock - start_clock);
        return ret;
}

static int
__hw_perf_event_init(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;
        int mapping;

        hwc->flags = 0;
        mapping = armpmu->map_event(event);

        if (mapping < 0) {
                pr_debug("event %x:%llx not supported\n", event->attr.type,
                         event->attr.config);
                return mapping;
        }

        /*
         * We don't assign an index until we actually place the event onto
         * hardware. Use -1 to signify that we haven't decided where to put it
         * yet. For SMP systems, each core has its own PMU so we can't do any
         * clever allocation or constraints checking at this point.
         */
        hwc->idx                = -1;
        hwc->config_base        = 0;
        hwc->config             = 0;
        hwc->event_base         = 0;

        /*
         * Check whether we need to exclude the counter from certain modes.
         */
        if (armpmu->set_event_filter &&
            armpmu->set_event_filter(hwc, &event->attr)) {
                pr_debug("ARM performance counters do not support mode exclusion\n");
                return -EOPNOTSUPP;
        }

        /*
         * Store the event encoding into the config_base field.
         */
        hwc->config_base            |= (unsigned long)mapping;

        if (!is_sampling_event(event)) {
                /*
                 * For non-sampling runs, limit the sample_period to half
                 * of the counter width. That way, the new counter value
                 * is far less likely to overtake the previous one unless
                 * you have some serious IRQ latency issues.
                 */
                hwc->sample_period  = arm_pmu_event_max_period(event) >> 1;
                hwc->last_period    = hwc->sample_period;
                local64_set(&hwc->period_left, hwc->sample_period);
        }

        if (event->group_leader != event) {
                if (validate_group(event) != 0)
                        return -EINVAL;
        }

        return 0;
}

static int armpmu_event_init(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);

        /*
         * Reject CPU-affine events for CPUs that are of a different class to
         * that which this PMU handles. Process-following events (where
         * event->cpu == -1) can be migrated between CPUs, and thus we have to
         * reject them later (in armpmu_add) if they're scheduled on a
         * different class of CPU.
         */
        if (event->cpu != -1 &&
                !cpumask_test_cpu(event->cpu, &armpmu->supported_cpus))
                return -ENOENT;

        /* does not support taken branch sampling */
        if (has_branch_stack(event))
                return -EOPNOTSUPP;

        if (armpmu->map_event(event) == -ENOENT)
                return -ENOENT;

        return __hw_perf_event_init(event);
}

static void armpmu_enable(struct pmu *pmu)
{
        struct arm_pmu *armpmu = to_arm_pmu(pmu);
        struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
        int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);

        /* For task-bound events we may be called on other CPUs */
        if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
                return;

        if (enabled)
                armpmu->start(armpmu);
}

static void armpmu_disable(struct pmu *pmu)
{
        struct arm_pmu *armpmu = to_arm_pmu(pmu);

        /* For task-bound events we may be called on other CPUs */
        if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
                return;

        armpmu->stop(armpmu);
}

/*
 * In heterogeneous systems, events are specific to a particular
 * microarchitecture, and aren't suitable for another. Thus, only match CPUs of
 * the same microarchitecture.
 */
static int armpmu_filter_match(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        unsigned int cpu = smp_processor_id();
        int ret;

        ret = cpumask_test_cpu(cpu, &armpmu->supported_cpus);
        if (ret && armpmu->filter_match)
                return armpmu->filter_match(event);

        return ret;
}

static ssize_t cpus_show(struct device *dev,
                         struct device_attribute *attr, char *buf)
{
        struct arm_pmu *armpmu = to_arm_pmu(dev_get_drvdata(dev));
        return cpumap_print_to_pagebuf(true, buf, &armpmu->supported_cpus);
}

static DEVICE_ATTR_RO(cpus);

static struct attribute *armpmu_common_attrs[] = {
        &dev_attr_cpus.attr,
        NULL,
};

static const struct attribute_group armpmu_common_attr_group = {
        .attrs = armpmu_common_attrs,
};

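/*
 * Count how many CPUs are using the given IRQ line. Percpu_devid lines
 * are requested once and shared between CPUs, so they must only be freed
 * when the last user goes away.
 */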
static int armpmu_count_irq_users(const int irq)
{
        int cpu, count = 0;

        for_each_possible_cpu(cpu) {
                if (per_cpu(cpu_irq, cpu) == irq)
                        count++;
        }

        return count;
}

static const struct pmu_irq_ops *armpmu_find_irq_ops(int irq)
{
        const struct pmu_irq_ops *ops = NULL;
        int cpu;

        for_each_possible_cpu(cpu) {
                if (per_cpu(cpu_irq, cpu) != irq)
                        continue;

                ops = per_cpu(cpu_irq_ops, cpu);
                if (ops)
                        break;
        }

        return ops;
}

void armpmu_free_irq(int irq, int cpu)
{
        if (per_cpu(cpu_irq, cpu) == 0)
                return;
        if (WARN_ON(irq != per_cpu(cpu_irq, cpu)))
                return;

        per_cpu(cpu_irq_ops, cpu)->free_pmuirq(irq, cpu, &cpu_armpmu);

        per_cpu(cpu_irq, cpu) = 0;
        per_cpu(cpu_irq_ops, cpu) = NULL;
}

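/*
 * Request the PMU interrupt for a CPU. An NMI is preferred, with a normal
 * interrupt as the fallback. For percpu_devid lines the request is only
 * made by the first CPU; later CPUs reuse the pmu_irq_ops recorded when
 * the line was first requested.
 */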
int armpmu_request_irq(int irq, int cpu)
{
        int err = 0;
        const irq_handler_t handler = armpmu_dispatch_irq;
        const struct pmu_irq_ops *irq_ops;

        if (!irq)
                return 0;

        if (!irq_is_percpu_devid(irq)) {
                unsigned long irq_flags;

                err = irq_force_affinity(irq, cpumask_of(cpu));

                if (err && num_possible_cpus() > 1) {
                        pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n",
                                irq, cpu);
                        goto err_out;
                }

                irq_flags = IRQF_PERCPU |
                            IRQF_NOBALANCING | IRQF_NO_AUTOEN |
                            IRQF_NO_THREAD;

                err = request_nmi(irq, handler, irq_flags, "arm-pmu",
                                  per_cpu_ptr(&cpu_armpmu, cpu));

                /* If we can't get an NMI, fall back to a normal interrupt */
                if (err) {
                        err = request_irq(irq, handler, irq_flags, "arm-pmu",
                                          per_cpu_ptr(&cpu_armpmu, cpu));
                        irq_ops = &pmuirq_ops;
                } else {
                        has_nmi = true;
                        irq_ops = &pmunmi_ops;
                }
        } else if (armpmu_count_irq_users(irq) == 0) {
                err = request_percpu_nmi(irq, handler, "arm-pmu", &cpu_armpmu);

                /* If we can't get an NMI, fall back to a normal interrupt */
                if (err) {
                        err = request_percpu_irq(irq, handler, "arm-pmu",
                                                 &cpu_armpmu);
                        irq_ops = &percpu_pmuirq_ops;
                } else {
                        has_nmi = true;
                        irq_ops = &percpu_pmunmi_ops;
                }
        } else {
                /* The percpu_devid irq was already requested by another CPU */
                irq_ops = armpmu_find_irq_ops(irq);

                if (WARN_ON(!irq_ops))
                        err = -EINVAL;
        }

        if (err)
                goto err_out;

        per_cpu(cpu_irq, cpu) = irq;
        per_cpu(cpu_irq_ops, cpu) = irq_ops;
        return 0;

err_out:
        pr_err("unable to request IRQ%d for ARM PMU counters\n", irq);
        return err;
}

static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu)
{
        struct pmu_hw_events __percpu *hw_events = pmu->hw_events;
        return per_cpu(hw_events->irq, cpu);
}

/*
 * PMU hardware loses all context when a CPU goes offline.
 * When a CPU is hotplugged back in, since some hardware registers are
 * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
 * junk values out of them.
 */
static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node)
{
        struct arm_pmu *pmu = hlist_entry_safe(node, struct arm_pmu, node);
        int irq;

        if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
                return 0;
        if (pmu->reset)
                pmu->reset(pmu);

        per_cpu(cpu_armpmu, cpu) = pmu;

        irq = armpmu_get_cpu_irq(pmu, cpu);
        if (irq)
                per_cpu(cpu_irq_ops, cpu)->enable_pmuirq(irq);

        return 0;
}

static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node)
{
        struct arm_pmu *pmu = hlist_entry_safe(node, struct arm_pmu, node);
        int irq;

        if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
                return 0;

        irq = armpmu_get_cpu_irq(pmu, cpu);
        if (irq)
                per_cpu(cpu_irq_ops, cpu)->disable_pmuirq(irq);

        per_cpu(cpu_armpmu, cpu) = NULL;

        return 0;
}

#ifdef CONFIG_CPU_PM
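/*
 * Stop the active events before a CPU power-down and reprogram/restart
 * them on the way back up, since the PMU state is lost across the
 * low-power transition.
 */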
static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd)
{
        struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
        struct perf_event *event;
        int idx;

        for (idx = 0; idx < armpmu->num_events; idx++) {
                event = hw_events->events[idx];
                if (!event)
                        continue;

                switch (cmd) {
                case CPU_PM_ENTER:
                        /*
                         * Stop and update the counter
                         */
                        armpmu_stop(event, PERF_EF_UPDATE);
                        break;
                case CPU_PM_EXIT:
                case CPU_PM_ENTER_FAILED:
                         /*
                          * Restore and enable the counter.
                          * armpmu_start() indirectly calls
                          *
                          * perf_event_update_userpage()
                          *
                          * which requires RCU read locking to be functional,
                          * so wrap the call in RCU_NONIDLE() to make the
                          * RCU subsystem aware that this CPU is not idle
                          * from an RCU perspective for the duration of the
                          * armpmu_start() call.
                          */
                        RCU_NONIDLE(armpmu_start(event, PERF_EF_RELOAD));
                        break;
                default:
                        break;
                }
        }
}

static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
                             void *v)
{
        struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb);
        struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
        int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);

        if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
                return NOTIFY_DONE;

        /*
         * Always reset the PMU registers on power-up even if
         * there are no events running.
         */
        if (cmd == CPU_PM_EXIT && armpmu->reset)
                armpmu->reset(armpmu);

        if (!enabled)
                return NOTIFY_OK;

        switch (cmd) {
        case CPU_PM_ENTER:
                armpmu->stop(armpmu);
                cpu_pm_pmu_setup(armpmu, cmd);
                break;
        case CPU_PM_EXIT:
        case CPU_PM_ENTER_FAILED:
                cpu_pm_pmu_setup(armpmu, cmd);
                armpmu->start(armpmu);
                break;
        default:
                return NOTIFY_DONE;
        }

        return NOTIFY_OK;
}

static int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu)
{
        cpu_pmu->cpu_pm_nb.notifier_call = cpu_pm_pmu_notify;
        return cpu_pm_register_notifier(&cpu_pmu->cpu_pm_nb);
}

static void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu)
{
        cpu_pm_unregister_notifier(&cpu_pmu->cpu_pm_nb);
}
#else
static inline int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu) { return 0; }
static inline void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu) { }
#endif

static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
{
        int err;

        err = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_STARTING,
                                       &cpu_pmu->node);
        if (err)
                goto out;

        err = cpu_pm_pmu_register(cpu_pmu);
        if (err)
                goto out_unregister;

        return 0;

out_unregister:
        cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
                                            &cpu_pmu->node);
out:
        return err;
}

static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
{
        cpu_pm_pmu_unregister(cpu_pmu);
        cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
                                            &cpu_pmu->node);
}

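/*
 * Allocate an arm_pmu together with its per-CPU pmu_hw_events and wire up
 * the generic struct pmu callbacks. The caller fills in the hardware
 * specific methods before registering the PMU.
 */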
static struct arm_pmu *__armpmu_alloc(gfp_t flags)
{
        struct arm_pmu *pmu;
        int cpu;

        pmu = kzalloc(sizeof(*pmu), flags);
        if (!pmu)
                goto out;

        pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, flags);
        if (!pmu->hw_events) {
                pr_info("failed to allocate per-cpu PMU data.\n");
                goto out_free_pmu;
        }

        pmu->pmu = (struct pmu) {
                .pmu_enable     = armpmu_enable,
                .pmu_disable    = armpmu_disable,
                .event_init     = armpmu_event_init,
                .add            = armpmu_add,
                .del            = armpmu_del,
                .start          = armpmu_start,
                .stop           = armpmu_stop,
                .read           = armpmu_read,
                .filter_match   = armpmu_filter_match,
                .attr_groups    = pmu->attr_groups,
                /*
                 * This is a CPU PMU potentially in a heterogeneous
                 * configuration (e.g. big.LITTLE). This is not an uncore PMU,
                 * and we have taken ctx sharing into account (e.g. with our
                 * pmu::filter_match callback and pmu::event_init group
                 * validation).
                 */
                .capabilities   = PERF_PMU_CAP_HETEROGENEOUS_CPUS,
        };

        pmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] =
                &armpmu_common_attr_group;

        for_each_possible_cpu(cpu) {
                struct pmu_hw_events *events;

                events = per_cpu_ptr(pmu->hw_events, cpu);
                raw_spin_lock_init(&events->pmu_lock);
                events->percpu_pmu = pmu;
        }

        return pmu;

out_free_pmu:
        kfree(pmu);
out:
        return NULL;
}

struct arm_pmu *armpmu_alloc(void)
{
        return __armpmu_alloc(GFP_KERNEL);
}

struct arm_pmu *armpmu_alloc_atomic(void)
{
        return __armpmu_alloc(GFP_ATOMIC);
}

void armpmu_free(struct arm_pmu *pmu)
{
        free_percpu(pmu->hw_events);
        kfree(pmu);
}

int armpmu_register(struct arm_pmu *pmu)
{
        int ret;

        ret = cpu_pmu_init(pmu);
        if (ret)
                return ret;

        if (!pmu->set_event_filter)
                pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;

        ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
        if (ret)
                goto out_destroy;

        pr_info("enabled with %s PMU driver, %d counters available%s\n",
                pmu->name, pmu->num_events,
                has_nmi ? ", using NMIs" : "");

        return 0;

out_destroy:
        cpu_pmu_destroy(pmu);
        return ret;
}

static int arm_pmu_hp_init(void)
{
        int ret;

        ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_STARTING,
                                      "perf/arm/pmu:starting",
                                      arm_perf_starting_cpu,
                                      arm_perf_teardown_cpu);
        if (ret)
                pr_err("CPU hotplug notifier for ARM PMU could not be registered: %d\n",
                       ret);
        return ret;
}
subsys_initcall(arm_pmu_hp_init);