linux/drivers/perf/arm_pmu.c
#undef DEBUG

/*
 * ARM performance counter support.
 *
 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
 * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
 *
 * This code is based on the sparc64 perf event code, which is in turn based
 * on the x86 code.
 */
#define pr_fmt(fmt) "hw perfevents: " fmt

#include <linux/bitmap.h>
#include <linux/cpumask.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/of_device.h>
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/irq.h>
#include <linux/irqdesc.h>

#include <asm/cputype.h>
#include <asm/irq_regs.h>

static int
armpmu_map_cache_event(const unsigned (*cache_map)
                                      [PERF_COUNT_HW_CACHE_MAX]
                                      [PERF_COUNT_HW_CACHE_OP_MAX]
                                      [PERF_COUNT_HW_CACHE_RESULT_MAX],
                       u64 config)
{
        unsigned int cache_type, cache_op, cache_result, ret;

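        /* perf encodes HW_CACHE events as: type | (op << 8) | (result << 16) */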
        cache_type = (config >>  0) & 0xff;
        if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
                return -EINVAL;

        cache_op = (config >>  8) & 0xff;
        if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
                return -EINVAL;

        cache_result = (config >> 16) & 0xff;
        if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                return -EINVAL;

        ret = (int)(*cache_map)[cache_type][cache_op][cache_result];

        if (ret == CACHE_OP_UNSUPPORTED)
                return -ENOENT;

        return ret;
}

static int
armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
{
        int mapping;

        if (config >= PERF_COUNT_HW_MAX)
                return -EINVAL;

        mapping = (*event_map)[config];
        return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
}

static int
armpmu_map_raw_event(u32 raw_event_mask, u64 config)
{
        return (int)(config & raw_event_mask);
}

int
armpmu_map_event(struct perf_event *event,
                 const unsigned (*event_map)[PERF_COUNT_HW_MAX],
                 const unsigned (*cache_map)
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX],
                 u32 raw_event_mask)
{
        u64 config = event->attr.config;
        int type = event->attr.type;

        if (type == event->pmu->type)
                return armpmu_map_raw_event(raw_event_mask, config);

        switch (type) {
        case PERF_TYPE_HARDWARE:
                return armpmu_map_hw_event(event_map, config);
        case PERF_TYPE_HW_CACHE:
                return armpmu_map_cache_event(cache_map, config);
        case PERF_TYPE_RAW:
                return armpmu_map_raw_event(raw_event_mask, config);
        }

        return -ENOENT;
}

int armpmu_event_set_period(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;
        s64 left = local64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        int ret = 0;

        if (unlikely(left <= -period)) {
                left = period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }

        if (unlikely(left <= 0)) {
                left += period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }

        /*
         * Limit the maximum period to prevent the counter value
         * from overtaking the one we are about to program. In
         * effect we are reducing max_period to account for
         * interrupt latency (and we are being very conservative).
         */
        if (left > (armpmu->max_period >> 1))
                left = armpmu->max_period >> 1;

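        /*
         * Program the counter with -left so that it overflows (and raises
         * an interrupt) after 'left' more increments.
         */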
        local64_set(&hwc->prev_count, (u64)-left);

        armpmu->write_counter(event, (u64)(-left) & 0xffffffff);

        perf_event_update_userpage(event);

        return ret;
}

u64 armpmu_event_update(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;
        u64 delta, prev_raw_count, new_raw_count;

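        /*
         * Read the counter and publish the new prev_count with a cmpxchg;
         * if an interrupt updated prev_count underneath us, retry so that
         * no counts are lost.
         */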
again:
        prev_raw_count = local64_read(&hwc->prev_count);
        new_raw_count = armpmu->read_counter(event);

        if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                             new_raw_count) != prev_raw_count)
                goto again;

        delta = (new_raw_count - prev_raw_count) & armpmu->max_period;

        local64_add(delta, &event->count);
        local64_sub(delta, &hwc->period_left);

        return new_raw_count;
}

static void
armpmu_read(struct perf_event *event)
{
        armpmu_event_update(event);
}

static void
armpmu_stop(struct perf_event *event, int flags)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;

        /*
         * ARM pmu always has to update the counter, so ignore
         * PERF_EF_UPDATE, see comments in armpmu_start().
         */
        if (!(hwc->state & PERF_HES_STOPPED)) {
                armpmu->disable(event);
                armpmu_event_update(event);
                hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
        }
}

static void armpmu_start(struct perf_event *event, int flags)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;

        /*
         * ARM pmu always has to reprogram the period, so ignore
         * PERF_EF_RELOAD, see the comment below.
         */
        if (flags & PERF_EF_RELOAD)
                WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

        hwc->state = 0;
        /*
         * Set the period again. Some counters can't be stopped, so when we
         * were stopped we simply disabled the IRQ source and the counter
         * may have been left counting. If we don't do this step then we may
         * get an interrupt too soon or *way* too late if the overflow has
         * happened since disabling.
         */
        armpmu_event_set_period(event);
        armpmu->enable(event);
}

static void
armpmu_del(struct perf_event *event, int flags)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        armpmu_stop(event, PERF_EF_UPDATE);
        hw_events->events[idx] = NULL;
        clear_bit(idx, hw_events->used_mask);
        if (armpmu->clear_event_idx)
                armpmu->clear_event_idx(hw_events, event);

        perf_event_update_userpage(event);
}

static int
armpmu_add(struct perf_event *event, int flags)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx;
        int err = 0;

        /* An event following a process won't be stopped earlier */
        if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
                return -ENOENT;

        perf_pmu_disable(event->pmu);

        /* If we don't have a space for the counter then finish early. */
        idx = armpmu->get_event_idx(hw_events, event);
        if (idx < 0) {
                err = idx;
                goto out;
        }

        /*
         * If there is an event in the counter we are going to use then make
         * sure it is disabled.
         */
        event->hw.idx = idx;
        armpmu->disable(event);
        hw_events->events[idx] = event;

        hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
        if (flags & PERF_EF_START)
                armpmu_start(event, PERF_EF_RELOAD);

        /* Propagate our changes to the userspace mapping. */
        perf_event_update_userpage(event);

out:
        perf_pmu_enable(event->pmu);
        return err;
}

static int
validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
                               struct perf_event *event)
{
        struct arm_pmu *armpmu;

        if (is_software_event(event))
                return 1;

        /*
         * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
         * core perf code won't check that the pmu->ctx == leader->ctx
         * until after pmu->event_init(event).
         */
        if (event->pmu != pmu)
                return 0;

        if (event->state < PERF_EVENT_STATE_OFF)
                return 1;

        if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
                return 1;

        armpmu = to_arm_pmu(event->pmu);
        return armpmu->get_event_idx(hw_events, event) >= 0;
}

static int
validate_group(struct perf_event *event)
{
        struct perf_event *sibling, *leader = event->group_leader;
        struct pmu_hw_events fake_pmu;

        /*
         * Initialise the fake PMU. We only need to populate the
         * used_mask for the purposes of validation.
         */
        memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask));

        if (!validate_event(event->pmu, &fake_pmu, leader))
                return -EINVAL;

        list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
                if (!validate_event(event->pmu, &fake_pmu, sibling))
                        return -EINVAL;
        }

        if (!validate_event(event->pmu, &fake_pmu, event))
                return -EINVAL;

        return 0;
}

static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
{
        struct arm_pmu *armpmu;
        struct platform_device *plat_device;
        struct arm_pmu_platdata *plat;
        int ret;
        u64 start_clock, finish_clock;

        /*
         * we request the IRQ with a (possibly percpu) struct arm_pmu**, but
         * the handlers expect a struct arm_pmu*. The percpu_irq framework will
         * do any necessary shifting, we just need to perform the first
         * dereference.
         */
        armpmu = *(void **)dev;
        plat_device = armpmu->plat_device;
        plat = dev_get_platdata(&plat_device->dev);

        start_clock = sched_clock();
        if (plat && plat->handle_irq)
                ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq);
        else
                ret = armpmu->handle_irq(irq, armpmu);
        finish_clock = sched_clock();

        perf_sample_event_took(finish_clock - start_clock);
        return ret;
}

static void
armpmu_release_hardware(struct arm_pmu *armpmu)
{
        armpmu->free_irq(armpmu);
}

static int
armpmu_reserve_hardware(struct arm_pmu *armpmu)
{
        int err = armpmu->request_irq(armpmu, armpmu_dispatch_irq);
        if (err) {
                armpmu_release_hardware(armpmu);
                return err;
        }

        return 0;
}

static void
hw_perf_event_destroy(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        atomic_t *active_events  = &armpmu->active_events;
        struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;

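        /* Release the PMU interrupt(s) once the last active event is gone. */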
        if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
                armpmu_release_hardware(armpmu);
                mutex_unlock(pmu_reserve_mutex);
        }
}

static int
event_requires_mode_exclusion(struct perf_event_attr *attr)
{
        return attr->exclude_idle || attr->exclude_user ||
               attr->exclude_kernel || attr->exclude_hv;
}

static int
__hw_perf_event_init(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;
        int mapping;

        mapping = armpmu->map_event(event);

        if (mapping < 0) {
                pr_debug("event %x:%llx not supported\n", event->attr.type,
                         event->attr.config);
                return mapping;
        }

        /*
         * We don't assign an index until we actually place the event onto
         * hardware. Use -1 to signify that we haven't decided where to put it
         * yet. For SMP systems, each core has its own PMU so we can't do any
         * clever allocation or constraints checking at this point.
         */
        hwc->idx                = -1;
        hwc->config_base        = 0;
        hwc->config             = 0;
        hwc->event_base         = 0;

        /*
         * Check whether we need to exclude the counter from certain modes.
         */
        if ((!armpmu->set_event_filter ||
             armpmu->set_event_filter(hwc, &event->attr)) &&
             event_requires_mode_exclusion(&event->attr)) {
                pr_debug("ARM performance counters do not support "
                         "mode exclusion\n");
                return -EOPNOTSUPP;
        }

        /*
         * Store the event encoding into the config_base field.
         */
        hwc->config_base            |= (unsigned long)mapping;

        if (!is_sampling_event(event)) {
                /*
                 * For non-sampling runs, limit the sample_period to half
                 * of the counter width. That way, the new counter value
                 * is far less likely to overtake the previous one unless
                 * you have some serious IRQ latency issues.
                 */
                hwc->sample_period  = armpmu->max_period >> 1;
                hwc->last_period    = hwc->sample_period;
                local64_set(&hwc->period_left, hwc->sample_period);
        }

        if (event->group_leader != event) {
                if (validate_group(event) != 0)
                        return -EINVAL;
        }

        return 0;
}

static int armpmu_event_init(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        int err = 0;
        atomic_t *active_events = &armpmu->active_events;

        /*
         * Reject CPU-affine events for CPUs that are of a different class to
         * that which this PMU handles. Process-following events (where
         * event->cpu == -1) can be migrated between CPUs, and thus we have to
         * reject them later (in armpmu_add) if they're scheduled on a
         * different class of CPU.
         */
        if (event->cpu != -1 &&
                !cpumask_test_cpu(event->cpu, &armpmu->supported_cpus))
                return -ENOENT;

        /* does not support taken branch sampling */
        if (has_branch_stack(event))
                return -EOPNOTSUPP;

        if (armpmu->map_event(event) == -ENOENT)
                return -ENOENT;

        event->destroy = hw_perf_event_destroy;

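        /*
         * Reserve the PMU interrupt(s) for the first active event; later
         * events only take a reference.
         */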
        if (!atomic_inc_not_zero(active_events)) {
                mutex_lock(&armpmu->reserve_mutex);
                if (atomic_read(active_events) == 0)
                        err = armpmu_reserve_hardware(armpmu);

                if (!err)
                        atomic_inc(active_events);
                mutex_unlock(&armpmu->reserve_mutex);
        }

        if (err)
                return err;

        err = __hw_perf_event_init(event);
        if (err)
                hw_perf_event_destroy(event);

        return err;
}

static void armpmu_enable(struct pmu *pmu)
{
        struct arm_pmu *armpmu = to_arm_pmu(pmu);
        struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
        int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);

        /* For task-bound events we may be called on other CPUs */
        if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
                return;

        if (enabled)
                armpmu->start(armpmu);
}

static void armpmu_disable(struct pmu *pmu)
{
        struct arm_pmu *armpmu = to_arm_pmu(pmu);

        /* For task-bound events we may be called on other CPUs */
        if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
                return;

        armpmu->stop(armpmu);
}

/*
 * In heterogeneous systems, events are specific to a particular
 * microarchitecture, and aren't suitable for another. Thus, only match CPUs of
 * the same microarchitecture.
 */
static int armpmu_filter_match(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        unsigned int cpu = smp_processor_id();
        return cpumask_test_cpu(cpu, &armpmu->supported_cpus);
}

static void armpmu_init(struct arm_pmu *armpmu)
{
        atomic_set(&armpmu->active_events, 0);
        mutex_init(&armpmu->reserve_mutex);

        armpmu->pmu = (struct pmu) {
                .pmu_enable     = armpmu_enable,
                .pmu_disable    = armpmu_disable,
                .event_init     = armpmu_event_init,
                .add            = armpmu_add,
                .del            = armpmu_del,
                .start          = armpmu_start,
                .stop           = armpmu_stop,
                .read           = armpmu_read,
                .filter_match   = armpmu_filter_match,
        };
}

/* Set at runtime when we know what CPU type we are. */
static struct arm_pmu *__oprofile_cpu_pmu;

/*
 * Despite the names, these two functions are CPU-specific and are used
 * by the OProfile/perf code.
 */
const char *perf_pmu_name(void)
{
        if (!__oprofile_cpu_pmu)
                return NULL;

        return __oprofile_cpu_pmu->name;
}
EXPORT_SYMBOL_GPL(perf_pmu_name);

int perf_num_counters(void)
{
        int max_events = 0;

        if (__oprofile_cpu_pmu != NULL)
                max_events = __oprofile_cpu_pmu->num_events;

        return max_events;
}
EXPORT_SYMBOL_GPL(perf_num_counters);

static void cpu_pmu_enable_percpu_irq(void *data)
{
        int irq = *(int *)data;

        enable_percpu_irq(irq, IRQ_TYPE_NONE);
}

static void cpu_pmu_disable_percpu_irq(void *data)
{
        int irq = *(int *)data;

        disable_percpu_irq(irq);
}

static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
{
        int i, irq, irqs;
        struct platform_device *pmu_device = cpu_pmu->plat_device;
        struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;

        irqs = min(pmu_device->num_resources, num_possible_cpus());

        irq = platform_get_irq(pmu_device, 0);
        if (irq >= 0 && irq_is_percpu(irq)) {
                on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1);
                free_percpu_irq(irq, &hw_events->percpu_pmu);
        } else {
                for (i = 0; i < irqs; ++i) {
                        int cpu = i;

                        if (cpu_pmu->irq_affinity)
                                cpu = cpu_pmu->irq_affinity[i];

                        if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
                                continue;
                        irq = platform_get_irq(pmu_device, i);
                        if (irq >= 0)
                                free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
                }
        }
}

static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
{
        int i, err, irq, irqs;
        struct platform_device *pmu_device = cpu_pmu->plat_device;
        struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;

        if (!pmu_device)
                return -ENODEV;

        irqs = min(pmu_device->num_resources, num_possible_cpus());
        if (irqs < 1) {
                pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n");
                return 0;
        }

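        /*
         * A single percpu interrupt (PPI) is shared by all CPUs; otherwise
         * request one interrupt per CPU and route it with irq_set_affinity().
         */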
        irq = platform_get_irq(pmu_device, 0);
        if (irq >= 0 && irq_is_percpu(irq)) {
                err = request_percpu_irq(irq, handler, "arm-pmu",
                                         &hw_events->percpu_pmu);
                if (err) {
                        pr_err("unable to request IRQ%d for ARM PMU counters\n",
                                irq);
                        return err;
                }
                on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1);
        } else {
                for (i = 0; i < irqs; ++i) {
                        int cpu = i;

                        err = 0;
                        irq = platform_get_irq(pmu_device, i);
                        if (irq < 0)
                                continue;

                        if (cpu_pmu->irq_affinity)
                                cpu = cpu_pmu->irq_affinity[i];

                        /*
                         * If we have a single PMU interrupt that we can't shift,
                         * assume that we're running on a uniprocessor machine and
                         * continue. Otherwise, continue without this interrupt.
                         */
                        if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
                                pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n",
                                        irq, cpu);
                                continue;
                        }

                        err = request_irq(irq, handler,
                                          IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
                                          per_cpu_ptr(&hw_events->percpu_pmu, cpu));
                        if (err) {
                                pr_err("unable to request IRQ%d for ARM PMU counters\n",
                                        irq);
                                return err;
                        }

                        cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
                }
        }

        return 0;
}

/*
 * PMU hardware loses all context when a CPU goes offline.
 * When a CPU is hotplugged back in, since some hardware registers are
 * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
 * junk values out of them.
 */
static int cpu_pmu_notify(struct notifier_block *b, unsigned long action,
                          void *hcpu)
{
        int cpu = (unsigned long)hcpu;
        struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb);

        if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
                return NOTIFY_DONE;

        if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
                return NOTIFY_DONE;

        if (pmu->reset)
                pmu->reset(pmu);
        else
                return NOTIFY_DONE;

        return NOTIFY_OK;
}

static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
{
        int err;
        int cpu;
        struct pmu_hw_events __percpu *cpu_hw_events;

        cpu_hw_events = alloc_percpu(struct pmu_hw_events);
        if (!cpu_hw_events)
                return -ENOMEM;

        cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify;
        err = register_cpu_notifier(&cpu_pmu->hotplug_nb);
        if (err)
                goto out_hw_events;

        for_each_possible_cpu(cpu) {
                struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu);
                raw_spin_lock_init(&events->pmu_lock);
                events->percpu_pmu = cpu_pmu;
        }

        cpu_pmu->hw_events      = cpu_hw_events;
        cpu_pmu->request_irq    = cpu_pmu_request_irq;
        cpu_pmu->free_irq       = cpu_pmu_free_irq;

        /* Ensure the PMU has sane values out of reset. */
        if (cpu_pmu->reset)
                on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset,
                         cpu_pmu, 1);

        /* If no interrupts available, set the corresponding capability flag */
        if (!platform_get_irq(cpu_pmu->plat_device, 0))
                cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;

        return 0;

out_hw_events:
        free_percpu(cpu_hw_events);
        return err;
}

static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
{
        unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
        free_percpu(cpu_pmu->hw_events);
}

/*
 * CPU PMU identification and probing.
 */
static int probe_current_pmu(struct arm_pmu *pmu,
                             const struct pmu_probe_info *info)
{
        int cpu = get_cpu();
        unsigned int cpuid = read_cpuid_id();
        int ret = -ENODEV;

        pr_info("probing PMU on CPU %d\n", cpu);

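        /* Find the first probe-table entry whose CPUID matches and init it. */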
        for (; info->init != NULL; info++) {
                if ((cpuid & info->mask) != info->cpuid)
                        continue;
                ret = info->init(pmu);
                break;
        }

        put_cpu();
        return ret;
}

static int of_pmu_irq_cfg(struct arm_pmu *pmu)
{
        int *irqs, i = 0;
        bool using_spi = false;
        struct platform_device *pdev = pmu->plat_device;

        irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
        if (!irqs)
                return -ENOMEM;

        do {
                struct device_node *dn;
                int cpu, irq;

                /* See if we have an affinity entry */
                dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", i);
                if (!dn)
                        break;

                /* Check the IRQ type and prohibit a mix of PPIs and SPIs */
                irq = platform_get_irq(pdev, i);
                if (irq >= 0) {
                        bool spi = !irq_is_percpu(irq);

                        if (i > 0 && spi != using_spi) {
                                pr_err("PPI/SPI IRQ type mismatch for %s!\n",
                                        dn->name);
                                kfree(irqs);
                                return -EINVAL;
                        }

                        using_spi = spi;
                }

                /* Now look up the logical CPU number */
                for_each_possible_cpu(cpu) {
                        struct device_node *cpu_dn;

                        cpu_dn = of_cpu_device_node_get(cpu);
                        of_node_put(cpu_dn);

                        if (dn == cpu_dn)
                                break;
                }

                if (cpu >= nr_cpu_ids) {
                        pr_warn("Failed to find logical CPU for %s\n",
                                dn->name);
                        of_node_put(dn);
                        cpumask_setall(&pmu->supported_cpus);
                        break;
                }
                of_node_put(dn);

                /* For SPIs, we need to track the affinity per IRQ */
                if (using_spi) {
                        if (i >= pdev->num_resources) {
                                of_node_put(dn);
                                break;
                        }

                        irqs[i] = cpu;
                }

                /* Keep track of the CPUs containing this PMU type */
                cpumask_set_cpu(cpu, &pmu->supported_cpus);
                of_node_put(dn);
                i++;
        } while (1);

        /* If we didn't manage to parse anything, claim to support all CPUs */
        if (cpumask_weight(&pmu->supported_cpus) == 0)
                cpumask_setall(&pmu->supported_cpus);

        /* If we matched up the IRQ affinities, use them to route the SPIs */
        if (using_spi && i == pdev->num_resources)
                pmu->irq_affinity = irqs;
        else
                kfree(irqs);

        return 0;
}

int arm_pmu_device_probe(struct platform_device *pdev,
                         const struct of_device_id *of_table,
                         const struct pmu_probe_info *probe_table)
{
        const struct of_device_id *of_id;
        const int (*init_fn)(struct arm_pmu *);
        struct device_node *node = pdev->dev.of_node;
        struct arm_pmu *pmu;
        int ret = -ENODEV;

        pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
        if (!pmu) {
                pr_info("failed to allocate PMU device!\n");
                return -ENOMEM;
        }

        armpmu_init(pmu);

        if (!__oprofile_cpu_pmu)
                __oprofile_cpu_pmu = pmu;

        pmu->plat_device = pdev;

        if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) {
                init_fn = of_id->data;

                ret = of_pmu_irq_cfg(pmu);
                if (!ret)
                        ret = init_fn(pmu);
        } else {
                ret = probe_current_pmu(pmu, probe_table);
                cpumask_setall(&pmu->supported_cpus);
        }

        if (ret) {
                pr_info("failed to probe PMU!\n");
                goto out_free;
        }

        ret = cpu_pmu_init(pmu);
        if (ret)
                goto out_free;

        ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
        if (ret)
                goto out_destroy;

        pr_info("enabled with %s PMU driver, %d counters available\n",
                        pmu->name, pmu->num_events);

        return 0;

out_destroy:
        cpu_pmu_destroy(pmu);
out_free:
        pr_info("failed to register PMU devices!\n");
        kfree(pmu);
        return ret;
}