linux/arch/arm64/kernel/perf_event.c
   1/*
   2 * PMU support
   3 *
   4 * Copyright (C) 2012 ARM Limited
   5 * Author: Will Deacon <will.deacon@arm.com>
   6 *
   7 * This code is based heavily on the ARMv7 perf event code.
   8 *
   9 * This program is free software; you can redistribute it and/or modify
  10 * it under the terms of the GNU General Public License version 2 as
  11 * published by the Free Software Foundation.
  12 *
  13 * This program is distributed in the hope that it will be useful,
  14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 * GNU General Public License for more details.
  17 *
  18 * You should have received a copy of the GNU General Public License
  19 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  20 */
  21#define pr_fmt(fmt) "hw perfevents: " fmt
  22
  23#include <linux/bitmap.h>
  24#include <linux/interrupt.h>
  25#include <linux/irq.h>
  26#include <linux/kernel.h>
  27#include <linux/export.h>
  28#include <linux/perf_event.h>
  29#include <linux/platform_device.h>
  30#include <linux/spinlock.h>
  31#include <linux/uaccess.h>
  32
  33#include <asm/cputype.h>
  34#include <asm/irq.h>
  35#include <asm/irq_regs.h>
  36#include <asm/pmu.h>
  37#include <asm/stacktrace.h>
  38
  39/*
  40 * ARMv8 supports a maximum of 32 events.
  41 * The cycle counter is included in this total.
  42 */
  43#define ARMPMU_MAX_HWEVENTS             32
  44
  45static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
  46static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
  47static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
  48
  49#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
  50
  51/* Set at runtime when we know what CPU type we are. */
  52static struct arm_pmu *cpu_pmu;
  53
  54int
  55armpmu_get_max_events(void)
  56{
  57        int max_events = 0;
  58
  59        if (cpu_pmu != NULL)
  60                max_events = cpu_pmu->num_events;
  61
  62        return max_events;
  63}
  64EXPORT_SYMBOL_GPL(armpmu_get_max_events);
  65
  66int perf_num_counters(void)
  67{
  68        return armpmu_get_max_events();
  69}
  70EXPORT_SYMBOL_GPL(perf_num_counters);
  71
  72#define HW_OP_UNSUPPORTED               0xFFFF
  73
  74#define C(_x) \
  75        PERF_COUNT_HW_CACHE_##_x
  76
  77#define CACHE_OP_UNSUPPORTED            0xFFFF
  78
  79static int
  80armpmu_map_cache_event(const unsigned (*cache_map)
  81                                      [PERF_COUNT_HW_CACHE_MAX]
  82                                      [PERF_COUNT_HW_CACHE_OP_MAX]
  83                                      [PERF_COUNT_HW_CACHE_RESULT_MAX],
  84                       u64 config)
  85{
  86        unsigned int cache_type, cache_op, cache_result, ret;
  87
  88        cache_type = (config >>  0) & 0xff;
  89        if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
  90                return -EINVAL;
  91
  92        cache_op = (config >>  8) & 0xff;
  93        if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
  94                return -EINVAL;
  95
  96        cache_result = (config >> 16) & 0xff;
  97        if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
  98                return -EINVAL;
  99
 100        ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
 101
 102        if (ret == CACHE_OP_UNSUPPORTED)
 103                return -ENOENT;
 104
 105        return ret;
 106}
 107
 108static int
 109armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
 110{
 111        int mapping;
 112
 113        if (config >= PERF_COUNT_HW_MAX)
 114                return -EINVAL;
 115
 116        mapping = (*event_map)[config];
 117        return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
 118}
 119
 120static int
 121armpmu_map_raw_event(u32 raw_event_mask, u64 config)
 122{
 123        return (int)(config & raw_event_mask);
 124}
 125
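     /*
      * Dispatch on the perf event type and translate it into a hardware
      * event number using the generic, cache or raw mapping tables
      * supplied by the PMU driver.
      */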
 126static int map_cpu_event(struct perf_event *event,
 127                         const unsigned (*event_map)[PERF_COUNT_HW_MAX],
 128                         const unsigned (*cache_map)
 129                                        [PERF_COUNT_HW_CACHE_MAX]
 130                                        [PERF_COUNT_HW_CACHE_OP_MAX]
 131                                        [PERF_COUNT_HW_CACHE_RESULT_MAX],
 132                         u32 raw_event_mask)
 133{
 134        u64 config = event->attr.config;
 135
 136        switch (event->attr.type) {
 137        case PERF_TYPE_HARDWARE:
 138                return armpmu_map_event(event_map, config);
 139        case PERF_TYPE_HW_CACHE:
 140                return armpmu_map_cache_event(cache_map, config);
 141        case PERF_TYPE_RAW:
 142                return armpmu_map_raw_event(raw_event_mask, config);
 143        }
 144
 145        return -ENOENT;
 146}
 147
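     /*
      * Program the counter so that it overflows after the remaining sample
      * period: write -(left) and let the hardware count up towards zero.
      * Returns 1 if a new sample period was started.
      */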
 148int
 149armpmu_event_set_period(struct perf_event *event,
 150                        struct hw_perf_event *hwc,
 151                        int idx)
 152{
 153        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 154        s64 left = local64_read(&hwc->period_left);
 155        s64 period = hwc->sample_period;
 156        int ret = 0;
 157
 158        if (unlikely(left <= -period)) {
 159                left = period;
 160                local64_set(&hwc->period_left, left);
 161                hwc->last_period = period;
 162                ret = 1;
 163        }
 164
 165        if (unlikely(left <= 0)) {
 166                left += period;
 167                local64_set(&hwc->period_left, left);
 168                hwc->last_period = period;
 169                ret = 1;
 170        }
 171
 172        if (left > (s64)armpmu->max_period)
 173                left = armpmu->max_period;
 174
 175        local64_set(&hwc->prev_count, (u64)-left);
 176
 177        armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
 178
 179        perf_event_update_userpage(event);
 180
 181        return ret;
 182}
 183
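     /*
      * Fold the delta since the last read into event->count and period_left.
      * The cmpxchg loop copes with racing against the interrupt handler, and
      * the delta is masked with max_period to handle counter wrap-around.
      */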
 184u64
 185armpmu_event_update(struct perf_event *event,
 186                    struct hw_perf_event *hwc,
 187                    int idx)
 188{
 189        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 190        u64 delta, prev_raw_count, new_raw_count;
 191
 192again:
 193        prev_raw_count = local64_read(&hwc->prev_count);
 194        new_raw_count = armpmu->read_counter(idx);
 195
 196        if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 197                             new_raw_count) != prev_raw_count)
 198                goto again;
 199
 200        delta = (new_raw_count - prev_raw_count) & armpmu->max_period;
 201
 202        local64_add(delta, &event->count);
 203        local64_sub(delta, &hwc->period_left);
 204
 205        return new_raw_count;
 206}
 207
 208static void
 209armpmu_read(struct perf_event *event)
 210{
 211        struct hw_perf_event *hwc = &event->hw;
 212
 213        /* Don't read disabled counters! */
 214        if (hwc->idx < 0)
 215                return;
 216
 217        armpmu_event_update(event, hwc, hwc->idx);
 218}
 219
 220static void
 221armpmu_stop(struct perf_event *event, int flags)
 222{
 223        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 224        struct hw_perf_event *hwc = &event->hw;
 225
 226        /*
 227         * ARM pmu always has to update the counter, so ignore
 228         * PERF_EF_UPDATE, see comments in armpmu_start().
 229         */
 230        if (!(hwc->state & PERF_HES_STOPPED)) {
 231                armpmu->disable(hwc, hwc->idx);
  232                barrier(); /* order the disable before the final counter read */
 233                armpmu_event_update(event, hwc, hwc->idx);
 234                hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
 235        }
 236}
 237
 238static void
 239armpmu_start(struct perf_event *event, int flags)
 240{
 241        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 242        struct hw_perf_event *hwc = &event->hw;
 243
 244        /*
 245         * ARM pmu always has to reprogram the period, so ignore
 246         * PERF_EF_RELOAD, see the comment below.
 247         */
 248        if (flags & PERF_EF_RELOAD)
 249                WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
 250
 251        hwc->state = 0;
 252        /*
 253         * Set the period again. Some counters can't be stopped, so when we
 254         * were stopped we simply disabled the IRQ source and the counter
 255         * may have been left counting. If we don't do this step then we may
 256         * get an interrupt too soon or *way* too late if the overflow has
 257         * happened since disabling.
 258         */
 259        armpmu_event_set_period(event, hwc, hwc->idx);
 260        armpmu->enable(hwc, hwc->idx);
 261}
 262
 263static void
 264armpmu_del(struct perf_event *event, int flags)
 265{
 266        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 267        struct pmu_hw_events *hw_events = armpmu->get_hw_events();
 268        struct hw_perf_event *hwc = &event->hw;
 269        int idx = hwc->idx;
 270
 271        WARN_ON(idx < 0);
 272
 273        armpmu_stop(event, PERF_EF_UPDATE);
 274        hw_events->events[idx] = NULL;
 275        clear_bit(idx, hw_events->used_mask);
 276
 277        perf_event_update_userpage(event);
 278}
 279
 280static int
 281armpmu_add(struct perf_event *event, int flags)
 282{
 283        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 284        struct pmu_hw_events *hw_events = armpmu->get_hw_events();
 285        struct hw_perf_event *hwc = &event->hw;
 286        int idx;
 287        int err = 0;
 288
 289        perf_pmu_disable(event->pmu);
 290
  291        /* If we don't have space for the counter then finish early. */
 292        idx = armpmu->get_event_idx(hw_events, hwc);
 293        if (idx < 0) {
 294                err = idx;
 295                goto out;
 296        }
 297
 298        /*
 299         * If there is an event in the counter we are going to use then make
 300         * sure it is disabled.
 301         */
 302        event->hw.idx = idx;
 303        armpmu->disable(hwc, idx);
 304        hw_events->events[idx] = event;
 305
 306        hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
 307        if (flags & PERF_EF_START)
 308                armpmu_start(event, PERF_EF_RELOAD);
 309
 310        /* Propagate our changes to the userspace mapping. */
 311        perf_event_update_userpage(event);
 312
 313out:
 314        perf_pmu_enable(event->pmu);
 315        return err;
 316}
 317
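     /*
      * Check whether a group member can be accommodated: software events,
      * events belonging to another PMU and events that are off (and won't be
      * enabled on exec) always pass; anything else must be able to get a
      * counter on the fake PMU.
      */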
 318static int
 319validate_event(struct pmu_hw_events *hw_events,
 320               struct perf_event *event)
 321{
 322        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 323        struct hw_perf_event fake_event = event->hw;
 324        struct pmu *leader_pmu = event->group_leader->pmu;
 325
 326        if (is_software_event(event))
 327                return 1;
 328
 329        if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF)
 330                return 1;
 331
 332        if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
 333                return 1;
 334
 335        return armpmu->get_event_idx(hw_events, &fake_event) >= 0;
 336}
 337
 338static int
 339validate_group(struct perf_event *event)
 340{
 341        struct perf_event *sibling, *leader = event->group_leader;
 342        struct pmu_hw_events fake_pmu;
 343        DECLARE_BITMAP(fake_used_mask, ARMPMU_MAX_HWEVENTS);
 344
 345        /*
 346         * Initialise the fake PMU. We only need to populate the
 347         * used_mask for the purposes of validation.
 348         */
 349        memset(fake_used_mask, 0, sizeof(fake_used_mask));
 350        fake_pmu.used_mask = fake_used_mask;
 351
 352        if (!validate_event(&fake_pmu, leader))
 353                return -EINVAL;
 354
 355        list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
 356                if (!validate_event(&fake_pmu, sibling))
 357                        return -EINVAL;
 358        }
 359
 360        if (!validate_event(&fake_pmu, event))
 361                return -EINVAL;
 362
 363        return 0;
 364}
 365
 366static void
 367armpmu_disable_percpu_irq(void *data)
 368{
 369        unsigned int irq = *(unsigned int *)data;
 370        disable_percpu_irq(irq);
 371}
 372
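     /*
      * Undo armpmu_reserve_hardware(): disable and free either the per-cpu
      * IRQ or each per-CPU interrupt that was requested.
      */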
 373static void
 374armpmu_release_hardware(struct arm_pmu *armpmu)
 375{
 376        int irq;
 377        unsigned int i, irqs;
 378        struct platform_device *pmu_device = armpmu->plat_device;
 379
 380        irqs = min(pmu_device->num_resources, num_possible_cpus());
 381        if (!irqs)
 382                return;
 383
 384        irq = platform_get_irq(pmu_device, 0);
 385        if (irq <= 0)
 386                return;
 387
 388        if (irq_is_percpu(irq)) {
 389                on_each_cpu(armpmu_disable_percpu_irq, &irq, 1);
 390                free_percpu_irq(irq, &cpu_hw_events);
 391        } else {
 392                for (i = 0; i < irqs; ++i) {
 393                        if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs))
 394                                continue;
 395                        irq = platform_get_irq(pmu_device, i);
 396                        if (irq > 0)
 397                                free_irq(irq, armpmu);
 398                }
 399        }
 400}
 401
 402static void
 403armpmu_enable_percpu_irq(void *data)
 404{
 405        unsigned int irq = *(unsigned int *)data;
 406        enable_percpu_irq(irq, IRQ_TYPE_NONE);
 407}
 408
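     /*
      * Claim the PMU interrupt(s): either a single per-cpu IRQ shared by all
      * CPUs, or one IRQ per CPU with its affinity pinned to that CPU.
      */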
 409static int
 410armpmu_reserve_hardware(struct arm_pmu *armpmu)
 411{
 412        int err, irq;
 413        unsigned int i, irqs;
 414        struct platform_device *pmu_device = armpmu->plat_device;
 415
 416        if (!pmu_device) {
 417                pr_err("no PMU device registered\n");
 418                return -ENODEV;
 419        }
 420
 421        irqs = min(pmu_device->num_resources, num_possible_cpus());
 422        if (!irqs) {
 423                pr_err("no irqs for PMUs defined\n");
 424                return -ENODEV;
 425        }
 426
 427        irq = platform_get_irq(pmu_device, 0);
 428        if (irq <= 0) {
 429                pr_err("failed to get valid irq for PMU device\n");
 430                return -ENODEV;
 431        }
 432
 433        if (irq_is_percpu(irq)) {
 434                err = request_percpu_irq(irq, armpmu->handle_irq,
 435                                "arm-pmu", &cpu_hw_events);
 436
 437                if (err) {
 438                        pr_err("unable to request percpu IRQ%d for ARM PMU counters\n",
 439                                        irq);
 440                        armpmu_release_hardware(armpmu);
 441                        return err;
 442                }
 443
 444                on_each_cpu(armpmu_enable_percpu_irq, &irq, 1);
 445        } else {
 446                for (i = 0; i < irqs; ++i) {
 447                        err = 0;
 448                        irq = platform_get_irq(pmu_device, i);
 449                        if (irq <= 0)
 450                                continue;
 451
  452                        /*
  453                         * If we can't set the affinity of a single PMU interrupt,
  454                         * assume we're on a uniprocessor machine and keep using it.
  455                         * Otherwise, skip this interrupt and move on to the next.
  456                         */
 457                        if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
 458                                pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
 459                                                irq, i);
 460                                continue;
 461                        }
 462
 463                        err = request_irq(irq, armpmu->handle_irq,
 464                                        IRQF_NOBALANCING,
 465                                        "arm-pmu", armpmu);
 466                        if (err) {
 467                                pr_err("unable to request IRQ%d for ARM PMU counters\n",
 468                                                irq);
 469                                armpmu_release_hardware(armpmu);
 470                                return err;
 471                        }
 472
 473                        cpumask_set_cpu(i, &armpmu->active_irqs);
 474                }
 475        }
 476
 477        return 0;
 478}
 479
 480static void
 481hw_perf_event_destroy(struct perf_event *event)
 482{
 483        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 484        atomic_t *active_events  = &armpmu->active_events;
 485        struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;
 486
 487        if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
 488                armpmu_release_hardware(armpmu);
 489                mutex_unlock(pmu_reserve_mutex);
 490        }
 491}
 492
 493static int
 494event_requires_mode_exclusion(struct perf_event_attr *attr)
 495{
 496        return attr->exclude_idle || attr->exclude_user ||
 497               attr->exclude_kernel || attr->exclude_hv;
 498}
 499
 500static int
 501__hw_perf_event_init(struct perf_event *event)
 502{
 503        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 504        struct hw_perf_event *hwc = &event->hw;
 505        int mapping, err;
 506
 507        mapping = armpmu->map_event(event);
 508
 509        if (mapping < 0) {
 510                pr_debug("event %x:%llx not supported\n", event->attr.type,
 511                         event->attr.config);
 512                return mapping;
 513        }
 514
 515        /*
 516         * We don't assign an index until we actually place the event onto
 517         * hardware. Use -1 to signify that we haven't decided where to put it
  518         * yet. For SMP systems, each core has its own PMU so we can't do any
 519         * clever allocation or constraints checking at this point.
 520         */
 521        hwc->idx                = -1;
 522        hwc->config_base        = 0;
 523        hwc->config             = 0;
 524        hwc->event_base         = 0;
 525
 526        /*
 527         * Check whether we need to exclude the counter from certain modes.
 528         */
 529        if ((!armpmu->set_event_filter ||
 530             armpmu->set_event_filter(hwc, &event->attr)) &&
 531             event_requires_mode_exclusion(&event->attr)) {
 532                pr_debug("ARM performance counters do not support mode exclusion\n");
 533                return -EPERM;
 534        }
 535
 536        /*
 537         * Store the event encoding into the config_base field.
 538         */
 539        hwc->config_base            |= (unsigned long)mapping;
 540
 541        if (!hwc->sample_period) {
 542                /*
 543                 * For non-sampling runs, limit the sample_period to half
 544                 * of the counter width. That way, the new counter value
 545                 * is far less likely to overtake the previous one unless
 546                 * you have some serious IRQ latency issues.
 547                 */
 548                hwc->sample_period  = armpmu->max_period >> 1;
 549                hwc->last_period    = hwc->sample_period;
 550                local64_set(&hwc->period_left, hwc->sample_period);
 551        }
 552
 553        err = 0;
 554        if (event->group_leader != event) {
 555                err = validate_group(event);
 556                if (err)
 557                        return -EINVAL;
 558        }
 559
 560        return err;
 561}
 562
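     /*
      * pmu::event_init callback: reserve the PMU hardware the first time an
      * event is created (refcounted via active_events), then perform the
      * per-event setup in __hw_perf_event_init().
      */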
 563static int armpmu_event_init(struct perf_event *event)
 564{
 565        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 566        int err = 0;
 567        atomic_t *active_events = &armpmu->active_events;
 568
 569        if (armpmu->map_event(event) == -ENOENT)
 570                return -ENOENT;
 571
 572        event->destroy = hw_perf_event_destroy;
 573
 574        if (!atomic_inc_not_zero(active_events)) {
 575                mutex_lock(&armpmu->reserve_mutex);
 576                if (atomic_read(active_events) == 0)
 577                        err = armpmu_reserve_hardware(armpmu);
 578
 579                if (!err)
 580                        atomic_inc(active_events);
 581                mutex_unlock(&armpmu->reserve_mutex);
 582        }
 583
 584        if (err)
 585                return err;
 586
 587        err = __hw_perf_event_init(event);
 588        if (err)
 589                hw_perf_event_destroy(event);
 590
 591        return err;
 592}
 593
 594static void armpmu_enable(struct pmu *pmu)
 595{
 596        struct arm_pmu *armpmu = to_arm_pmu(pmu);
 597        struct pmu_hw_events *hw_events = armpmu->get_hw_events();
 598        int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
 599
 600        if (enabled)
 601                armpmu->start();
 602}
 603
 604static void armpmu_disable(struct pmu *pmu)
 605{
 606        struct arm_pmu *armpmu = to_arm_pmu(pmu);
 607        armpmu->stop();
 608}
 609
 610static void __init armpmu_init(struct arm_pmu *armpmu)
 611{
 612        atomic_set(&armpmu->active_events, 0);
 613        mutex_init(&armpmu->reserve_mutex);
 614
 615        armpmu->pmu = (struct pmu) {
 616                .pmu_enable     = armpmu_enable,
 617                .pmu_disable    = armpmu_disable,
 618                .event_init     = armpmu_event_init,
 619                .add            = armpmu_add,
 620                .del            = armpmu_del,
 621                .start          = armpmu_start,
 622                .stop           = armpmu_stop,
 623                .read           = armpmu_read,
 624        };
 625}
 626
 627int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type)
 628{
 629        armpmu_init(armpmu);
 630        return perf_pmu_register(&armpmu->pmu, name, type);
 631}
 632
 633/*
 634 * ARMv8 PMUv3 Performance Events handling code.
 635 * Common event types.
 636 */
 637enum armv8_pmuv3_perf_types {
 638        /* Required events. */
 639        ARMV8_PMUV3_PERFCTR_PMNC_SW_INCR                        = 0x00,
 640        ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL                    = 0x03,
 641        ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS                    = 0x04,
 642        ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED                  = 0x10,
 643        ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES                        = 0x11,
 644        ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED                      = 0x12,
 645
 646        /* At least one of the following is required. */
 647        ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED                      = 0x08,
 648        ARMV8_PMUV3_PERFCTR_OP_SPEC                             = 0x1B,
 649
 650        /* Common architectural events. */
 651        ARMV8_PMUV3_PERFCTR_MEM_READ                            = 0x06,
 652        ARMV8_PMUV3_PERFCTR_MEM_WRITE                           = 0x07,
 653        ARMV8_PMUV3_PERFCTR_EXC_TAKEN                           = 0x09,
 654        ARMV8_PMUV3_PERFCTR_EXC_EXECUTED                        = 0x0A,
 655        ARMV8_PMUV3_PERFCTR_CID_WRITE                           = 0x0B,
 656        ARMV8_PMUV3_PERFCTR_PC_WRITE                            = 0x0C,
 657        ARMV8_PMUV3_PERFCTR_PC_IMM_BRANCH                       = 0x0D,
 658        ARMV8_PMUV3_PERFCTR_PC_PROC_RETURN                      = 0x0E,
 659        ARMV8_PMUV3_PERFCTR_MEM_UNALIGNED_ACCESS                = 0x0F,
 660        ARMV8_PMUV3_PERFCTR_TTBR_WRITE                          = 0x1C,
 661
 662        /* Common microarchitectural events. */
 663        ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL                    = 0x01,
 664        ARMV8_PMUV3_PERFCTR_ITLB_REFILL                         = 0x02,
 665        ARMV8_PMUV3_PERFCTR_DTLB_REFILL                         = 0x05,
 666        ARMV8_PMUV3_PERFCTR_MEM_ACCESS                          = 0x13,
 667        ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS                    = 0x14,
 668        ARMV8_PMUV3_PERFCTR_L1_DCACHE_WB                        = 0x15,
 669        ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS                     = 0x16,
 670        ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL                     = 0x17,
 671        ARMV8_PMUV3_PERFCTR_L2_CACHE_WB                         = 0x18,
 672        ARMV8_PMUV3_PERFCTR_BUS_ACCESS                          = 0x19,
 673        ARMV8_PMUV3_PERFCTR_MEM_ERROR                           = 0x1A,
 674        ARMV8_PMUV3_PERFCTR_BUS_CYCLES                          = 0x1D,
 675};
 676
 677/* PMUv3 HW events mapping. */
 678static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
 679        [PERF_COUNT_HW_CPU_CYCLES]              = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
 680        [PERF_COUNT_HW_INSTRUCTIONS]            = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
 681        [PERF_COUNT_HW_CACHE_REFERENCES]        = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
 682        [PERF_COUNT_HW_CACHE_MISSES]            = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
 683        [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = HW_OP_UNSUPPORTED,
 684        [PERF_COUNT_HW_BRANCH_MISSES]           = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
 685        [PERF_COUNT_HW_BUS_CYCLES]              = HW_OP_UNSUPPORTED,
 686        [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED,
 687        [PERF_COUNT_HW_STALLED_CYCLES_BACKEND]  = HW_OP_UNSUPPORTED,
 688};
 689
 690static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 691                                                [PERF_COUNT_HW_CACHE_OP_MAX]
 692                                                [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
 693        [C(L1D)] = {
 694                [C(OP_READ)] = {
 695                        [C(RESULT_ACCESS)]      = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
 696                        [C(RESULT_MISS)]        = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
 697                },
 698                [C(OP_WRITE)] = {
 699                        [C(RESULT_ACCESS)]      = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
 700                        [C(RESULT_MISS)]        = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
 701                },
 702                [C(OP_PREFETCH)] = {
 703                        [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
 704                        [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
 705                },
 706        },
 707        [C(L1I)] = {
 708                [C(OP_READ)] = {
 709                        [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
 710                        [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
 711                },
 712                [C(OP_WRITE)] = {
 713                        [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
 714                        [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
 715                },
 716                [C(OP_PREFETCH)] = {
 717                        [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
 718                        [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
 719                },
 720        },
 721        [C(LL)] = {
 722                [C(OP_READ)] = {
 723                        [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
 724                        [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
 725                },
 726                [C(OP_WRITE)] = {
 727                        [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
 728                        [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
 729                },
 730                [C(OP_PREFETCH)] = {
 731                        [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
 732                        [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
 733                },
 734        },
 735        [C(DTLB)] = {
 736                [C(OP_READ)] = {
 737                        [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
 738                        [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
 739                },
 740                [C(OP_WRITE)] = {
 741                        [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
 742                        [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
 743                },
 744                [C(OP_PREFETCH)] = {
 745                        [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
 746                        [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
 747                },
 748        },
 749        [C(ITLB)] = {
 750                [C(OP_READ)] = {
 751                        [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
 752                        [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
 753                },
 754                [C(OP_WRITE)] = {
 755                        [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
 756                        [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
 757                },
 758                [C(OP_PREFETCH)] = {
 759                        [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
 760                        [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
 761                },
 762        },
 763        [C(BPU)] = {
 764                [C(OP_READ)] = {
 765                        [C(RESULT_ACCESS)]      = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
 766                        [C(RESULT_MISS)]        = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
 767                },
 768                [C(OP_WRITE)] = {
 769                        [C(RESULT_ACCESS)]      = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
 770                        [C(RESULT_MISS)]        = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
 771                },
 772                [C(OP_PREFETCH)] = {
 773                        [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
 774                        [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
 775                },
 776        },
 777        [C(NODE)] = {
 778                [C(OP_READ)] = {
 779                        [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
 780                        [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
 781                },
 782                [C(OP_WRITE)] = {
 783                        [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
 784                        [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
 785                },
 786                [C(OP_PREFETCH)] = {
 787                        [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
 788                        [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
 789                },
 790        },
 791};
 792
 793/*
 794 * Perf Events' indices
 795 */
 796#define ARMV8_IDX_CYCLE_COUNTER 0
 797#define ARMV8_IDX_COUNTER0      1
 798#define ARMV8_IDX_COUNTER_LAST  (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
 799
 800#define ARMV8_MAX_COUNTERS      32
 801#define ARMV8_COUNTER_MASK      (ARMV8_MAX_COUNTERS - 1)
 802
 803/*
 804 * ARMv8 low level PMU access
 805 */
 806
 807/*
 808 * Perf Event to low level counters mapping
 809 */
 810#define ARMV8_IDX_TO_COUNTER(x) \
 811        (((x) - ARMV8_IDX_COUNTER0) & ARMV8_COUNTER_MASK)
 812
 813/*
 814 * Per-CPU PMCR: config reg
 815 */
 816#define ARMV8_PMCR_E            (1 << 0) /* Enable all counters */
  817#define ARMV8_PMCR_P            (1 << 1) /* Reset all event counters */
 818#define ARMV8_PMCR_C            (1 << 2) /* Cycle counter reset */
 819#define ARMV8_PMCR_D            (1 << 3) /* CCNT counts every 64th cpu cycle */
 820#define ARMV8_PMCR_X            (1 << 4) /* Export to ETM */
  821#define ARMV8_PMCR_DP           (1 << 5) /* Disable CCNT if non-invasive debug */
 822#define ARMV8_PMCR_N_SHIFT      11       /* Number of counters supported */
 823#define ARMV8_PMCR_N_MASK       0x1f
 824#define ARMV8_PMCR_MASK         0x3f     /* Mask for writable bits */
 825
 826/*
 827 * PMOVSR: counters overflow flag status reg
 828 */
 829#define ARMV8_OVSR_MASK         0xffffffff      /* Mask for writable bits */
 830#define ARMV8_OVERFLOWED_MASK   ARMV8_OVSR_MASK
 831
 832/*
 833 * PMXEVTYPER: Event selection reg
 834 */
 835#define ARMV8_EVTYPE_MASK       0xc80003ff      /* Mask for writable bits */
 836#define ARMV8_EVTYPE_EVENT      0x3ff           /* Mask for EVENT bits */
 837
 838/*
 839 * Event filters for PMUv3
 840 */
 841#define ARMV8_EXCLUDE_EL1       (1 << 31)
 842#define ARMV8_EXCLUDE_EL0       (1 << 30)
 843#define ARMV8_INCLUDE_EL2       (1 << 27)
 844
 845static inline u32 armv8pmu_pmcr_read(void)
 846{
 847        u32 val;
 848        asm volatile("mrs %0, pmcr_el0" : "=r" (val));
 849        return val;
 850}
 851
 852static inline void armv8pmu_pmcr_write(u32 val)
 853{
 854        val &= ARMV8_PMCR_MASK;
 855        isb();
 856        asm volatile("msr pmcr_el0, %0" :: "r" (val));
 857}
 858
 859static inline int armv8pmu_has_overflowed(u32 pmovsr)
 860{
 861        return pmovsr & ARMV8_OVERFLOWED_MASK;
 862}
 863
 864static inline int armv8pmu_counter_valid(int idx)
 865{
 866        return idx >= ARMV8_IDX_CYCLE_COUNTER && idx <= ARMV8_IDX_COUNTER_LAST;
 867}
 868
 869static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
 870{
 871        int ret = 0;
 872        u32 counter;
 873
 874        if (!armv8pmu_counter_valid(idx)) {
 875                pr_err("CPU%u checking wrong counter %d overflow status\n",
 876                        smp_processor_id(), idx);
 877        } else {
 878                counter = ARMV8_IDX_TO_COUNTER(idx);
 879                ret = pmnc & BIT(counter);
 880        }
 881
 882        return ret;
 883}
 884
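     /*
      * Select a counter via PMSELR_EL0 so that subsequent PMXEVCNTR/PMXEVTYPER
      * accesses are directed at it.
      */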
 885static inline int armv8pmu_select_counter(int idx)
 886{
 887        u32 counter;
 888
 889        if (!armv8pmu_counter_valid(idx)) {
 890                pr_err("CPU%u selecting wrong PMNC counter %d\n",
 891                        smp_processor_id(), idx);
 892                return -EINVAL;
 893        }
 894
 895        counter = ARMV8_IDX_TO_COUNTER(idx);
 896        asm volatile("msr pmselr_el0, %0" :: "r" (counter));
 897        isb();
 898
 899        return idx;
 900}
 901
 902static inline u32 armv8pmu_read_counter(int idx)
 903{
 904        u32 value = 0;
 905
 906        if (!armv8pmu_counter_valid(idx))
 907                pr_err("CPU%u reading wrong counter %d\n",
 908                        smp_processor_id(), idx);
 909        else if (idx == ARMV8_IDX_CYCLE_COUNTER)
 910                asm volatile("mrs %0, pmccntr_el0" : "=r" (value));
 911        else if (armv8pmu_select_counter(idx) == idx)
 912                asm volatile("mrs %0, pmxevcntr_el0" : "=r" (value));
 913
 914        return value;
 915}
 916
 917static inline void armv8pmu_write_counter(int idx, u32 value)
 918{
 919        if (!armv8pmu_counter_valid(idx))
 920                pr_err("CPU%u writing wrong counter %d\n",
 921                        smp_processor_id(), idx);
 922        else if (idx == ARMV8_IDX_CYCLE_COUNTER)
 923                asm volatile("msr pmccntr_el0, %0" :: "r" (value));
 924        else if (armv8pmu_select_counter(idx) == idx)
 925                asm volatile("msr pmxevcntr_el0, %0" :: "r" (value));
 926}
 927
 928static inline void armv8pmu_write_evtype(int idx, u32 val)
 929{
 930        if (armv8pmu_select_counter(idx) == idx) {
 931                val &= ARMV8_EVTYPE_MASK;
 932                asm volatile("msr pmxevtyper_el0, %0" :: "r" (val));
 933        }
 934}
 935
 936static inline int armv8pmu_enable_counter(int idx)
 937{
 938        u32 counter;
 939
 940        if (!armv8pmu_counter_valid(idx)) {
 941                pr_err("CPU%u enabling wrong PMNC counter %d\n",
 942                        smp_processor_id(), idx);
 943                return -EINVAL;
 944        }
 945
 946        counter = ARMV8_IDX_TO_COUNTER(idx);
 947        asm volatile("msr pmcntenset_el0, %0" :: "r" (BIT(counter)));
 948        return idx;
 949}
 950
 951static inline int armv8pmu_disable_counter(int idx)
 952{
 953        u32 counter;
 954
 955        if (!armv8pmu_counter_valid(idx)) {
 956                pr_err("CPU%u disabling wrong PMNC counter %d\n",
 957                        smp_processor_id(), idx);
 958                return -EINVAL;
 959        }
 960
 961        counter = ARMV8_IDX_TO_COUNTER(idx);
 962        asm volatile("msr pmcntenclr_el0, %0" :: "r" (BIT(counter)));
 963        return idx;
 964}
 965
 966static inline int armv8pmu_enable_intens(int idx)
 967{
 968        u32 counter;
 969
 970        if (!armv8pmu_counter_valid(idx)) {
 971                pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n",
 972                        smp_processor_id(), idx);
 973                return -EINVAL;
 974        }
 975
 976        counter = ARMV8_IDX_TO_COUNTER(idx);
 977        asm volatile("msr pmintenset_el1, %0" :: "r" (BIT(counter)));
 978        return idx;
 979}
 980
 981static inline int armv8pmu_disable_intens(int idx)
 982{
 983        u32 counter;
 984
 985        if (!armv8pmu_counter_valid(idx)) {
 986                pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n",
 987                        smp_processor_id(), idx);
 988                return -EINVAL;
 989        }
 990
 991        counter = ARMV8_IDX_TO_COUNTER(idx);
 992        asm volatile("msr pmintenclr_el1, %0" :: "r" (BIT(counter)));
 993        isb();
 994        /* Clear the overflow flag in case an interrupt is pending. */
 995        asm volatile("msr pmovsclr_el0, %0" :: "r" (BIT(counter)));
 996        isb();
 997        return idx;
 998}
 999
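     /*
      * Read the overflow status flags and clear the ones that were set,
      * returning the snapshot to the caller.
      */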
1000static inline u32 armv8pmu_getreset_flags(void)
1001{
1002        u32 value;
1003
1004        /* Read */
1005        asm volatile("mrs %0, pmovsclr_el0" : "=r" (value));
1006
1007        /* Write to clear flags */
1008        value &= ARMV8_OVSR_MASK;
1009        asm volatile("msr pmovsclr_el0, %0" :: "r" (value));
1010
1011        return value;
1012}
1013
1014static void armv8pmu_enable_event(struct hw_perf_event *hwc, int idx)
1015{
1016        unsigned long flags;
1017        struct pmu_hw_events *events = cpu_pmu->get_hw_events();
1018
1019        /*
1020         * Enable counter and interrupt, and set the counter to count
1021         * the event that we're interested in.
1022         */
1023        raw_spin_lock_irqsave(&events->pmu_lock, flags);
1024
1025        /*
1026         * Disable counter
1027         */
1028        armv8pmu_disable_counter(idx);
1029
1030        /*
1031         * Set event (if destined for PMNx counters).
1032         */
1033        armv8pmu_write_evtype(idx, hwc->config_base);
1034
1035        /*
1036         * Enable interrupt for this counter
1037         */
1038        armv8pmu_enable_intens(idx);
1039
1040        /*
1041         * Enable counter
1042         */
1043        armv8pmu_enable_counter(idx);
1044
1045        raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
1046}
1047
1048static void armv8pmu_disable_event(struct hw_perf_event *hwc, int idx)
1049{
1050        unsigned long flags;
1051        struct pmu_hw_events *events = cpu_pmu->get_hw_events();
1052
1053        /*
1054         * Disable counter and interrupt
1055         */
1056        raw_spin_lock_irqsave(&events->pmu_lock, flags);
1057
1058        /*
1059         * Disable counter
1060         */
1061        armv8pmu_disable_counter(idx);
1062
1063        /*
1064         * Disable interrupt for this counter
1065         */
1066        armv8pmu_disable_intens(idx);
1067
1068        raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
1069}
1070
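     /*
      * Overflow interrupt handler: snapshot and clear the overflow flags,
      * then update and re-arm every counter that overflowed, pushing a
      * sample to perf for each one.
      */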
1071static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
1072{
1073        u32 pmovsr;
1074        struct perf_sample_data data;
1075        struct pmu_hw_events *cpuc;
1076        struct pt_regs *regs;
1077        int idx;
1078
1079        /*
1080         * Get and reset the IRQ flags
1081         */
1082        pmovsr = armv8pmu_getreset_flags();
1083
1084        /*
1085         * Did an overflow occur?
1086         */
1087        if (!armv8pmu_has_overflowed(pmovsr))
1088                return IRQ_NONE;
1089
1090        /*
1091         * Handle the counter(s) overflow(s)
1092         */
1093        regs = get_irq_regs();
1094
1095        cpuc = this_cpu_ptr(&cpu_hw_events);
1096        for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
1097                struct perf_event *event = cpuc->events[idx];
1098                struct hw_perf_event *hwc;
1099
1100                /* Ignore if we don't have an event. */
1101                if (!event)
1102                        continue;
1103
1104                /*
1105                 * We have a single interrupt for all counters. Check that
1106                 * each counter has overflowed before we process it.
1107                 */
1108                if (!armv8pmu_counter_has_overflowed(pmovsr, idx))
1109                        continue;
1110
1111                hwc = &event->hw;
1112                armpmu_event_update(event, hwc, idx);
1113                perf_sample_data_init(&data, 0, hwc->last_period);
1114                if (!armpmu_event_set_period(event, hwc, idx))
1115                        continue;
1116
1117                if (perf_event_overflow(event, &data, regs))
1118                        cpu_pmu->disable(hwc, idx);
1119        }
1120
1121        /*
1122         * Handle the pending perf events.
1123         *
1124         * Note: this call *must* be run with interrupts disabled. For
1125         * platforms that can have the PMU interrupts raised as an NMI, this
1126         * will not work.
1127         */
1128        irq_work_run();
1129
1130        return IRQ_HANDLED;
1131}
1132
1133static void armv8pmu_start(void)
1134{
1135        unsigned long flags;
1136        struct pmu_hw_events *events = cpu_pmu->get_hw_events();
1137
1138        raw_spin_lock_irqsave(&events->pmu_lock, flags);
1139        /* Enable all counters */
1140        armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMCR_E);
1141        raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
1142}
1143
1144static void armv8pmu_stop(void)
1145{
1146        unsigned long flags;
1147        struct pmu_hw_events *events = cpu_pmu->get_hw_events();
1148
1149        raw_spin_lock_irqsave(&events->pmu_lock, flags);
1150        /* Disable all counters */
1151        armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMCR_E);
1152        raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
1153}
1154
1155static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
1156                                  struct hw_perf_event *event)
1157{
1158        int idx;
1159        unsigned long evtype = event->config_base & ARMV8_EVTYPE_EVENT;
1160
 1161        /* Always place a cycle-counting event on the dedicated cycle counter. */
1162        if (evtype == ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES) {
1163                if (test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask))
1164                        return -EAGAIN;
1165
1166                return ARMV8_IDX_CYCLE_COUNTER;
1167        }
1168
 1169        /*
 1170         * For anything other than a cycle counter, try to use
 1171         * one of the event counters.
 1172         */
1173        for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) {
1174                if (!test_and_set_bit(idx, cpuc->used_mask))
1175                        return idx;
1176        }
1177
1178        /* The counters are all in use. */
1179        return -EAGAIN;
1180}
1181
1182/*
 1183 * Add an event filter to a given event. This will only work for PMUv3 PMUs.
1184 */
1185static int armv8pmu_set_event_filter(struct hw_perf_event *event,
1186                                     struct perf_event_attr *attr)
1187{
1188        unsigned long config_base = 0;
1189
1190        if (attr->exclude_idle)
1191                return -EPERM;
1192        if (attr->exclude_user)
1193                config_base |= ARMV8_EXCLUDE_EL0;
1194        if (attr->exclude_kernel)
1195                config_base |= ARMV8_EXCLUDE_EL1;
1196        if (!attr->exclude_hv)
1197                config_base |= ARMV8_INCLUDE_EL2;
1198
1199        /*
1200         * Install the filter into config_base as this is used to
1201         * construct the event type.
1202         */
1203        event->config_base = config_base;
1204
1205        return 0;
1206}
1207
1208static void armv8pmu_reset(void *info)
1209{
1210        u32 idx, nb_cnt = cpu_pmu->num_events;
1211
1212        /* The counter and interrupt enable registers are unknown at reset. */
1213        for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx)
1214                armv8pmu_disable_event(NULL, idx);
1215
1216        /* Initialize & Reset PMNC: C and P bits. */
1217        armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C);
1218
1219        /* Disable access from userspace. */
1220        asm volatile("msr pmuserenr_el0, %0" :: "r" (0));
1221}
1222
1223static int armv8_pmuv3_map_event(struct perf_event *event)
1224{
1225        return map_cpu_event(event, &armv8_pmuv3_perf_map,
1226                                &armv8_pmuv3_perf_cache_map,
1227                                ARMV8_EVTYPE_EVENT);
1228}
1229
1230static struct arm_pmu armv8pmu = {
1231        .handle_irq             = armv8pmu_handle_irq,
1232        .enable                 = armv8pmu_enable_event,
1233        .disable                = armv8pmu_disable_event,
1234        .read_counter           = armv8pmu_read_counter,
1235        .write_counter          = armv8pmu_write_counter,
1236        .get_event_idx          = armv8pmu_get_event_idx,
1237        .start                  = armv8pmu_start,
1238        .stop                   = armv8pmu_stop,
1239        .reset                  = armv8pmu_reset,
1240        .max_period             = (1LLU << 32) - 1,
1241};
1242
1243static u32 __init armv8pmu_read_num_pmnc_events(void)
1244{
1245        u32 nb_cnt;
1246
 1247        /* Read the number of CNTx counters supported from PMNC */
1248        nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK;
1249
1250        /* Add the CPU cycles counter and return */
1251        return nb_cnt + 1;
1252}
1253
1254static struct arm_pmu *__init armv8_pmuv3_pmu_init(void)
1255{
1256        armv8pmu.name                   = "arm/armv8-pmuv3";
1257        armv8pmu.map_event              = armv8_pmuv3_map_event;
1258        armv8pmu.num_events             = armv8pmu_read_num_pmnc_events();
1259        armv8pmu.set_event_filter       = armv8pmu_set_event_filter;
1260        return &armv8pmu;
1261}
1262
1263/*
1264 * Ensure the PMU has sane values out of reset.
 1265 * This requires SMP to be available, so it exists as a separate initcall.
1266 */
1267static int __init
1268cpu_pmu_reset(void)
1269{
1270        if (cpu_pmu && cpu_pmu->reset)
1271                return on_each_cpu(cpu_pmu->reset, NULL, 1);
1272        return 0;
1273}
1274arch_initcall(cpu_pmu_reset);
1275
1276/*
1277 * PMU platform driver and devicetree bindings.
1278 */
1279static struct of_device_id armpmu_of_device_ids[] = {
1280        {.compatible = "arm,armv8-pmuv3"},
1281        {},
1282};
1283
1284static int armpmu_device_probe(struct platform_device *pdev)
1285{
1286        if (!cpu_pmu)
1287                return -ENODEV;
1288
1289        cpu_pmu->plat_device = pdev;
1290        return 0;
1291}
1292
1293static struct platform_driver armpmu_driver = {
1294        .driver         = {
1295                .name   = "arm-pmu",
1296                .of_match_table = armpmu_of_device_ids,
1297        },
1298        .probe          = armpmu_device_probe,
1299};
1300
1301static int __init register_pmu_driver(void)
1302{
1303        return platform_driver_register(&armpmu_driver);
1304}
1305device_initcall(register_pmu_driver);
1306
1307static struct pmu_hw_events *armpmu_get_cpu_events(void)
1308{
1309        return this_cpu_ptr(&cpu_hw_events);
1310}
1311
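     /*
      * Point each CPU's pmu_hw_events at its event array and used_mask,
      * initialise the lock, and install the accessor used by the common code.
      */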
1312static void __init cpu_pmu_init(struct arm_pmu *armpmu)
1313{
1314        int cpu;
1315        for_each_possible_cpu(cpu) {
1316                struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
1317                events->events = per_cpu(hw_events, cpu);
1318                events->used_mask = per_cpu(used_mask, cpu);
1319                raw_spin_lock_init(&events->pmu_lock);
1320        }
1321        armpmu->get_hw_events = armpmu_get_cpu_events;
1322}
1323
1324static int __init init_hw_perf_events(void)
1325{
1326        u64 dfr = read_cpuid(ID_AA64DFR0_EL1);
1327
1328        switch ((dfr >> 8) & 0xf) {
1329        case 0x1:       /* PMUv3 */
1330                cpu_pmu = armv8_pmuv3_pmu_init();
1331                break;
1332        }
1333
1334        if (cpu_pmu) {
1335                pr_info("enabled with %s PMU driver, %d counters available\n",
1336                        cpu_pmu->name, cpu_pmu->num_events);
1337                cpu_pmu_init(cpu_pmu);
1338                armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW);
1339        } else {
1340                pr_info("no hardware support available\n");
1341        }
1342
1343        return 0;
1344}
1345early_initcall(init_hw_perf_events);
1346
1347/*
1348 * Callchain handling code.
1349 */
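     /*
      * AArch64 user-space frame record: the saved frame pointer followed by
      * the saved link register.
      */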
1350struct frame_tail {
1351        struct frame_tail       __user *fp;
1352        unsigned long           lr;
1353} __attribute__((packed));
1354
1355/*
1356 * Get the return address for a single stackframe and return a pointer to the
1357 * next frame tail.
1358 */
1359static struct frame_tail __user *
1360user_backtrace(struct frame_tail __user *tail,
1361               struct perf_callchain_entry *entry)
1362{
1363        struct frame_tail buftail;
1364        unsigned long err;
1365
1366        /* Also check accessibility of one struct frame_tail beyond */
1367        if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
1368                return NULL;
1369
1370        pagefault_disable();
1371        err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
1372        pagefault_enable();
1373
1374        if (err)
1375                return NULL;
1376
1377        perf_callchain_store(entry, buftail.lr);
1378
1379        /*
1380         * Frame pointers should strictly progress back up the stack
1381         * (towards higher addresses).
1382         */
1383        if (tail >= buftail.fp)
1384                return NULL;
1385
1386        return buftail.fp;
1387}
1388
1389#ifdef CONFIG_COMPAT
1390/*
1391 * The registers we're interested in are at the end of the variable
1392 * length saved register structure. The fp points at the end of this
1393 * structure so the address of this struct is:
1394 * (struct compat_frame_tail *)(xxx->fp)-1
1395 *
1396 * This code has been adapted from the ARM OProfile support.
1397 */
1398struct compat_frame_tail {
1399        compat_uptr_t   fp; /* a (struct compat_frame_tail *) in compat mode */
1400        u32             sp;
1401        u32             lr;
1402} __attribute__((packed));
1403
1404static struct compat_frame_tail __user *
1405compat_user_backtrace(struct compat_frame_tail __user *tail,
1406                      struct perf_callchain_entry *entry)
1407{
1408        struct compat_frame_tail buftail;
1409        unsigned long err;
1410
1411        /* Also check accessibility of one struct frame_tail beyond */
1412        if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
1413                return NULL;
1414
1415        pagefault_disable();
1416        err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
1417        pagefault_enable();
1418
1419        if (err)
1420                return NULL;
1421
1422        perf_callchain_store(entry, buftail.lr);
1423
1424        /*
1425         * Frame pointers should strictly progress back up the stack
1426         * (towards higher addresses).
1427         */
1428        if (tail + 1 >= (struct compat_frame_tail __user *)
1429                        compat_ptr(buftail.fp))
1430                return NULL;
1431
1432        return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1;
1433}
1434#endif /* CONFIG_COMPAT */
1435
1436void perf_callchain_user(struct perf_callchain_entry *entry,
1437                         struct pt_regs *regs)
1438{
1439        if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1440                /* We don't support guest os callchain now */
1441                return;
1442        }
1443
1444        perf_callchain_store(entry, regs->pc);
1445
1446        if (!compat_user_mode(regs)) {
1447                /* AARCH64 mode */
1448                struct frame_tail __user *tail;
1449
1450                tail = (struct frame_tail __user *)regs->regs[29];
1451
1452                while (entry->nr < PERF_MAX_STACK_DEPTH &&
1453                       tail && !((unsigned long)tail & 0xf))
1454                        tail = user_backtrace(tail, entry);
1455        } else {
1456#ifdef CONFIG_COMPAT
1457                /* AARCH32 compat mode */
1458                struct compat_frame_tail __user *tail;
1459
1460                tail = (struct compat_frame_tail __user *)regs->compat_fp - 1;
1461
1462                while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
1463                        tail && !((unsigned long)tail & 0x3))
1464                        tail = compat_user_backtrace(tail, entry);
1465#endif
1466        }
1467}
1468
1469/*
1470 * Gets called by walk_stackframe() for every stackframe. This will be called
 1471 * whilst unwinding the stackframe and is like a subroutine return so we use
1472 * the PC.
1473 */
1474static int callchain_trace(struct stackframe *frame, void *data)
1475{
1476        struct perf_callchain_entry *entry = data;
1477        perf_callchain_store(entry, frame->pc);
1478        return 0;
1479}
1480
1481void perf_callchain_kernel(struct perf_callchain_entry *entry,
1482                           struct pt_regs *regs)
1483{
1484        struct stackframe frame;
1485
1486        if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1487                /* We don't support guest os callchain now */
1488                return;
1489        }
1490
1491        frame.fp = regs->regs[29];
1492        frame.sp = regs->sp;
1493        frame.pc = regs->pc;
1494
1495        walk_stackframe(&frame, callchain_trace, entry);
1496}
1497
1498unsigned long perf_instruction_pointer(struct pt_regs *regs)
1499{
1500        if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
1501                return perf_guest_cbs->get_guest_ip();
1502
1503        return instruction_pointer(regs);
1504}
1505
1506unsigned long perf_misc_flags(struct pt_regs *regs)
1507{
1508        int misc = 0;
1509
1510        if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1511                if (perf_guest_cbs->is_user_mode())
1512                        misc |= PERF_RECORD_MISC_GUEST_USER;
1513                else
1514                        misc |= PERF_RECORD_MISC_GUEST_KERNEL;
1515        } else {
1516                if (user_mode(regs))
1517                        misc |= PERF_RECORD_MISC_USER;
1518                else
1519                        misc |= PERF_RECORD_MISC_KERNEL;
1520        }
1521
1522        return misc;
1523}
1524