linux/arch/arm/kernel/perf_event.c
#undef DEBUG

/*
 * ARM performance counter support.
 *
 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
 * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
 *
 * This code is based on the sparc64 perf event code, which is in turn based
 * on the x86 code. Callchain code is based on the ARM OProfile backtrace
 * code.
 */
#define pr_fmt(fmt) "hw perfevents: " fmt

#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/uaccess.h>
#include <linux/irq.h>
#include <linux/irqdesc.h>

#include <asm/irq_regs.h>
#include <asm/pmu.h>
#include <asm/stacktrace.h>

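/*
 * Map a PERF_TYPE_HW_CACHE config onto a hardware event number using the
 * PMU's cache map. The config value is decoded as:
 *
 *	bits  0-7:  cache type (L1D, L1I, LL, DTLB, ITLB, BPU, ...)
 *	bits  8-15: cache operation (read, write, prefetch)
 *	bits 16-23: cache result (access, miss)
 *
 * Returns -EINVAL for an out-of-range field and -ENOENT when the map
 * flags the combination as CACHE_OP_UNSUPPORTED.
 */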
static int
armpmu_map_cache_event(const unsigned (*cache_map)
                                      [PERF_COUNT_HW_CACHE_MAX]
                                      [PERF_COUNT_HW_CACHE_OP_MAX]
                                      [PERF_COUNT_HW_CACHE_RESULT_MAX],
                       u64 config)
{
        unsigned int cache_type, cache_op, cache_result, ret;

        cache_type = (config >>  0) & 0xff;
        if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
                return -EINVAL;

        cache_op = (config >>  8) & 0xff;
        if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
                return -EINVAL;

        cache_result = (config >> 16) & 0xff;
        if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                return -EINVAL;

        ret = (int)(*cache_map)[cache_type][cache_op][cache_result];

        if (ret == CACHE_OP_UNSUPPORTED)
                return -ENOENT;

        return ret;
}

static int
armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
{
        int mapping;

        if (config >= PERF_COUNT_HW_MAX)
                return -EINVAL;

        mapping = (*event_map)[config];
        return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
}

static int
armpmu_map_raw_event(u32 raw_event_mask, u64 config)
{
        return (int)(config & raw_event_mask);
}

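/*
 * Map a generic perf event onto this PMU's event encoding, dispatching on
 * the event type: the hardware event map for PERF_TYPE_HARDWARE, the cache
 * map for PERF_TYPE_HW_CACHE, and a simple mask for PERF_TYPE_RAW. Any
 * other event type is not ours, so return -ENOENT.
 */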
int
armpmu_map_event(struct perf_event *event,
                 const unsigned (*event_map)[PERF_COUNT_HW_MAX],
                 const unsigned (*cache_map)
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX],
                 u32 raw_event_mask)
{
        u64 config = event->attr.config;

        switch (event->attr.type) {
        case PERF_TYPE_HARDWARE:
                return armpmu_map_hw_event(event_map, config);
        case PERF_TYPE_HW_CACHE:
                return armpmu_map_cache_event(cache_map, config);
        case PERF_TYPE_RAW:
                return armpmu_map_raw_event(raw_event_mask, config);
        }

        return -ENOENT;
}

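/*
 * (Re)program the hardware counter for the event's current sample period.
 * The counter is written with -left (truncated to the 32-bit counter
 * width) so that it overflows after 'left' more events. Returns 1 if a
 * new sample period was started, 0 otherwise.
 */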
int armpmu_event_set_period(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;
        s64 left = local64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        int ret = 0;

        if (unlikely(left <= -period)) {
                left = period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }

        if (unlikely(left <= 0)) {
                left += period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }

        if (left > (s64)armpmu->max_period)
                left = armpmu->max_period;

        local64_set(&hwc->prev_count, (u64)-left);

        armpmu->write_counter(event, (u64)(-left) & 0xffffffff);

        perf_event_update_userpage(event);

        return ret;
}

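/*
 * Fold the delta since the last read into the event count. prev_count is
 * updated with a cmpxchg loop so that a concurrent update (e.g. from the
 * overflow interrupt) isn't lost, and the delta is masked with max_period
 * to cope with the counter wrapping around.
 */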
u64 armpmu_event_update(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;
        u64 delta, prev_raw_count, new_raw_count;

again:
        prev_raw_count = local64_read(&hwc->prev_count);
        new_raw_count = armpmu->read_counter(event);

        if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                             new_raw_count) != prev_raw_count)
                goto again;

        delta = (new_raw_count - prev_raw_count) & armpmu->max_period;

        local64_add(delta, &event->count);
        local64_sub(delta, &hwc->period_left);

        return new_raw_count;
}

static void
armpmu_read(struct perf_event *event)
{
        armpmu_event_update(event);
}

static void
armpmu_stop(struct perf_event *event, int flags)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;

        /*
         * ARM pmu always has to update the counter, so ignore
         * PERF_EF_UPDATE, see comments in armpmu_start().
         */
        if (!(hwc->state & PERF_HES_STOPPED)) {
                armpmu->disable(event);
                armpmu_event_update(event);
                hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
        }
}

static void armpmu_start(struct perf_event *event, int flags)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;

        /*
         * ARM pmu always has to reprogram the period, so ignore
         * PERF_EF_RELOAD, see the comment below.
         */
        if (flags & PERF_EF_RELOAD)
                WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

        hwc->state = 0;
        /*
         * Set the period again. Some counters can't be stopped, so when we
         * were stopped we simply disabled the IRQ source and the counter
         * may have been left counting. If we don't do this step then we may
         * get an interrupt too soon or *way* too late if the overflow has
         * happened since disabling.
         */
        armpmu_event_set_period(event);
        armpmu->enable(event);
}

static void
armpmu_del(struct perf_event *event, int flags)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct pmu_hw_events *hw_events = armpmu->get_hw_events();
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        armpmu_stop(event, PERF_EF_UPDATE);
        hw_events->events[idx] = NULL;
        clear_bit(idx, hw_events->used_mask);
        if (armpmu->clear_event_idx)
                armpmu->clear_event_idx(hw_events, event);

        perf_event_update_userpage(event);
}

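/*
 * Add an event to the PMU: ask the back-end for a free counter index,
 * install the event there in a stopped state and, if PERF_EF_START is
 * set, start it immediately. The whole operation runs with the PMU
 * disabled to keep the hardware state consistent.
 */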
static int
armpmu_add(struct perf_event *event, int flags)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct pmu_hw_events *hw_events = armpmu->get_hw_events();
        struct hw_perf_event *hwc = &event->hw;
        int idx;
        int err = 0;

        perf_pmu_disable(event->pmu);

        /* If we don't have space for the counter then finish early. */
        idx = armpmu->get_event_idx(hw_events, event);
        if (idx < 0) {
                err = idx;
                goto out;
        }

        /*
         * If there is an event in the counter we are going to use then make
         * sure it is disabled.
         */
        event->hw.idx = idx;
        armpmu->disable(event);
        hw_events->events[idx] = event;

        hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
        if (flags & PERF_EF_START)
                armpmu_start(event, PERF_EF_RELOAD);

        /* Propagate our changes to the userspace mapping. */
        perf_event_update_userpage(event);

out:
        perf_pmu_enable(event->pmu);
        return err;
}

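/*
 * Check whether a single event could be scheduled on this PMU. Software
 * events, events in an error state and events that are off without
 * enable_on_exec are always accepted; anything else must be able to get
 * a counter from the back-end.
 */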
static int
validate_event(struct pmu_hw_events *hw_events,
               struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);

        if (is_software_event(event))
                return 1;

        if (event->state < PERF_EVENT_STATE_OFF)
                return 1;

        if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
                return 1;

        return armpmu->get_event_idx(hw_events, event) >= 0;
}

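/*
 * Check that an event's whole group can go on the PMU at the same time,
 * by running the counter allocation against a fake PMU that only tracks
 * which counters would be used.
 */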
static int
validate_group(struct perf_event *event)
{
        struct perf_event *sibling, *leader = event->group_leader;
        struct pmu_hw_events fake_pmu;
        DECLARE_BITMAP(fake_used_mask, ARMPMU_MAX_HWEVENTS);

        /*
         * Initialise the fake PMU. We only need to populate the
         * used_mask for the purposes of validation.
         */
        memset(fake_used_mask, 0, sizeof(fake_used_mask));
        fake_pmu.used_mask = fake_used_mask;

        if (!validate_event(&fake_pmu, leader))
                return -EINVAL;

        list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
                if (!validate_event(&fake_pmu, sibling))
                        return -EINVAL;
        }

        if (!validate_event(&fake_pmu, event))
                return -EINVAL;

        return 0;
}

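/*
 * Common PMU interrupt entry point. For per-CPU interrupts the dev cookie
 * is a per-CPU pointer that must be dereferenced first. An optional
 * platform handle_irq hook may wrap the PMU's own handler. The time spent
 * in the handler is reported to the perf core, which uses it to limit the
 * sampling rate if handlers run for too long.
 */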
static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
{
        struct arm_pmu *armpmu;
        struct platform_device *plat_device;
        struct arm_pmu_platdata *plat;
        int ret;
        u64 start_clock, finish_clock;

        if (irq_is_percpu(irq))
                dev = *(void **)dev;
        armpmu = dev;
        plat_device = armpmu->plat_device;
        plat = dev_get_platdata(&plat_device->dev);

        start_clock = sched_clock();
        if (plat && plat->handle_irq)
                ret = plat->handle_irq(irq, dev, armpmu->handle_irq);
        else
                ret = armpmu->handle_irq(irq, dev);
        finish_clock = sched_clock();

        perf_sample_event_took(finish_clock - start_clock);
        return ret;
}

static void
armpmu_release_hardware(struct arm_pmu *armpmu)
{
        armpmu->free_irq(armpmu);
        pm_runtime_put_sync(&armpmu->plat_device->dev);
}

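/*
 * Claim the PMU hardware for use: take a runtime PM reference on the
 * platform device and request the PMU interrupt(s), routed through
 * armpmu_dispatch_irq. Undone by armpmu_release_hardware().
 */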
static int
armpmu_reserve_hardware(struct arm_pmu *armpmu)
{
        int err;
        struct platform_device *pmu_device = armpmu->plat_device;

        if (!pmu_device)
                return -ENODEV;

        pm_runtime_get_sync(&pmu_device->dev);
        err = armpmu->request_irq(armpmu, armpmu_dispatch_irq);
        if (err) {
                armpmu_release_hardware(armpmu);
                return err;
        }

        return 0;
}

static void
hw_perf_event_destroy(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        atomic_t *active_events  = &armpmu->active_events;
        struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;

        if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
                armpmu_release_hardware(armpmu);
                mutex_unlock(pmu_reserve_mutex);
        }
}

static int
event_requires_mode_exclusion(struct perf_event_attr *attr)
{
        return attr->exclude_idle || attr->exclude_user ||
               attr->exclude_kernel || attr->exclude_hv;
}

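/*
 * Per-event initialisation: map the event onto a hardware encoding,
 * reset the hw state (no counter assigned yet), reject mode exclusion
 * if the PMU can't filter by privilege level, pick a default sample
 * period for counting (non-sampling) events and validate the group.
 */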
static int
__hw_perf_event_init(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;
        int mapping;

        mapping = armpmu->map_event(event);

        if (mapping < 0) {
                pr_debug("event %x:%llx not supported\n", event->attr.type,
                         event->attr.config);
                return mapping;
        }

        /*
         * We don't assign an index until we actually place the event onto
         * hardware. Use -1 to signify that we haven't decided where to put it
         * yet. For SMP systems, each core has its own PMU so we can't do any
         * clever allocation or constraints checking at this point.
         */
        hwc->idx                = -1;
        hwc->config_base        = 0;
        hwc->config             = 0;
        hwc->event_base         = 0;

        /*
         * Check whether we need to exclude the counter from certain modes.
         */
        if ((!armpmu->set_event_filter ||
             armpmu->set_event_filter(hwc, &event->attr)) &&
             event_requires_mode_exclusion(&event->attr)) {
                pr_debug("ARM performance counters do not support "
                         "mode exclusion\n");
                return -EOPNOTSUPP;
        }

        /*
         * Store the event encoding into the config_base field.
         */
        hwc->config_base            |= (unsigned long)mapping;

        if (!is_sampling_event(event)) {
                /*
                 * For non-sampling runs, limit the sample_period to half
                 * of the counter width. That way, the new counter value
                 * is far less likely to overtake the previous one unless
                 * you have some serious IRQ latency issues.
                 */
                hwc->sample_period  = armpmu->max_period >> 1;
                hwc->last_period    = hwc->sample_period;
                local64_set(&hwc->period_left, hwc->sample_period);
        }

        if (event->group_leader != event) {
                if (validate_group(event) != 0)
                        return -EINVAL;
        }

        return 0;
}

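/*
 * pmu::event_init callback. Branch stack sampling is not supported, and
 * events this PMU can't map are rejected with -ENOENT so the core can try
 * another PMU. The PMU hardware (IRQs, runtime PM) is reserved when the
 * first event is created and released again when the last one is
 * destroyed, tracked via the active_events refcount.
 */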
static int armpmu_event_init(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        int err = 0;
        atomic_t *active_events = &armpmu->active_events;

        /* does not support taken branch sampling */
        if (has_branch_stack(event))
                return -EOPNOTSUPP;

        if (armpmu->map_event(event) == -ENOENT)
                return -ENOENT;

        event->destroy = hw_perf_event_destroy;

        if (!atomic_inc_not_zero(active_events)) {
                mutex_lock(&armpmu->reserve_mutex);
                if (atomic_read(active_events) == 0)
                        err = armpmu_reserve_hardware(armpmu);

                if (!err)
                        atomic_inc(active_events);
                mutex_unlock(&armpmu->reserve_mutex);
        }

        if (err)
                return err;

        err = __hw_perf_event_init(event);
        if (err)
                hw_perf_event_destroy(event);

        return err;
}

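/*
 * pmu::pmu_enable / pmu::pmu_disable callbacks. The PMU is only started
 * when at least one counter is in use; disabling always stops it.
 */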
static void armpmu_enable(struct pmu *pmu)
{
        struct arm_pmu *armpmu = to_arm_pmu(pmu);
        struct pmu_hw_events *hw_events = armpmu->get_hw_events();
        int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);

        if (enabled)
                armpmu->start(armpmu);
}

static void armpmu_disable(struct pmu *pmu)
{
        struct arm_pmu *armpmu = to_arm_pmu(pmu);
        armpmu->stop(armpmu);
}

#ifdef CONFIG_PM_RUNTIME
static int armpmu_runtime_resume(struct device *dev)
{
        struct arm_pmu_platdata *plat = dev_get_platdata(dev);

        if (plat && plat->runtime_resume)
                return plat->runtime_resume(dev);

        return 0;
}

static int armpmu_runtime_suspend(struct device *dev)
{
        struct arm_pmu_platdata *plat = dev_get_platdata(dev);

        if (plat && plat->runtime_suspend)
                return plat->runtime_suspend(dev);

        return 0;
}
#endif

const struct dev_pm_ops armpmu_dev_pm_ops = {
        SET_RUNTIME_PM_OPS(armpmu_runtime_suspend, armpmu_runtime_resume, NULL)
};

static void armpmu_init(struct arm_pmu *armpmu)
{
        atomic_set(&armpmu->active_events, 0);
        mutex_init(&armpmu->reserve_mutex);

        armpmu->pmu = (struct pmu) {
                .pmu_enable     = armpmu_enable,
                .pmu_disable    = armpmu_disable,
                .event_init     = armpmu_event_init,
                .add            = armpmu_add,
                .del            = armpmu_del,
                .start          = armpmu_start,
                .stop           = armpmu_stop,
                .read           = armpmu_read,
        };
}

int armpmu_register(struct arm_pmu *armpmu, int type)
{
        armpmu_init(armpmu);
        pm_runtime_enable(&armpmu->plat_device->dev);
        pr_info("enabled with %s PMU driver, %d counters available\n",
                        armpmu->name, armpmu->num_events);
        return perf_pmu_register(&armpmu->pmu, armpmu->name, type);
}

/*
 * Callchain handling code.
 */

/*
 * The registers we're interested in are at the end of the variable
 * length saved register structure. The fp points at the end of this
 * structure so the address of this struct is:
 * (struct frame_tail *)(xxx->fp)-1
 *
 * This code has been adapted from the ARM OProfile support.
 */
struct frame_tail {
        struct frame_tail __user *fp;
        unsigned long sp;
        unsigned long lr;
} __attribute__((packed));

/*
 * Get the return address for a single stackframe and return a pointer to the
 * next frame tail.
 */
static struct frame_tail __user *
user_backtrace(struct frame_tail __user *tail,
               struct perf_callchain_entry *entry)
{
        struct frame_tail buftail;
        unsigned long err;

        if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
                return NULL;

        pagefault_disable();
        err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
        pagefault_enable();

        if (err)
                return NULL;

        perf_callchain_store(entry, buftail.lr);

        /*
         * Frame pointers should strictly progress back up the stack
         * (towards higher addresses).
         */
        if (tail + 1 >= buftail.fp)
                return NULL;

        return buftail.fp - 1;
}

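/*
 * Record a user-space callchain by following the saved frame pointers,
 * starting one frame_tail below the sampled ARM_fp. The walk stops at
 * PERF_MAX_STACK_DEPTH entries, at a NULL or misaligned frame pointer,
 * or when a frame can't be read from user memory.
 */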
void
perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
        struct frame_tail __user *tail;

        if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
                /* We don't support guest os callchain now */
                return;
        }

        perf_callchain_store(entry, regs->ARM_pc);

        if (!current->mm)
                return;

        tail = (struct frame_tail __user *)regs->ARM_fp - 1;

        while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
               tail && !((unsigned long)tail & 0x3))
                tail = user_backtrace(tail, entry);
}

/*
 * Gets called by walk_stackframe() for every stackframe. This will be called
 * whilst unwinding the stackframe and is like a subroutine return so we use
 * the PC.
 */
static int
callchain_trace(struct stackframe *fr,
                void *data)
{
        struct perf_callchain_entry *entry = data;
        perf_callchain_store(entry, fr->pc);
        return 0;
}

void
perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
        struct stackframe fr;

        if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
                /* We don't support guest os callchain now */
                return;
        }

        arm_get_current_stackframe(regs, &fr);
        walk_stackframe(&fr, callchain_trace, entry);
}

unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
        if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
                return perf_guest_cbs->get_guest_ip();

        return instruction_pointer(regs);
}

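/*
 * Classify a sample for the perf record header: guest vs. host context
 * first, then user vs. kernel mode within that context.
 */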
unsigned long perf_misc_flags(struct pt_regs *regs)
{
        int misc = 0;

        if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
                if (perf_guest_cbs->is_user_mode())
                        misc |= PERF_RECORD_MISC_GUEST_USER;
                else
                        misc |= PERF_RECORD_MISC_GUEST_KERNEL;
        } else {
                if (user_mode(regs))
                        misc |= PERF_RECORD_MISC_USER;
                else
                        misc |= PERF_RECORD_MISC_KERNEL;
        }

        return misc;
}