linux/arch/xtensa/kernel/perf_event.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Xtensa Performance Monitor Module driver
   4 * See Tensilica Debug User's Guide for PMU registers documentation.
   5 *
   6 * Copyright (C) 2015 Cadence Design Systems Inc.
   7 */
   8
   9#include <linux/interrupt.h>
  10#include <linux/irqdomain.h>
  11#include <linux/module.h>
  12#include <linux/of.h>
  13#include <linux/perf_event.h>
  14#include <linux/platform_device.h>
  15
  16#include <asm/processor.h>
  17#include <asm/stacktrace.h>
  18
  19/* Global control/status for all perf counters */
  20#define XTENSA_PMU_PMG                  0x1000
  21/* Perf counter values */
  22#define XTENSA_PMU_PM(i)                (0x1080 + (i) * 4)
  23/* Perf counter control registers */
  24#define XTENSA_PMU_PMCTRL(i)            (0x1100 + (i) * 4)
  25/* Perf counter status registers */
  26#define XTENSA_PMU_PMSTAT(i)            (0x1180 + (i) * 4)
  27
  28#define XTENSA_PMU_PMG_PMEN             0x1
  29
  30#define XTENSA_PMU_COUNTER_MASK         0xffffffffULL
  31#define XTENSA_PMU_COUNTER_MAX          0x7fffffff
  32
  33#define XTENSA_PMU_PMCTRL_INTEN         0x00000001
  34#define XTENSA_PMU_PMCTRL_KRNLCNT       0x00000008
  35#define XTENSA_PMU_PMCTRL_TRACELEVEL    0x000000f0
  36#define XTENSA_PMU_PMCTRL_SELECT_SHIFT  8
  37#define XTENSA_PMU_PMCTRL_SELECT        0x00001f00
  38#define XTENSA_PMU_PMCTRL_MASK_SHIFT    16
  39#define XTENSA_PMU_PMCTRL_MASK          0xffff0000
  40
  41#define XTENSA_PMU_MASK(select, mask) \
  42        (((select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \
  43         ((mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \
  44         XTENSA_PMU_PMCTRL_TRACELEVEL | \
  45         XTENSA_PMU_PMCTRL_INTEN)
  46
  47#define XTENSA_PMU_PMSTAT_OVFL          0x00000001
  48#define XTENSA_PMU_PMSTAT_INTASRT       0x00000010
  49
  50struct xtensa_pmu_events {
  51        /* Array of events currently on this core */
  52        struct perf_event *event[XCHAL_NUM_PERF_COUNTERS];
  53        /* Bitmap of used hardware counters */
  54        unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)];
  55};
  56static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events);
  57
  58static const u32 xtensa_hw_ctl[] = {
  59        [PERF_COUNT_HW_CPU_CYCLES]              = XTENSA_PMU_MASK(0, 0x1),
  60        [PERF_COUNT_HW_INSTRUCTIONS]            = XTENSA_PMU_MASK(2, 0xffff),
  61        [PERF_COUNT_HW_CACHE_REFERENCES]        = XTENSA_PMU_MASK(10, 0x1),
  62        [PERF_COUNT_HW_CACHE_MISSES]            = XTENSA_PMU_MASK(12, 0x1),
  63        /* Taken and non-taken branches + taken loop ends */
  64        [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = XTENSA_PMU_MASK(2, 0x490),
  65        /* Instruction-related + other global stall cycles */
  66        [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XTENSA_PMU_MASK(4, 0x1ff),
  67        /* Data-related global stall cycles */
  68        [PERF_COUNT_HW_STALLED_CYCLES_BACKEND]  = XTENSA_PMU_MASK(3, 0x1ff),
  69};
  70
  71#define C(_x) PERF_COUNT_HW_CACHE_##_x
  72
  73static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = {
  74        [C(L1D)] = {
  75                [C(OP_READ)] = {
  76                        [C(RESULT_ACCESS)]      = XTENSA_PMU_MASK(10, 0x1),
  77                        [C(RESULT_MISS)]        = XTENSA_PMU_MASK(10, 0x2),
  78                },
  79                [C(OP_WRITE)] = {
  80                        [C(RESULT_ACCESS)]      = XTENSA_PMU_MASK(11, 0x1),
  81                        [C(RESULT_MISS)]        = XTENSA_PMU_MASK(11, 0x2),
  82                },
  83        },
  84        [C(L1I)] = {
  85                [C(OP_READ)] = {
  86                        [C(RESULT_ACCESS)]      = XTENSA_PMU_MASK(8, 0x1),
  87                        [C(RESULT_MISS)]        = XTENSA_PMU_MASK(8, 0x2),
  88                },
  89        },
  90        [C(DTLB)] = {
  91                [C(OP_READ)] = {
  92                        [C(RESULT_ACCESS)]      = XTENSA_PMU_MASK(9, 0x1),
  93                        [C(RESULT_MISS)]        = XTENSA_PMU_MASK(9, 0x8),
  94                },
  95        },
  96        [C(ITLB)] = {
  97                [C(OP_READ)] = {
  98                        [C(RESULT_ACCESS)]      = XTENSA_PMU_MASK(7, 0x1),
  99                        [C(RESULT_MISS)]        = XTENSA_PMU_MASK(7, 0x8),
 100                },
 101        },
 102};
 103
 104static int xtensa_pmu_cache_event(u64 config)
 105{
 106        unsigned int cache_type, cache_op, cache_result;
 107        int ret;
 108
 109        cache_type = (config >>  0) & 0xff;
 110        cache_op = (config >>  8) & 0xff;
 111        cache_result = (config >> 16) & 0xff;
 112
 113        if (cache_type >= ARRAY_SIZE(xtensa_cache_ctl) ||
 114            cache_op >= C(OP_MAX) ||
 115            cache_result >= C(RESULT_MAX))
 116                return -EINVAL;
 117
 118        ret = xtensa_cache_ctl[cache_type][cache_op][cache_result];
 119
 120        if (ret == 0)
 121                return -EINVAL;
 122
 123        return ret;
 124}
 125
 126static inline uint32_t xtensa_pmu_read_counter(int idx)
 127{
 128        return get_er(XTENSA_PMU_PM(idx));
 129}
 130
 131static inline void xtensa_pmu_write_counter(int idx, uint32_t v)
 132{
 133        set_er(v, XTENSA_PMU_PM(idx));
 134}
 135
 136static void xtensa_perf_event_update(struct perf_event *event,
 137                                     struct hw_perf_event *hwc, int idx)
 138{
 139        uint64_t prev_raw_count, new_raw_count;
 140        int64_t delta;
 141
 142        do {
 143                prev_raw_count = local64_read(&hwc->prev_count);
 144                new_raw_count = xtensa_pmu_read_counter(event->hw.idx);
 145        } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 146                                 new_raw_count) != prev_raw_count);
 147
 148        delta = (new_raw_count - prev_raw_count) & XTENSA_PMU_COUNTER_MASK;
 149
 150        local64_add(delta, &event->count);
 151        local64_sub(delta, &hwc->period_left);
 152}
 153
 154static bool xtensa_perf_event_set_period(struct perf_event *event,
 155                                         struct hw_perf_event *hwc, int idx)
 156{
 157        bool rc = false;
 158        s64 left;
 159
 160        if (!is_sampling_event(event)) {
 161                left = XTENSA_PMU_COUNTER_MAX;
 162        } else {
 163                s64 period = hwc->sample_period;
 164
 165                left = local64_read(&hwc->period_left);
 166                if (left <= -period) {
 167                        left = period;
 168                        local64_set(&hwc->period_left, left);
 169                        hwc->last_period = period;
 170                        rc = true;
 171                } else if (left <= 0) {
 172                        left += period;
 173                        local64_set(&hwc->period_left, left);
 174                        hwc->last_period = period;
 175                        rc = true;
 176                }
 177                if (left > XTENSA_PMU_COUNTER_MAX)
 178                        left = XTENSA_PMU_COUNTER_MAX;
 179        }
 180
 181        local64_set(&hwc->prev_count, -left);
 182        xtensa_pmu_write_counter(idx, -left);
 183        perf_event_update_userpage(event);
 184
 185        return rc;
 186}
 187
 188static void xtensa_pmu_enable(struct pmu *pmu)
 189{
 190        set_er(get_er(XTENSA_PMU_PMG) | XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
 191}
 192
 193static void xtensa_pmu_disable(struct pmu *pmu)
 194{
 195        set_er(get_er(XTENSA_PMU_PMG) & ~XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
 196}
 197
 198static int xtensa_pmu_event_init(struct perf_event *event)
 199{
 200        int ret;
 201
 202        switch (event->attr.type) {
 203        case PERF_TYPE_HARDWARE:
 204                if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) ||
 205                    xtensa_hw_ctl[event->attr.config] == 0)
 206                        return -EINVAL;
 207                event->hw.config = xtensa_hw_ctl[event->attr.config];
 208                return 0;
 209
 210        case PERF_TYPE_HW_CACHE:
 211                ret = xtensa_pmu_cache_event(event->attr.config);
 212                if (ret < 0)
 213                        return ret;
 214                event->hw.config = ret;
 215                return 0;
 216
 217        case PERF_TYPE_RAW:
 218                /* Not 'previous counter' select */
 219                if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) ==
 220                    (1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT))
 221                        return -EINVAL;
 222                event->hw.config = (event->attr.config &
 223                                    (XTENSA_PMU_PMCTRL_KRNLCNT |
 224                                     XTENSA_PMU_PMCTRL_TRACELEVEL |
 225                                     XTENSA_PMU_PMCTRL_SELECT |
 226                                     XTENSA_PMU_PMCTRL_MASK)) |
 227                        XTENSA_PMU_PMCTRL_INTEN;
 228                return 0;
 229
 230        default:
 231                return -ENOENT;
 232        }
 233}
 234
 235/*
 236 * Starts/Stops a counter present on the PMU. The PMI handler
 237 * should stop the counter when perf_event_overflow() returns
 238 * !0. ->start() will be used to continue.
 239 */
 240static void xtensa_pmu_start(struct perf_event *event, int flags)
 241{
 242        struct hw_perf_event *hwc = &event->hw;
 243        int idx = hwc->idx;
 244
 245        if (WARN_ON_ONCE(idx == -1))
 246                return;
 247
 248        if (flags & PERF_EF_RELOAD) {
 249                WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
 250                xtensa_perf_event_set_period(event, hwc, idx);
 251        }
 252
 253        hwc->state = 0;
 254
 255        set_er(hwc->config, XTENSA_PMU_PMCTRL(idx));
 256}
 257
 258static void xtensa_pmu_stop(struct perf_event *event, int flags)
 259{
 260        struct hw_perf_event *hwc = &event->hw;
 261        int idx = hwc->idx;
 262
 263        if (!(hwc->state & PERF_HES_STOPPED)) {
 264                set_er(0, XTENSA_PMU_PMCTRL(idx));
 265                set_er(get_er(XTENSA_PMU_PMSTAT(idx)),
 266                       XTENSA_PMU_PMSTAT(idx));
 267                hwc->state |= PERF_HES_STOPPED;
 268        }
 269
 270        if ((flags & PERF_EF_UPDATE) &&
 271            !(event->hw.state & PERF_HES_UPTODATE)) {
 272                xtensa_perf_event_update(event, &event->hw, idx);
 273                event->hw.state |= PERF_HES_UPTODATE;
 274        }
 275}
 276
 277/*
 278 * Adds/Removes a counter to/from the PMU, can be done inside
 279 * a transaction, see the ->*_txn() methods.
 280 */
 281static int xtensa_pmu_add(struct perf_event *event, int flags)
 282{
 283        struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
 284        struct hw_perf_event *hwc = &event->hw;
 285        int idx = hwc->idx;
 286
 287        if (__test_and_set_bit(idx, ev->used_mask)) {
 288                idx = find_first_zero_bit(ev->used_mask,
 289                                          XCHAL_NUM_PERF_COUNTERS);
 290                if (idx == XCHAL_NUM_PERF_COUNTERS)
 291                        return -EAGAIN;
 292
 293                __set_bit(idx, ev->used_mask);
 294                hwc->idx = idx;
 295        }
 296        ev->event[idx] = event;
 297
 298        hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
 299
 300        if (flags & PERF_EF_START)
 301                xtensa_pmu_start(event, PERF_EF_RELOAD);
 302
 303        perf_event_update_userpage(event);
 304        return 0;
 305}
 306
 307static void xtensa_pmu_del(struct perf_event *event, int flags)
 308{
 309        struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
 310
 311        xtensa_pmu_stop(event, PERF_EF_UPDATE);
 312        __clear_bit(event->hw.idx, ev->used_mask);
 313        perf_event_update_userpage(event);
 314}
 315
 316static void xtensa_pmu_read(struct perf_event *event)
 317{
 318        xtensa_perf_event_update(event, &event->hw, event->hw.idx);
 319}
 320
 321static int callchain_trace(struct stackframe *frame, void *data)
 322{
 323        struct perf_callchain_entry_ctx *entry = data;
 324
 325        perf_callchain_store(entry, frame->pc);
 326        return 0;
 327}
 328
 329void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
 330                           struct pt_regs *regs)
 331{
 332        xtensa_backtrace_kernel(regs, entry->max_stack,
 333                                callchain_trace, NULL, entry);
 334}
 335
 336void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
 337                         struct pt_regs *regs)
 338{
 339        xtensa_backtrace_user(regs, entry->max_stack,
 340                              callchain_trace, entry);
 341}
 342
 343void perf_event_print_debug(void)
 344{
 345        unsigned long flags;
 346        unsigned i;
 347
 348        local_irq_save(flags);
 349        pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(),
 350                get_er(XTENSA_PMU_PMG));
 351        for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i)
 352                pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n",
 353                        i, get_er(XTENSA_PMU_PM(i)),
 354                        i, get_er(XTENSA_PMU_PMCTRL(i)),
 355                        i, get_er(XTENSA_PMU_PMSTAT(i)));
 356        local_irq_restore(flags);
 357}
 358
 359irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
 360{
 361        irqreturn_t rc = IRQ_NONE;
 362        struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
 363        unsigned i;
 364
 365        for_each_set_bit(i, ev->used_mask, XCHAL_NUM_PERF_COUNTERS) {
 366                uint32_t v = get_er(XTENSA_PMU_PMSTAT(i));
 367                struct perf_event *event = ev->event[i];
 368                struct hw_perf_event *hwc = &event->hw;
 369                u64 last_period;
 370
 371                if (!(v & XTENSA_PMU_PMSTAT_OVFL))
 372                        continue;
 373
 374                set_er(v, XTENSA_PMU_PMSTAT(i));
 375                xtensa_perf_event_update(event, hwc, i);
 376                last_period = hwc->last_period;
 377                if (xtensa_perf_event_set_period(event, hwc, i)) {
 378                        struct perf_sample_data data;
 379                        struct pt_regs *regs = get_irq_regs();
 380
 381                        perf_sample_data_init(&data, 0, last_period);
 382                        if (perf_event_overflow(event, &data, regs))
 383                                xtensa_pmu_stop(event, 0);
 384                }
 385
 386                rc = IRQ_HANDLED;
 387        }
 388        return rc;
 389}
 390
 391static struct pmu xtensa_pmu = {
 392        .pmu_enable = xtensa_pmu_enable,
 393        .pmu_disable = xtensa_pmu_disable,
 394        .event_init = xtensa_pmu_event_init,
 395        .add = xtensa_pmu_add,
 396        .del = xtensa_pmu_del,
 397        .start = xtensa_pmu_start,
 398        .stop = xtensa_pmu_stop,
 399        .read = xtensa_pmu_read,
 400};
 401
 402static int xtensa_pmu_setup(unsigned int cpu)
 403{
 404        unsigned i;
 405
 406        set_er(0, XTENSA_PMU_PMG);
 407        for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) {
 408                set_er(0, XTENSA_PMU_PMCTRL(i));
 409                set_er(get_er(XTENSA_PMU_PMSTAT(i)), XTENSA_PMU_PMSTAT(i));
 410        }
 411        return 0;
 412}
 413
 414static int __init xtensa_pmu_init(void)
 415{
 416        int ret;
 417        int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT);
 418
 419        ret = cpuhp_setup_state(CPUHP_AP_PERF_XTENSA_STARTING,
 420                                "perf/xtensa:starting", xtensa_pmu_setup,
 421                                NULL);
 422        if (ret) {
 423                pr_err("xtensa_pmu: failed to register CPU-hotplug.\n");
 424                return ret;
 425        }
 426#if XTENSA_FAKE_NMI
 427        enable_irq(irq);
 428#else
 429        ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU,
 430                          "pmu", NULL);
 431        if (ret < 0)
 432                return ret;
 433#endif
 434
 435        ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW);
 436        if (ret)
 437                free_irq(irq, NULL);
 438
 439        return ret;
 440}
 441early_initcall(xtensa_pmu_init);
 442