linux/drivers/perf/qcom_l3_pmu.c
/*
 * Driver for the L3 cache PMUs in Qualcomm Technologies chips.
 *
 * The driver supports a distributed cache architecture where the overall
 * cache for a socket is composed of multiple slices, each with its own PMU.
 * Access to each individual PMU is provided even though all CPUs share all
 * the slices. User space needs to aggregate the individual counts to provide
 * a global picture.
 *
 * See Documentation/perf/qcom_l3_pmu.txt for more details.
 *
 * Copyright (c) 2015-2017, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <linux/acpi.h>
#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>

/*
 * General constants
 */

/* Number of counters on each PMU */
#define L3_NUM_COUNTERS  8
/* Mask for the event type field within perf_event_attr.config and EVTYPE reg */
#define L3_EVTYPE_MASK   0xFF
/*
 * Bit position of the 'long counter' flag within perf_event_attr.config.
 * Reserve some space between the event type and this flag to allow expansion
 * in the event type field.
 */
#define L3_EVENT_LC_BIT  32

/*
 * Register offsets
 */

/* Perfmon registers */
#define L3_HML3_PM_CR       0x000
#define L3_HML3_PM_EVCNTR(__cntr) (0x420 + ((__cntr) & 0x7) * 8)
#define L3_HML3_PM_CNTCTL(__cntr) (0x120 + ((__cntr) & 0x7) * 8)
#define L3_HML3_PM_EVTYPE(__cntr) (0x220 + ((__cntr) & 0x7) * 8)
#define L3_HML3_PM_FILTRA   0x300
#define L3_HML3_PM_FILTRB   0x308
#define L3_HML3_PM_FILTRC   0x310
#define L3_HML3_PM_FILTRAM  0x304
#define L3_HML3_PM_FILTRBM  0x30C
#define L3_HML3_PM_FILTRCM  0x314
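
/*
 * The per-counter registers are laid out as arrays of 8-byte slots, so the
 * macros above reduce to simple offset arithmetic. For example, counter 3
 * uses:
 *   L3_HML3_PM_CNTCTL(3) = 0x120 + (3 & 0x7) * 8 = 0x138
 *   L3_HML3_PM_EVTYPE(3) = 0x220 + (3 & 0x7) * 8 = 0x238
 *   L3_HML3_PM_EVCNTR(3) = 0x420 + (3 & 0x7) * 8 = 0x438
 */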

/* Basic counter registers */
#define L3_M_BC_CR         0x500
#define L3_M_BC_SATROLL_CR 0x504
#define L3_M_BC_CNTENSET   0x508
#define L3_M_BC_CNTENCLR   0x50C
#define L3_M_BC_INTENSET   0x510
#define L3_M_BC_INTENCLR   0x514
#define L3_M_BC_GANG       0x718
#define L3_M_BC_OVSR       0x740
#define L3_M_BC_IRQCTL     0x96C

/*
 * Bit field definitions
 */

/* L3_HML3_PM_CR */
#define PM_CR_RESET           (0)

/* L3_HML3_PM_XCNTCTL/L3_HML3_PM_CNTCTLx */
#define PMCNT_RESET           (0)

/* L3_HML3_PM_EVTYPEx */
#define EVSEL(__val)          ((__val) & L3_EVTYPE_MASK)

/* Reset value for all the filter registers */
#define PM_FLTR_RESET         (0)

/* L3_M_BC_CR */
#define BC_RESET              (1UL << 1)
#define BC_ENABLE             (1UL << 0)

/* L3_M_BC_SATROLL_CR */
#define BC_SATROLL_CR_RESET   (0)

/* L3_M_BC_CNTENSET */
#define PMCNTENSET(__cntr)    (1UL << ((__cntr) & 0x7))

/* L3_M_BC_CNTENCLR */
#define PMCNTENCLR(__cntr)    (1UL << ((__cntr) & 0x7))
#define BC_CNTENCLR_RESET     (0xFF)

/* L3_M_BC_INTENSET */
#define PMINTENSET(__cntr)    (1UL << ((__cntr) & 0x7))

/* L3_M_BC_INTENCLR */
#define PMINTENCLR(__cntr)    (1UL << ((__cntr) & 0x7))
#define BC_INTENCLR_RESET     (0xFF)

/* L3_M_BC_GANG */
#define GANG_EN(__cntr)       (1UL << ((__cntr) & 0x7))
#define BC_GANG_RESET         (0)

/* L3_M_BC_OVSR */
#define PMOVSRCLR(__cntr)     (1UL << ((__cntr) & 0x7))
#define PMOVSRCLR_RESET       (0xFF)

/* L3_M_BC_IRQCTL */
#define PMIRQONMSBEN(__cntr)  (1UL << ((__cntr) & 0x7))
#define BC_IRQCTL_RESET       (0x0)

/*
 * Events
 */

#define L3_EVENT_CYCLES                 0x01
#define L3_EVENT_READ_HIT               0x20
#define L3_EVENT_READ_MISS              0x21
#define L3_EVENT_READ_HIT_D             0x22
#define L3_EVENT_READ_MISS_D            0x23
#define L3_EVENT_WRITE_HIT              0x24
#define L3_EVENT_WRITE_MISS             0x25

/*
 * Decoding of settings from perf_event_attr
 *
 * The config format for perf events is:
 * - config: bits 0-7: event type
 *           bit  32:  HW counter size requested, 0: 32 bits, 1: 64 bits
 */

static inline u32 get_event_type(struct perf_event *event)
{
        return (event->attr.config) & L3_EVTYPE_MASK;
}

static inline bool event_uses_long_counter(struct perf_event *event)
{
        return !!(event->attr.config & BIT_ULL(L3_EVENT_LC_BIT));
}

static inline int event_num_counters(struct perf_event *event)
{
        return event_uses_long_counter(event) ? 2 : 1;
}
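
/*
 * For example, a 64-bit read-miss counter is requested by passing
 * config = BIT_ULL(32) | 0x21, which decodes as:
 *   get_event_type()          -> 0x21 (L3_EVENT_READ_MISS)
 *   event_uses_long_counter() -> true
 *   event_num_counters()      -> 2 (an adjacent even/odd counter pair)
 * Bits 8-31 are currently unused; they are reserved for event type expansion.
 */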

/*
 * Main PMU, inherits from the core perf PMU type
 */
struct l3cache_pmu {
        struct pmu              pmu;
        struct hlist_node       node;
        void __iomem            *regs;
        struct perf_event       *events[L3_NUM_COUNTERS];
        unsigned long           used_mask[BITS_TO_LONGS(L3_NUM_COUNTERS)];
        cpumask_t               cpumask;
};

#define to_l3cache_pmu(p) (container_of(p, struct l3cache_pmu, pmu))

/*
 * Type used to group hardware counter operations
 *
 * Used to implement two types of hardware counters, standard (32bits) and
 * long (64bits). The hardware supports counter chaining which we use to
 * implement long counters. This support is exposed via the 'lc' flag field
 * in perf_event_attr.config.
 */
struct l3cache_event_ops {
        /* Called to start event monitoring */
        void (*start)(struct perf_event *event);
        /* Called to stop event monitoring */
        void (*stop)(struct perf_event *event, int flags);
        /* Called to update the perf_event */
        void (*update)(struct perf_event *event);
};

/*
 * Implementation of long counter operations
 *
 * 64bit counters are implemented by chaining two of the 32bit physical
 * counters. The PMU only supports chaining of adjacent even/odd pairs
 * and for simplicity the driver always configures the odd counter to
 * count the overflows of the lower-numbered even counter. Note that since
 * the resulting hardware counter is 64bits no IRQs are required to maintain
 * the software counter which is also 64bits.
 */
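
/*
 * For example, a long counter placed at hw.idx = 4 sets GANG_EN(5) so that
 * counter 5 counts the overflows of counter 4, programs EVTYPE(5) to 0 and
 * EVTYPE(4) to the requested event. The update path then assembles the value
 * as ((u64)EVCNTR(5) << 32) | EVCNTR(4), re-reading the high word to detect
 * a low-word rollover between the two reads.
 */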

static void qcom_l3_cache__64bit_counter_start(struct perf_event *event)
{
        struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
        int idx = event->hw.idx;
        u32 evsel = get_event_type(event);
        u32 gang;

        /* Set the odd counter to count the overflows of the even counter */
        gang = readl_relaxed(l3pmu->regs + L3_M_BC_GANG);
        gang |= GANG_EN(idx + 1);
        writel_relaxed(gang, l3pmu->regs + L3_M_BC_GANG);

        /* Initialize the hardware counters and reset prev_count */
        local64_set(&event->hw.prev_count, 0);
        writel_relaxed(0, l3pmu->regs + L3_HML3_PM_EVCNTR(idx + 1));
        writel_relaxed(0, l3pmu->regs + L3_HML3_PM_EVCNTR(idx));

        /*
         * Set the event types, the upper half must use zero and the lower
         * half the actual event type
         */
        writel_relaxed(EVSEL(0), l3pmu->regs + L3_HML3_PM_EVTYPE(idx + 1));
        writel_relaxed(EVSEL(evsel), l3pmu->regs + L3_HML3_PM_EVTYPE(idx));

        /* Finally, enable the counters */
        writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(idx + 1));
        writel_relaxed(PMCNTENSET(idx + 1), l3pmu->regs + L3_M_BC_CNTENSET);
        writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(idx));
        writel_relaxed(PMCNTENSET(idx), l3pmu->regs + L3_M_BC_CNTENSET);
}

static void qcom_l3_cache__64bit_counter_stop(struct perf_event *event,
                                              int flags)
{
        struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
        int idx = event->hw.idx;
        u32 gang = readl_relaxed(l3pmu->regs + L3_M_BC_GANG);

        /* Disable the counters */
        writel_relaxed(PMCNTENCLR(idx), l3pmu->regs + L3_M_BC_CNTENCLR);
        writel_relaxed(PMCNTENCLR(idx + 1), l3pmu->regs + L3_M_BC_CNTENCLR);

        /* Disable chaining */
        writel_relaxed(gang & ~GANG_EN(idx + 1), l3pmu->regs + L3_M_BC_GANG);
}

static void qcom_l3_cache__64bit_counter_update(struct perf_event *event)
{
        struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
        int idx = event->hw.idx;
        u32 hi, lo;
        u64 prev, new;

        do {
                prev = local64_read(&event->hw.prev_count);
                do {
                        hi = readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx + 1));
                        lo = readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx));
                } while (hi != readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx + 1)));
                new = ((u64)hi << 32) | lo;
        } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);

        local64_add(new - prev, &event->count);
}

static const struct l3cache_event_ops event_ops_long = {
        .start = qcom_l3_cache__64bit_counter_start,
        .stop = qcom_l3_cache__64bit_counter_stop,
        .update = qcom_l3_cache__64bit_counter_update,
};

/*
 * Implementation of standard counter operations
 *
 * 32bit counters use a single physical counter and a hardware feature that
 * asserts the overflow IRQ on the toggling of the most significant bit in
 * the counter. This feature allows the counters to be left free-running
 * without needing the usual reprogramming required to properly handle races
 * during concurrent calls to update.
 */
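
/*
 * For example, if prev_count was last snapshotted as 0xFFFFFF00 and the
 * free-running counter has since wrapped to 0x00000100, the u32 subtraction
 * new - prev still yields the correct delta of 0x200, because the MSB-toggle
 * IRQ guarantees an update at least once per half period of the counter.
 */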

static void qcom_l3_cache__32bit_counter_start(struct perf_event *event)
{
        struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
        int idx = event->hw.idx;
        u32 evsel = get_event_type(event);
        u32 irqctl = readl_relaxed(l3pmu->regs + L3_M_BC_IRQCTL);

        /* Set the counter to assert the overflow IRQ on MSB toggling */
        writel_relaxed(irqctl | PMIRQONMSBEN(idx), l3pmu->regs + L3_M_BC_IRQCTL);

        /* Initialize the hardware counter and reset prev_count */
        local64_set(&event->hw.prev_count, 0);
        writel_relaxed(0, l3pmu->regs + L3_HML3_PM_EVCNTR(idx));

        /* Set the event type */
        writel_relaxed(EVSEL(evsel), l3pmu->regs + L3_HML3_PM_EVTYPE(idx));

        /* Enable interrupt generation by this counter */
        writel_relaxed(PMINTENSET(idx), l3pmu->regs + L3_M_BC_INTENSET);

        /* Finally, enable the counter */
        writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(idx));
        writel_relaxed(PMCNTENSET(idx), l3pmu->regs + L3_M_BC_CNTENSET);
}

static void qcom_l3_cache__32bit_counter_stop(struct perf_event *event,
                                              int flags)
{
        struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
        int idx = event->hw.idx;
        u32 irqctl = readl_relaxed(l3pmu->regs + L3_M_BC_IRQCTL);

        /* Disable the counter */
        writel_relaxed(PMCNTENCLR(idx), l3pmu->regs + L3_M_BC_CNTENCLR);

        /* Disable interrupt generation by this counter */
        writel_relaxed(PMINTENCLR(idx), l3pmu->regs + L3_M_BC_INTENCLR);

        /* Set the counter to not assert the overflow IRQ on MSB toggling */
        writel_relaxed(irqctl & ~PMIRQONMSBEN(idx), l3pmu->regs + L3_M_BC_IRQCTL);
}

static void qcom_l3_cache__32bit_counter_update(struct perf_event *event)
{
        struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
        int idx = event->hw.idx;
        u32 prev, new;

        do {
                prev = local64_read(&event->hw.prev_count);
                new = readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx));
        } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);

        local64_add(new - prev, &event->count);
}

static const struct l3cache_event_ops event_ops_std = {
        .start = qcom_l3_cache__32bit_counter_start,
        .stop = qcom_l3_cache__32bit_counter_stop,
        .update = qcom_l3_cache__32bit_counter_update,
};

/* Retrieve the appropriate operations for the given event */
static
const struct l3cache_event_ops *l3cache_event_get_ops(struct perf_event *event)
{
        if (event_uses_long_counter(event))
                return &event_ops_long;
        else
                return &event_ops_std;
}

/*
 * Top level PMU functions.
 */

static inline void qcom_l3_cache__init(struct l3cache_pmu *l3pmu)
{
        int i;

        writel_relaxed(BC_RESET, l3pmu->regs + L3_M_BC_CR);

        /*
         * Use writel for the first programming command to ensure the basic
         * counter unit is stopped before proceeding
         */
        writel(BC_SATROLL_CR_RESET, l3pmu->regs + L3_M_BC_SATROLL_CR);

        writel_relaxed(BC_CNTENCLR_RESET, l3pmu->regs + L3_M_BC_CNTENCLR);
        writel_relaxed(BC_INTENCLR_RESET, l3pmu->regs + L3_M_BC_INTENCLR);
        writel_relaxed(PMOVSRCLR_RESET, l3pmu->regs + L3_M_BC_OVSR);
        writel_relaxed(BC_GANG_RESET, l3pmu->regs + L3_M_BC_GANG);
        writel_relaxed(BC_IRQCTL_RESET, l3pmu->regs + L3_M_BC_IRQCTL);
        writel_relaxed(PM_CR_RESET, l3pmu->regs + L3_HML3_PM_CR);

        for (i = 0; i < L3_NUM_COUNTERS; ++i) {
                writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(i));
                writel_relaxed(EVSEL(0), l3pmu->regs + L3_HML3_PM_EVTYPE(i));
        }

        writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRA);
        writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRAM);
        writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRB);
        writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRBM);
        writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRC);
        writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRCM);

        /*
         * Use writel here to ensure all programming commands are done
         * before proceeding
         */
        writel(BC_ENABLE, l3pmu->regs + L3_M_BC_CR);
}

static irqreturn_t qcom_l3_cache__handle_irq(int irq_num, void *data)
{
        struct l3cache_pmu *l3pmu = data;
        /* Read the overflow status register */
        long status = readl_relaxed(l3pmu->regs + L3_M_BC_OVSR);
        int idx;

        if (status == 0)
                return IRQ_NONE;

        /* Clear the bits we read on the overflow status register */
        writel_relaxed(status, l3pmu->regs + L3_M_BC_OVSR);

        for_each_set_bit(idx, &status, L3_NUM_COUNTERS) {
                struct perf_event *event;
                const struct l3cache_event_ops *ops;

                event = l3pmu->events[idx];
                if (!event)
                        continue;

                /*
                 * Since the IRQ is not enabled for events using long counters
                 * we should never see one of those here; however, be consistent
                 * and use the ops indirections like in the other operations.
                 */

                ops = l3cache_event_get_ops(event);
                ops->update(event);
        }

        return IRQ_HANDLED;
}

/*
 * Implementation of abstract pmu functionality required by
 * the core perf events code.
 */

static void qcom_l3_cache__pmu_enable(struct pmu *pmu)
{
        struct l3cache_pmu *l3pmu = to_l3cache_pmu(pmu);

        /* Ensure the other programming commands are observed before enabling */
        wmb();

        writel_relaxed(BC_ENABLE, l3pmu->regs + L3_M_BC_CR);
}

static void qcom_l3_cache__pmu_disable(struct pmu *pmu)
{
        struct l3cache_pmu *l3pmu = to_l3cache_pmu(pmu);

        writel_relaxed(0, l3pmu->regs + L3_M_BC_CR);

        /* Ensure the basic counter unit is stopped before proceeding */
        wmb();
}

/*
 * We must NOT create groups containing events from multiple hardware PMUs,
 * although mixing different software and hardware PMUs is allowed.
 */
static bool qcom_l3_cache__validate_event_group(struct perf_event *event)
{
        struct perf_event *leader = event->group_leader;
        struct perf_event *sibling;
        int counters = 0;

        if (leader->pmu != event->pmu && !is_software_event(leader))
                return false;

        counters = event_num_counters(event);
        counters += event_num_counters(leader);

        for_each_sibling_event(sibling, leader) {
                if (is_software_event(sibling))
                        continue;
                if (sibling->pmu != event->pmu)
                        return false;
                counters += event_num_counters(sibling);
        }

        /*
         * If the group requires more counters than the HW has, it
         * cannot ever be scheduled.
         */
        return counters <= L3_NUM_COUNTERS;
}
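
/*
 * For example, a group of five standard events plus two 'lc' events needs
 * 5 + 2 * 2 = 9 counters, more than L3_NUM_COUNTERS (8), so the group is
 * rejected here at event_init time instead of repeatedly failing to schedule.
 */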

static int qcom_l3_cache__event_init(struct perf_event *event)
{
        struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;

        /*
         * Is the event for this PMU?
         */
        if (event->attr.type != event->pmu->type)
                return -ENOENT;

        /*
         * There are no per-counter mode filters in the PMU.
         */
        if (event->attr.exclude_user || event->attr.exclude_kernel ||
            event->attr.exclude_hv || event->attr.exclude_idle)
                return -EINVAL;

        /*
         * Sampling not supported since these events are not core-attributable.
         */
        if (hwc->sample_period)
                return -EINVAL;

        /*
         * Task mode not available, we run the counters as socket counters,
         * not attributable to any CPU and therefore cannot attribute per-task.
         */
        if (event->cpu < 0)
                return -EINVAL;

        /* Validate the group */
        if (!qcom_l3_cache__validate_event_group(event))
                return -EINVAL;

        hwc->idx = -1;

        /*
         * Many perf core operations (eg. events rotation) operate on a
         * single CPU context. This is obvious for CPU PMUs, where one
         * expects the same sets of events being observed on all CPUs,
         * but can lead to issues for off-core PMUs, like this one, where
         * each event could be theoretically assigned to a different CPU.
         * To mitigate this, we enforce CPU assignment to one designated
         * processor (the one described in the "cpumask" attribute exported
         * by the PMU device). perf user space tools honor this and avoid
         * opening more than one copy of the events.
         */
        event->cpu = cpumask_first(&l3pmu->cpumask);

        return 0;
}

static void qcom_l3_cache__event_start(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        const struct l3cache_event_ops *ops = l3cache_event_get_ops(event);

        hwc->state = 0;
        ops->start(event);
}

static void qcom_l3_cache__event_stop(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        const struct l3cache_event_ops *ops = l3cache_event_get_ops(event);

        if (hwc->state & PERF_HES_STOPPED)
                return;

        ops->stop(event, flags);
        if (flags & PERF_EF_UPDATE)
                ops->update(event);
        hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
}

static int qcom_l3_cache__event_add(struct perf_event *event, int flags)
{
        struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;
        int order = event_uses_long_counter(event) ? 1 : 0;
        int idx;

        /*
         * Try to allocate a counter.
         */
        idx = bitmap_find_free_region(l3pmu->used_mask, L3_NUM_COUNTERS, order);
        if (idx < 0)
                /* The counters are all in use. */
                return -EAGAIN;

        hwc->idx = idx;
        hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
        l3pmu->events[idx] = event;

        if (flags & PERF_EF_START)
                qcom_l3_cache__event_start(event, 0);

        /* Propagate changes to the userspace mapping. */
        perf_event_update_userpage(event);

        return 0;
}

static void qcom_l3_cache__event_del(struct perf_event *event, int flags)
{
        struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;
        int order = event_uses_long_counter(event) ? 1 : 0;

        /* Stop and clean up */
        qcom_l3_cache__event_stop(event, flags | PERF_EF_UPDATE);
        l3pmu->events[hwc->idx] = NULL;
        bitmap_release_region(l3pmu->used_mask, hwc->idx, order);

        /* Propagate changes to the userspace mapping. */
        perf_event_update_userpage(event);
}

static void qcom_l3_cache__event_read(struct perf_event *event)
{
        const struct l3cache_event_ops *ops = l3cache_event_get_ops(event);

        ops->update(event);
}

/*
 * Add sysfs attributes
 *
 * We export:
 * - formats, used by perf user space and other tools to configure events
 * - events, used by perf user space and other tools to create events
 *   symbolically, e.g.:
 *     perf stat -a -e l3cache_0_0/event=read-miss/ ls
 *     perf stat -a -e l3cache_0_0/event=0x21/ ls
 * - cpumask, used by perf user space and other tools to know on which CPUs
 *   to open the events
 */
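
/*
 * The 'lc' format field can be combined with any event to request a 64-bit
 * (chained) counter, e.g.:
 *     perf stat -a -e l3cache_0_0/event=read-miss,lc=1/ ls
 * For that example instance the attributes below are exposed under
 * /sys/bus/event_source/devices/l3cache_0_0/.
 */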

/* formats */

static ssize_t l3cache_pmu_format_show(struct device *dev,
                                       struct device_attribute *attr, char *buf)
{
        struct dev_ext_attribute *eattr;

        eattr = container_of(attr, struct dev_ext_attribute, attr);
        return sprintf(buf, "%s\n", (char *) eattr->var);
}

#define L3CACHE_PMU_FORMAT_ATTR(_name, _config)                               \
        (&((struct dev_ext_attribute[]) {                                     \
                { .attr = __ATTR(_name, 0444, l3cache_pmu_format_show, NULL), \
                  .var = (void *) _config, }                                  \
        })[0].attr.attr)

static struct attribute *qcom_l3_cache_pmu_formats[] = {
        L3CACHE_PMU_FORMAT_ATTR(event, "config:0-7"),
        L3CACHE_PMU_FORMAT_ATTR(lc, "config:" __stringify(L3_EVENT_LC_BIT)),
        NULL,
};

static struct attribute_group qcom_l3_cache_pmu_format_group = {
        .name = "format",
        .attrs = qcom_l3_cache_pmu_formats,
};

/* events */

static ssize_t l3cache_pmu_event_show(struct device *dev,
                                     struct device_attribute *attr, char *page)
{
        struct perf_pmu_events_attr *pmu_attr;

        pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
        return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
}

#define L3CACHE_EVENT_ATTR(_name, _id)                                       \
        (&((struct perf_pmu_events_attr[]) {                                 \
                { .attr = __ATTR(_name, 0444, l3cache_pmu_event_show, NULL), \
                  .id = _id, }                                               \
        })[0].attr.attr)

static struct attribute *qcom_l3_cache_pmu_events[] = {
        L3CACHE_EVENT_ATTR(cycles, L3_EVENT_CYCLES),
        L3CACHE_EVENT_ATTR(read-hit, L3_EVENT_READ_HIT),
        L3CACHE_EVENT_ATTR(read-miss, L3_EVENT_READ_MISS),
        L3CACHE_EVENT_ATTR(read-hit-d-side, L3_EVENT_READ_HIT_D),
        L3CACHE_EVENT_ATTR(read-miss-d-side, L3_EVENT_READ_MISS_D),
        L3CACHE_EVENT_ATTR(write-hit, L3_EVENT_WRITE_HIT),
        L3CACHE_EVENT_ATTR(write-miss, L3_EVENT_WRITE_MISS),
        NULL
};

static struct attribute_group qcom_l3_cache_pmu_events_group = {
        .name = "events",
        .attrs = qcom_l3_cache_pmu_events,
};

/* cpumask */

static ssize_t qcom_l3_cache_pmu_cpumask_show(struct device *dev,
                                     struct device_attribute *attr, char *buf)
{
        struct l3cache_pmu *l3pmu = to_l3cache_pmu(dev_get_drvdata(dev));

        return cpumap_print_to_pagebuf(true, buf, &l3pmu->cpumask);
}

static DEVICE_ATTR(cpumask, 0444, qcom_l3_cache_pmu_cpumask_show, NULL);

static struct attribute *qcom_l3_cache_pmu_cpumask_attrs[] = {
        &dev_attr_cpumask.attr,
        NULL,
};

static struct attribute_group qcom_l3_cache_pmu_cpumask_attr_group = {
        .attrs = qcom_l3_cache_pmu_cpumask_attrs,
};

/*
 * Per PMU device attribute groups
 */
static const struct attribute_group *qcom_l3_cache_pmu_attr_grps[] = {
        &qcom_l3_cache_pmu_format_group,
        &qcom_l3_cache_pmu_events_group,
        &qcom_l3_cache_pmu_cpumask_attr_group,
        NULL,
};

/*
 * Probing functions and data.
 */

static int qcom_l3_cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
{
        struct l3cache_pmu *l3pmu = hlist_entry_safe(node, struct l3cache_pmu, node);

        /* If there is not a CPU/PMU association pick this CPU */
        if (cpumask_empty(&l3pmu->cpumask))
                cpumask_set_cpu(cpu, &l3pmu->cpumask);

        return 0;
}

static int qcom_l3_cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
        struct l3cache_pmu *l3pmu = hlist_entry_safe(node, struct l3cache_pmu, node);
        unsigned int target;

        if (!cpumask_test_and_clear_cpu(cpu, &l3pmu->cpumask))
                return 0;
        target = cpumask_any_but(cpu_online_mask, cpu);
        if (target >= nr_cpu_ids)
                return 0;
        perf_pmu_migrate_context(&l3pmu->pmu, cpu, target);
        cpumask_set_cpu(target, &l3pmu->cpumask);
        return 0;
}

static int qcom_l3_cache_pmu_probe(struct platform_device *pdev)
{
        struct l3cache_pmu *l3pmu;
        struct acpi_device *acpi_dev;
        struct resource *memrc;
        int ret;
        char *name;

        /* Initialize the PMU data structures */

        acpi_dev = ACPI_COMPANION(&pdev->dev);
        if (!acpi_dev)
                return -ENODEV;

        l3pmu = devm_kzalloc(&pdev->dev, sizeof(*l3pmu), GFP_KERNEL);
        name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "l3cache_%s_%s",
                      acpi_dev->parent->pnp.unique_id, acpi_dev->pnp.unique_id);
        if (!l3pmu || !name)
                return -ENOMEM;

        l3pmu->pmu = (struct pmu) {
                .task_ctx_nr    = perf_invalid_context,

                .pmu_enable     = qcom_l3_cache__pmu_enable,
                .pmu_disable    = qcom_l3_cache__pmu_disable,
                .event_init     = qcom_l3_cache__event_init,
                .add            = qcom_l3_cache__event_add,
                .del            = qcom_l3_cache__event_del,
                .start          = qcom_l3_cache__event_start,
                .stop           = qcom_l3_cache__event_stop,
                .read           = qcom_l3_cache__event_read,

                .attr_groups    = qcom_l3_cache_pmu_attr_grps,
        };

        memrc = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        l3pmu->regs = devm_ioremap_resource(&pdev->dev, memrc);
        if (IS_ERR(l3pmu->regs)) {
                dev_err(&pdev->dev, "Can't map PMU @%pa\n", &memrc->start);
                return PTR_ERR(l3pmu->regs);
        }

        qcom_l3_cache__init(l3pmu);

        ret = platform_get_irq(pdev, 0);
        if (ret <= 0)
                return ret;

        ret = devm_request_irq(&pdev->dev, ret, qcom_l3_cache__handle_irq, 0,
                               name, l3pmu);
        if (ret) {
                dev_err(&pdev->dev, "Request for IRQ failed for slice @%pa\n",
                        &memrc->start);
                return ret;
        }

        /* Add this instance to the list used by the offline callback */
        ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, &l3pmu->node);
        if (ret) {
                dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
                return ret;
        }

        ret = perf_pmu_register(&l3pmu->pmu, name, -1);
        if (ret < 0) {
                dev_err(&pdev->dev, "Failed to register L3 cache PMU (%d)\n", ret);
                return ret;
        }

        dev_info(&pdev->dev, "Registered %s, type: %d\n", name, l3pmu->pmu.type);

        return 0;
}

static const struct acpi_device_id qcom_l3_cache_pmu_acpi_match[] = {
        { "QCOM8081", },
        { }
};
MODULE_DEVICE_TABLE(acpi, qcom_l3_cache_pmu_acpi_match);

static struct platform_driver qcom_l3_cache_pmu_driver = {
        .driver = {
                .name = "qcom-l3cache-pmu",
                .acpi_match_table = ACPI_PTR(qcom_l3_cache_pmu_acpi_match),
        },
        .probe = qcom_l3_cache_pmu_probe,
};

static int __init register_qcom_l3_cache_pmu_driver(void)
{
        int ret;

        /* Install a hook to update the reader CPU in case it goes offline */
        ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
                                      "perf/qcom/l3cache:online",
                                      qcom_l3_cache_pmu_online_cpu,
                                      qcom_l3_cache_pmu_offline_cpu);
        if (ret)
                return ret;

        return platform_driver_register(&qcom_l3_cache_pmu_driver);
}
device_initcall(register_qcom_l3_cache_pmu_driver);