linux/drivers/perf/qcom_l2_pmu.c
/* Copyright (c) 2015-2017 The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
#include <linux/acpi.h>
#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/cpuhotplug.h>
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/percpu.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/sysfs.h>
#include <linux/types.h>

#include <asm/barrier.h>
#include <asm/local64.h>
#include <asm/sysreg.h>

#define MAX_L2_CTRS             9

#define L2PMCR_NUM_EV_SHIFT     11
#define L2PMCR_NUM_EV_MASK      0x1F

#define L2PMCR                  0x400
#define L2PMCNTENCLR            0x403
#define L2PMCNTENSET            0x404
#define L2PMINTENCLR            0x405
#define L2PMINTENSET            0x406
#define L2PMOVSCLR              0x407
#define L2PMOVSSET              0x408
#define L2PMCCNTCR              0x409
#define L2PMCCNTR               0x40A
#define L2PMCCNTSR              0x40C
#define L2PMRESR                0x410
#define IA_L2PMXEVCNTCR_BASE    0x420
#define IA_L2PMXEVCNTR_BASE     0x421
#define IA_L2PMXEVFILTER_BASE   0x423
#define IA_L2PMXEVTYPER_BASE    0x424

#define IA_L2_REG_OFFSET        0x10

#define L2PMXEVFILTER_SUFILTER_ALL      0x000E0000
#define L2PMXEVFILTER_ORGFILTER_IDINDEP 0x00000004
#define L2PMXEVFILTER_ORGFILTER_ALL     0x00000003

#define L2EVTYPER_REG_SHIFT     3

#define L2PMRESR_GROUP_BITS     8
#define L2PMRESR_GROUP_MASK     GENMASK(7, 0)

#define L2CYCLE_CTR_BIT         31
#define L2CYCLE_CTR_RAW_CODE    0xFE

#define L2PMCR_RESET_ALL        0x6
#define L2PMCR_COUNTERS_ENABLE  0x1
#define L2PMCR_COUNTERS_DISABLE 0x0

#define L2PMRESR_EN             BIT_ULL(63)

#define L2_EVT_MASK             0x00000FFF
#define L2_EVT_CODE_MASK        0x00000FF0
#define L2_EVT_GRP_MASK         0x0000000F
#define L2_EVT_CODE_SHIFT       4
#define L2_EVT_GRP_SHIFT        0

#define L2_EVT_CODE(event)   (((event) & L2_EVT_CODE_MASK) >> L2_EVT_CODE_SHIFT)
#define L2_EVT_GROUP(event)  (((event) & L2_EVT_GRP_MASK) >> L2_EVT_GRP_SHIFT)

#define L2_EVT_GROUP_MAX        7

#define L2_COUNTER_RELOAD       BIT_ULL(31)
#define L2_CYCLE_COUNTER_RELOAD BIT_ULL(63)
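
/*
 * The reload values above preset a counter to half its range (2^31 for the
 * 32-bit event counters, 2^63 for the 64-bit cycle counter), so even with
 * large interrupt latency a counter should not wrap past its reload value
 * before the overflow is serviced; see l2_cache_cluster_set_period().
 */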

#define L2CPUSRSELR_EL1         sys_reg(3, 3, 15, 0, 6)
#define L2CPUSRDR_EL1           sys_reg(3, 3, 15, 0, 7)
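
/*
 * The two Qualcomm-specific (IMPLEMENTATION DEFINED) system registers above
 * form a select/data pair: the L2 register address is written to
 * L2CPUSRSELR_EL1 and the selected register is then accessed through
 * L2CPUSRDR_EL1 (see set_l2_indirect_reg()/get_l2_indirect_reg() below).
 */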

#define reg_idx(reg, i)         (((i) * IA_L2_REG_OFFSET) + reg##_BASE)

/*
 * Events
 */
#define L2_EVENT_CYCLES                    0xfe
#define L2_EVENT_DCACHE_OPS                0x400
#define L2_EVENT_ICACHE_OPS                0x401
#define L2_EVENT_TLBI                      0x402
#define L2_EVENT_BARRIERS                  0x403
#define L2_EVENT_TOTAL_READS               0x405
#define L2_EVENT_TOTAL_WRITES              0x406
#define L2_EVENT_TOTAL_REQUESTS            0x407
#define L2_EVENT_LDREX                     0x420
#define L2_EVENT_STREX                     0x421
#define L2_EVENT_CLREX                     0x422

static DEFINE_RAW_SPINLOCK(l2_access_lock);

/**
 * set_l2_indirect_reg: write value to an L2 register
 * @reg: Address of L2 register.
 * @val: Value to be written to register.
 *
 * Use architecturally required barriers for ordering between system register
 * accesses
 */
static void set_l2_indirect_reg(u64 reg, u64 val)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&l2_access_lock, flags);
        write_sysreg_s(reg, L2CPUSRSELR_EL1);
        isb();
        write_sysreg_s(val, L2CPUSRDR_EL1);
        isb();
        raw_spin_unlock_irqrestore(&l2_access_lock, flags);
}

/**
 * get_l2_indirect_reg: read an L2 register value
 * @reg: Address of L2 register.
 *
 * Use architecturally required barriers for ordering between system register
 * accesses
 */
static u64 get_l2_indirect_reg(u64 reg)
{
        u64 val;
        unsigned long flags;

        raw_spin_lock_irqsave(&l2_access_lock, flags);
        write_sysreg_s(reg, L2CPUSRSELR_EL1);
        isb();
        val = read_sysreg_s(L2CPUSRDR_EL1);
        raw_spin_unlock_irqrestore(&l2_access_lock, flags);

        return val;
}

struct cluster_pmu;

/*
 * Aggregate PMU. Implements the core pmu functions and manages
 * the hardware PMUs.
 */
struct l2cache_pmu {
        struct hlist_node node;
        u32 num_pmus;
        struct pmu pmu;
        int num_counters;
        cpumask_t cpumask;
        struct platform_device *pdev;
        struct cluster_pmu * __percpu *pmu_cluster;
        struct list_head clusters;
};

/*
 * The cache is made up of one or more clusters, each cluster has its own PMU.
 * Each cluster is associated with one or more CPUs.
 * This structure represents one of the hardware PMUs.
 *
 * Events can be envisioned as a 2-dimensional array. Each column represents
 * a group of events. There are 8 groups. Only one entry from each
 * group can be in use at a time.
 *
 * Events are specified as 0xCCG, where CC is 2 hex digits specifying
 * the code (array row) and G specifies the group (column).
 *
 * In addition there is a cycle counter event specified by L2CYCLE_CTR_RAW_CODE
 * which is outside the above scheme.
 */
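/*
 * As an example of the encoding above, raw event 0x405 (L2_EVENT_TOTAL_READS)
 * decodes to L2_EVT_CODE(0x405) == 0x40 and L2_EVT_GROUP(0x405) == 5: it
 * occupies group (column) 5, so it cannot be counted on the same cluster at
 * the same time as another event whose low nibble is also 5.
 */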
struct cluster_pmu {
        struct list_head next;
        struct perf_event *events[MAX_L2_CTRS];
        struct l2cache_pmu *l2cache_pmu;
        DECLARE_BITMAP(used_counters, MAX_L2_CTRS);
        DECLARE_BITMAP(used_groups, L2_EVT_GROUP_MAX + 1);
        int irq;
        int cluster_id;
        /* The CPU that is used for collecting events on this cluster */
        int on_cpu;
        /* All the CPUs associated with this cluster */
        cpumask_t cluster_cpus;
        spinlock_t pmu_lock;
};

#define to_l2cache_pmu(p) (container_of(p, struct l2cache_pmu, pmu))

static u32 l2_cycle_ctr_idx;
static u32 l2_counter_present_mask;

static inline u32 idx_to_reg_bit(u32 idx)
{
        if (idx == l2_cycle_ctr_idx)
                return BIT(L2CYCLE_CTR_BIT);

        return BIT(idx);
}

static inline struct cluster_pmu *get_cluster_pmu(
        struct l2cache_pmu *l2cache_pmu, int cpu)
{
        return *per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu);
}

static void cluster_pmu_reset(void)
{
        /* Reset all counters */
        set_l2_indirect_reg(L2PMCR, L2PMCR_RESET_ALL);
        set_l2_indirect_reg(L2PMCNTENCLR, l2_counter_present_mask);
        set_l2_indirect_reg(L2PMINTENCLR, l2_counter_present_mask);
        set_l2_indirect_reg(L2PMOVSCLR, l2_counter_present_mask);
}

static inline void cluster_pmu_enable(void)
{
        set_l2_indirect_reg(L2PMCR, L2PMCR_COUNTERS_ENABLE);
}

static inline void cluster_pmu_disable(void)
{
        set_l2_indirect_reg(L2PMCR, L2PMCR_COUNTERS_DISABLE);
}

static inline void cluster_pmu_counter_set_value(u32 idx, u64 value)
{
        if (idx == l2_cycle_ctr_idx)
                set_l2_indirect_reg(L2PMCCNTR, value);
        else
                set_l2_indirect_reg(reg_idx(IA_L2PMXEVCNTR, idx), value);
}

static inline u64 cluster_pmu_counter_get_value(u32 idx)
{
        u64 value;

        if (idx == l2_cycle_ctr_idx)
                value = get_l2_indirect_reg(L2PMCCNTR);
        else
                value = get_l2_indirect_reg(reg_idx(IA_L2PMXEVCNTR, idx));

        return value;
}

static inline void cluster_pmu_counter_enable(u32 idx)
{
        set_l2_indirect_reg(L2PMCNTENSET, idx_to_reg_bit(idx));
}

static inline void cluster_pmu_counter_disable(u32 idx)
{
        set_l2_indirect_reg(L2PMCNTENCLR, idx_to_reg_bit(idx));
}

static inline void cluster_pmu_counter_enable_interrupt(u32 idx)
{
        set_l2_indirect_reg(L2PMINTENSET, idx_to_reg_bit(idx));
}

static inline void cluster_pmu_counter_disable_interrupt(u32 idx)
{
        set_l2_indirect_reg(L2PMINTENCLR, idx_to_reg_bit(idx));
}

static inline void cluster_pmu_set_evccntcr(u32 val)
{
        set_l2_indirect_reg(L2PMCCNTCR, val);
}

static inline void cluster_pmu_set_evcntcr(u32 ctr, u32 val)
{
        set_l2_indirect_reg(reg_idx(IA_L2PMXEVCNTCR, ctr), val);
}

static inline void cluster_pmu_set_evtyper(u32 ctr, u32 val)
{
        set_l2_indirect_reg(reg_idx(IA_L2PMXEVTYPER, ctr), val);
}

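/*
 * L2PMRESR packs one 8-bit event code per group, so programming the code for
 * one counter must not clobber the codes of events already active in other
 * groups; hence the read-modify-write under the per-cluster pmu_lock.
 */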
static void cluster_pmu_set_resr(struct cluster_pmu *cluster,
                               u32 event_group, u32 event_cc)
{
        u64 field;
        u64 resr_val;
        u32 shift;
        unsigned long flags;

        shift = L2PMRESR_GROUP_BITS * event_group;
        field = ((u64)(event_cc & L2PMRESR_GROUP_MASK) << shift);

        spin_lock_irqsave(&cluster->pmu_lock, flags);

        resr_val = get_l2_indirect_reg(L2PMRESR);
        resr_val &= ~(L2PMRESR_GROUP_MASK << shift);
        resr_val |= field;
        resr_val |= L2PMRESR_EN;
        set_l2_indirect_reg(L2PMRESR, resr_val);

        spin_unlock_irqrestore(&cluster->pmu_lock, flags);
}

/*
 * Hardware allows filtering of events based on the originating
 * CPU. Turn this off by setting filter bits to allow events from
 * all CPUS, subunits and ID independent events in this cluster.
 */
static inline void cluster_pmu_set_evfilter_sys_mode(u32 ctr)
{
        u32 val =  L2PMXEVFILTER_SUFILTER_ALL |
                   L2PMXEVFILTER_ORGFILTER_IDINDEP |
                   L2PMXEVFILTER_ORGFILTER_ALL;

        set_l2_indirect_reg(reg_idx(IA_L2PMXEVFILTER, ctr), val);
}

static inline u32 cluster_pmu_getreset_ovsr(void)
{
        u32 result = get_l2_indirect_reg(L2PMOVSSET);

        set_l2_indirect_reg(L2PMOVSCLR, result);
        return result;
}

static inline bool cluster_pmu_has_overflowed(u32 ovsr)
{
        return !!(ovsr & l2_counter_present_mask);
}

static inline bool cluster_pmu_counter_has_overflowed(u32 ovsr, u32 idx)
{
        return !!(ovsr & idx_to_reg_bit(idx));
}

static void l2_cache_event_update(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        u64 delta, prev, now;
        u32 idx = hwc->idx;

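        /*
         * Re-read the hardware counter until prev_count is updated without
         * racing against a concurrent update (e.g. from the overflow IRQ
         * handler calling into this same function).
         */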
        do {
                prev = local64_read(&hwc->prev_count);
                now = cluster_pmu_counter_get_value(idx);
        } while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);

        /*
         * The cycle counter is 64-bit, but all other counters are
         * 32-bit, and we must handle 32-bit overflow explicitly.
         */
        delta = now - prev;
        if (idx != l2_cycle_ctr_idx)
                delta &= 0xffffffff;

        local64_add(delta, &event->count);
}

static void l2_cache_cluster_set_period(struct cluster_pmu *cluster,
                                       struct hw_perf_event *hwc)
{
        u32 idx = hwc->idx;
        u64 new;

        /*
         * We limit the max period to half the max counter value so
         * that even in the case of extreme interrupt latency the
         * counter will (hopefully) not wrap past its initial value.
         */
        if (idx == l2_cycle_ctr_idx)
                new = L2_CYCLE_COUNTER_RELOAD;
        else
                new = L2_COUNTER_RELOAD;

        local64_set(&hwc->prev_count, new);
        cluster_pmu_counter_set_value(idx, new);
}

static int l2_cache_get_event_idx(struct cluster_pmu *cluster,
                                   struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        int idx;
        int num_ctrs = cluster->l2cache_pmu->num_counters - 1;
        unsigned int group;

        if (hwc->config_base == L2CYCLE_CTR_RAW_CODE) {
                if (test_and_set_bit(l2_cycle_ctr_idx, cluster->used_counters))
                        return -EAGAIN;

                return l2_cycle_ctr_idx;
        }

        idx = find_first_zero_bit(cluster->used_counters, num_ctrs);
        if (idx == num_ctrs)
                /* The counters are all in use. */
                return -EAGAIN;

        /*
         * Check for column exclusion: the event's column (group) is already
         * in use by an event outside this event's perf group. Conflicting
         * events within the same perf group are detected in event_init.
         */
        group = L2_EVT_GROUP(hwc->config_base);
        if (test_bit(group, cluster->used_groups))
                return -EAGAIN;

        set_bit(idx, cluster->used_counters);
        set_bit(group, cluster->used_groups);

        return idx;
}

static void l2_cache_clear_event_idx(struct cluster_pmu *cluster,
                                      struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        clear_bit(idx, cluster->used_counters);
        if (hwc->config_base != L2CYCLE_CTR_RAW_CODE)
                clear_bit(L2_EVT_GROUP(hwc->config_base), cluster->used_groups);
}

static irqreturn_t l2_cache_handle_irq(int irq_num, void *data)
{
        struct cluster_pmu *cluster = data;
        int num_counters = cluster->l2cache_pmu->num_counters;
        u32 ovsr;
        int idx;

        ovsr = cluster_pmu_getreset_ovsr();
        if (!cluster_pmu_has_overflowed(ovsr))
                return IRQ_NONE;

        for_each_set_bit(idx, cluster->used_counters, num_counters) {
                struct perf_event *event = cluster->events[idx];
                struct hw_perf_event *hwc;

                if (WARN_ON_ONCE(!event))
                        continue;

                if (!cluster_pmu_counter_has_overflowed(ovsr, idx))
                        continue;

                l2_cache_event_update(event);
                hwc = &event->hw;

                l2_cache_cluster_set_period(cluster, hwc);
        }

        return IRQ_HANDLED;
}

/*
 * Implementation of abstract pmu functionality required by
 * the core perf events code.
 */

static void l2_cache_pmu_enable(struct pmu *pmu)
{
        /*
         * Although there is only one PMU (per socket) controlling multiple
         * physical PMUs (per cluster), because we do not support per-task mode
         * each event is associated with a CPU. Each event has pmu_enable
         * called on its CPU, so here it is only necessary to enable the
         * counters for the current CPU.
         */

        cluster_pmu_enable();
}

static void l2_cache_pmu_disable(struct pmu *pmu)
{
        cluster_pmu_disable();
}

static int l2_cache_event_init(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        struct cluster_pmu *cluster;
        struct perf_event *sibling;
        struct l2cache_pmu *l2cache_pmu;

        if (event->attr.type != event->pmu->type)
                return -ENOENT;

        l2cache_pmu = to_l2cache_pmu(event->pmu);

        if (hwc->sample_period) {
                dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
                                    "Sampling not supported\n");
                return -EOPNOTSUPP;
        }

        if (event->cpu < 0) {
                dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
                                    "Per-task mode not supported\n");
                return -EOPNOTSUPP;
        }

        /* We cannot filter accurately so we just don't allow it. */
        if (event->attr.exclude_user || event->attr.exclude_kernel ||
            event->attr.exclude_hv || event->attr.exclude_idle) {
                dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
                                    "Can't exclude execution levels\n");
                return -EOPNOTSUPP;
        }

        if (((L2_EVT_GROUP(event->attr.config) > L2_EVT_GROUP_MAX) ||
             ((event->attr.config & ~L2_EVT_MASK) != 0)) &&
            (event->attr.config != L2CYCLE_CTR_RAW_CODE)) {
                dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
                                    "Invalid config %llx\n",
                                    event->attr.config);
                return -EINVAL;
        }

        /* Don't allow groups with mixed PMUs, except for s/w events */
        if (event->group_leader->pmu != event->pmu &&
            !is_software_event(event->group_leader)) {
                dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
                         "Can't create mixed PMU group\n");
                return -EINVAL;
        }

        for_each_sibling_event(sibling, event->group_leader) {
                if (sibling->pmu != event->pmu &&
                    !is_software_event(sibling)) {
                        dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
                                 "Can't create mixed PMU group\n");
                        return -EINVAL;
                }
        }

        cluster = get_cluster_pmu(l2cache_pmu, event->cpu);
        if (!cluster) {
                /* CPU has not been initialised */
                dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
                        "CPU%d not associated with L2 cluster\n", event->cpu);
                return -EINVAL;
        }

        /* Ensure all events in a group are on the same cpu */
        if ((event->group_leader != event) &&
            (cluster->on_cpu != event->group_leader->cpu)) {
                dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
                         "Can't create group on CPUs %d and %d",
                         event->cpu, event->group_leader->cpu);
                return -EINVAL;
        }

        if ((event != event->group_leader) &&
            !is_software_event(event->group_leader) &&
            (L2_EVT_GROUP(event->group_leader->attr.config) ==
             L2_EVT_GROUP(event->attr.config))) {
                dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
                         "Column exclusion: conflicting events %llx %llx\n",
                       event->group_leader->attr.config,
                       event->attr.config);
                return -EINVAL;
        }

        for_each_sibling_event(sibling, event->group_leader) {
                if ((sibling != event) &&
                    !is_software_event(sibling) &&
                    (L2_EVT_GROUP(sibling->attr.config) ==
                     L2_EVT_GROUP(event->attr.config))) {
                        dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
                             "Column exclusion: conflicting events %llx %llx\n",
                                            sibling->attr.config,
                                            event->attr.config);
                        return -EINVAL;
                }
        }

        hwc->idx = -1;
        hwc->config_base = event->attr.config;

        /*
         * Ensure all events are on the same cpu so all events are in the
         * same cpu context, to avoid races on pmu_enable etc.
         */
        event->cpu = cluster->on_cpu;

        return 0;
}

static void l2_cache_event_start(struct perf_event *event, int flags)
{
        struct cluster_pmu *cluster;
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;
        u32 config;
        u32 event_cc, event_group;

        hwc->state = 0;

        cluster = get_cluster_pmu(to_l2cache_pmu(event->pmu), event->cpu);

        l2_cache_cluster_set_period(cluster, hwc);

        if (hwc->config_base == L2CYCLE_CTR_RAW_CODE) {
                cluster_pmu_set_evccntcr(0);
        } else {
                config = hwc->config_base;
                event_cc    = L2_EVT_CODE(config);
                event_group = L2_EVT_GROUP(config);

                cluster_pmu_set_evcntcr(idx, 0);
                cluster_pmu_set_evtyper(idx, event_group);
                cluster_pmu_set_resr(cluster, event_group, event_cc);
                cluster_pmu_set_evfilter_sys_mode(idx);
        }

        cluster_pmu_counter_enable_interrupt(idx);
        cluster_pmu_counter_enable(idx);
}

static void l2_cache_event_stop(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        if (hwc->state & PERF_HES_STOPPED)
                return;

        cluster_pmu_counter_disable_interrupt(idx);
        cluster_pmu_counter_disable(idx);

        if (flags & PERF_EF_UPDATE)
                l2_cache_event_update(event);
        hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
}

static int l2_cache_event_add(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        int idx;
        int err = 0;
        struct cluster_pmu *cluster;

        cluster = get_cluster_pmu(to_l2cache_pmu(event->pmu), event->cpu);

        idx = l2_cache_get_event_idx(cluster, event);
        if (idx < 0)
                return idx;

        hwc->idx = idx;
        hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
        cluster->events[idx] = event;
        local64_set(&hwc->prev_count, 0);

        if (flags & PERF_EF_START)
                l2_cache_event_start(event, flags);

        /* Propagate changes to the userspace mapping. */
        perf_event_update_userpage(event);

        return err;
}

static void l2_cache_event_del(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        struct cluster_pmu *cluster;
        int idx = hwc->idx;

        cluster = get_cluster_pmu(to_l2cache_pmu(event->pmu), event->cpu);

        l2_cache_event_stop(event, flags | PERF_EF_UPDATE);
        cluster->events[idx] = NULL;
        l2_cache_clear_event_idx(cluster, event);

        perf_event_update_userpage(event);
}

static void l2_cache_event_read(struct perf_event *event)
{
        l2_cache_event_update(event);
}

static ssize_t l2_cache_pmu_cpumask_show(struct device *dev,
                                         struct device_attribute *attr,
                                         char *buf)
{
        struct l2cache_pmu *l2cache_pmu = to_l2cache_pmu(dev_get_drvdata(dev));

        return cpumap_print_to_pagebuf(true, buf, &l2cache_pmu->cpumask);
}

static struct device_attribute l2_cache_pmu_cpumask_attr =
                __ATTR(cpumask, S_IRUGO, l2_cache_pmu_cpumask_show, NULL);

static struct attribute *l2_cache_pmu_cpumask_attrs[] = {
        &l2_cache_pmu_cpumask_attr.attr,
        NULL,
};

static struct attribute_group l2_cache_pmu_cpumask_group = {
        .attrs = l2_cache_pmu_cpumask_attrs,
};

/* CCG format for perf RAW codes. */
PMU_FORMAT_ATTR(l2_code,   "config:4-11");
PMU_FORMAT_ATTR(l2_group,  "config:0-3");
PMU_FORMAT_ATTR(event,     "config:0-11");

static struct attribute *l2_cache_pmu_formats[] = {
        &format_attr_l2_code.attr,
        &format_attr_l2_group.attr,
        &format_attr_event.attr,
        NULL,
};

static struct attribute_group l2_cache_pmu_format_group = {
        .name = "format",
        .attrs = l2_cache_pmu_formats,
};

static ssize_t l2cache_pmu_event_show(struct device *dev,
                                      struct device_attribute *attr, char *page)
{
        struct perf_pmu_events_attr *pmu_attr;

        pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
        return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
}

#define L2CACHE_EVENT_ATTR(_name, _id)                                       \
        (&((struct perf_pmu_events_attr[]) {                                 \
                { .attr = __ATTR(_name, 0444, l2cache_pmu_event_show, NULL), \
                  .id = _id, }                                               \
        })[0].attr.attr)

static struct attribute *l2_cache_pmu_events[] = {
        L2CACHE_EVENT_ATTR(cycles, L2_EVENT_CYCLES),
        L2CACHE_EVENT_ATTR(dcache-ops, L2_EVENT_DCACHE_OPS),
        L2CACHE_EVENT_ATTR(icache-ops, L2_EVENT_ICACHE_OPS),
        L2CACHE_EVENT_ATTR(tlbi, L2_EVENT_TLBI),
        L2CACHE_EVENT_ATTR(barriers, L2_EVENT_BARRIERS),
        L2CACHE_EVENT_ATTR(total-reads, L2_EVENT_TOTAL_READS),
        L2CACHE_EVENT_ATTR(total-writes, L2_EVENT_TOTAL_WRITES),
        L2CACHE_EVENT_ATTR(total-requests, L2_EVENT_TOTAL_REQUESTS),
        L2CACHE_EVENT_ATTR(ldrex, L2_EVENT_LDREX),
        L2CACHE_EVENT_ATTR(strex, L2_EVENT_STREX),
        L2CACHE_EVENT_ATTR(clrex, L2_EVENT_CLREX),
        NULL
};

static struct attribute_group l2_cache_pmu_events_group = {
        .name = "events",
        .attrs = l2_cache_pmu_events,
};

static const struct attribute_group *l2_cache_pmu_attr_grps[] = {
        &l2_cache_pmu_format_group,
        &l2_cache_pmu_cpumask_group,
        &l2_cache_pmu_events_group,
        NULL,
};
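
/*
 * The attribute groups above appear under
 * /sys/bus/event_source/devices/l2cache_0/.  Illustrative (hypothetical)
 * command lines, counting by named event or by raw code/group pair
 * (0x40/2 encodes event 0x402, tlbi):
 *
 *   perf stat -e l2cache_0/dcache-ops/ -a sleep 1
 *   perf stat -e l2cache_0/l2_code=0x40,l2_group=0x2/ -a sleep 1
 */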

/*
 * Generic device handlers
 */

static const struct acpi_device_id l2_cache_pmu_acpi_match[] = {
        { "QCOM8130", },
        { }
};

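/*
 * For example, if the L2PMCR NUM_EV field reads back as 8, this reports
 * 9 counters: 8 general-purpose event counters plus the dedicated cycle
 * counter, matching MAX_L2_CTRS.
 */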
static int get_num_counters(void)
{
        int val;

        val = get_l2_indirect_reg(L2PMCR);

        /*
         * Read number of counters from L2PMCR and add 1
         * for the cycle counter.
         */
        return ((val >> L2PMCR_NUM_EV_SHIFT) & L2PMCR_NUM_EV_MASK) + 1;
}

static struct cluster_pmu *l2_cache_associate_cpu_with_cluster(
        struct l2cache_pmu *l2cache_pmu, int cpu)
{
        u64 mpidr;
        int cpu_cluster_id;
        struct cluster_pmu *cluster;

        /*
         * This assumes that the cluster_id is in MPIDR[aff1] for
         * single-threaded cores, and MPIDR[aff2] for multi-threaded
         * cores. This logic will have to be updated if this changes.
         */
        mpidr = read_cpuid_mpidr();
        if (mpidr & MPIDR_MT_BITMASK)
                cpu_cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
        else
                cpu_cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);

        list_for_each_entry(cluster, &l2cache_pmu->clusters, next) {
                if (cluster->cluster_id != cpu_cluster_id)
                        continue;

                dev_info(&l2cache_pmu->pdev->dev,
                         "CPU%d associated with cluster %d\n", cpu,
                         cluster->cluster_id);
                cpumask_set_cpu(cpu, &cluster->cluster_cpus);
                *per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu) = cluster;
                return cluster;
        }

        /* No matching cluster: return NULL rather than the list iterator. */
        return NULL;
}

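/*
 * Each cluster has a single owning CPU (cluster->on_cpu): all of the
 * cluster's events are scheduled there and its overflow interrupt is affine
 * to it. The hotplug callbacks below pick an owner when the first CPU of a
 * cluster comes online and migrate ownership when the owner goes offline.
 */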
static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
{
        struct cluster_pmu *cluster;
        struct l2cache_pmu *l2cache_pmu;

        l2cache_pmu = hlist_entry_safe(node, struct l2cache_pmu, node);
        cluster = get_cluster_pmu(l2cache_pmu, cpu);
        if (!cluster) {
                /* First time this CPU has come online */
                cluster = l2_cache_associate_cpu_with_cluster(l2cache_pmu, cpu);
                if (!cluster) {
                        /* Only if broken firmware doesn't list every cluster */
                        WARN_ONCE(1, "No L2 cache cluster for CPU%d\n", cpu);
                        return 0;
                }
        }

        /* If another CPU is managing this cluster, we're done */
        if (cluster->on_cpu != -1)
                return 0;

        /*
         * All CPUs on this cluster were down, use this one.
         * Reset to put it into sane state.
         */
        cluster->on_cpu = cpu;
        cpumask_set_cpu(cpu, &l2cache_pmu->cpumask);
        cluster_pmu_reset();

        WARN_ON(irq_set_affinity(cluster->irq, cpumask_of(cpu)));
        enable_irq(cluster->irq);

        return 0;
}

static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
        struct cluster_pmu *cluster;
        struct l2cache_pmu *l2cache_pmu;
        cpumask_t cluster_online_cpus;
        unsigned int target;

        l2cache_pmu = hlist_entry_safe(node, struct l2cache_pmu, node);
        cluster = get_cluster_pmu(l2cache_pmu, cpu);
        if (!cluster)
                return 0;

        /* If this CPU is not managing the cluster, we're done */
        if (cluster->on_cpu != cpu)
                return 0;

        /* Give up ownership of cluster */
        cpumask_clear_cpu(cpu, &l2cache_pmu->cpumask);
        cluster->on_cpu = -1;

        /* Any other CPU for this cluster which is still online */
        cpumask_and(&cluster_online_cpus, &cluster->cluster_cpus,
                    cpu_online_mask);
        target = cpumask_any_but(&cluster_online_cpus, cpu);
        if (target >= nr_cpu_ids) {
                disable_irq(cluster->irq);
                return 0;
        }

        perf_pmu_migrate_context(&l2cache_pmu->pmu, cpu, target);
        cluster->on_cpu = target;
        cpumask_set_cpu(target, &l2cache_pmu->cpumask);
        WARN_ON(irq_set_affinity(cluster->irq, cpumask_of(target)));

        return 0;
}

static int l2_cache_pmu_probe_cluster(struct device *dev, void *data)
{
        struct platform_device *pdev = to_platform_device(dev->parent);
        struct platform_device *sdev = to_platform_device(dev);
        struct l2cache_pmu *l2cache_pmu = data;
        struct cluster_pmu *cluster;
        struct acpi_device *device;
        unsigned long fw_cluster_id;
        int err;
        int irq;

        if (acpi_bus_get_device(ACPI_HANDLE(dev), &device))
                return -ENODEV;

        if (kstrtoul(device->pnp.unique_id, 10, &fw_cluster_id) < 0) {
                dev_err(&pdev->dev, "unable to read ACPI uid\n");
                return -ENODEV;
        }

        cluster = devm_kzalloc(&pdev->dev, sizeof(*cluster), GFP_KERNEL);
        if (!cluster)
                return -ENOMEM;

        INIT_LIST_HEAD(&cluster->next);
        list_add(&cluster->next, &l2cache_pmu->clusters);
        cluster->cluster_id = fw_cluster_id;

        irq = platform_get_irq(sdev, 0);
        if (irq < 0) {
                dev_err(&pdev->dev,
                        "Failed to get valid irq for cluster %ld\n",
                        fw_cluster_id);
                return irq;
        }
        irq_set_status_flags(irq, IRQ_NOAUTOEN);
        cluster->irq = irq;

        cluster->l2cache_pmu = l2cache_pmu;
        cluster->on_cpu = -1;

        err = devm_request_irq(&pdev->dev, irq, l2_cache_handle_irq,
                               IRQF_NOBALANCING | IRQF_NO_THREAD,
                               "l2-cache-pmu", cluster);
        if (err) {
                dev_err(&pdev->dev,
                        "Unable to request IRQ%d for L2 PMU counters\n", irq);
                return err;
        }

        dev_info(&pdev->dev,
                "Registered L2 cache PMU cluster %ld\n", fw_cluster_id);

        spin_lock_init(&cluster->pmu_lock);

        l2cache_pmu->num_pmus++;

        return 0;
}

static int l2_cache_pmu_probe(struct platform_device *pdev)
{
        int err;
        struct l2cache_pmu *l2cache_pmu;

        l2cache_pmu =
                devm_kzalloc(&pdev->dev, sizeof(*l2cache_pmu), GFP_KERNEL);
        if (!l2cache_pmu)
                return -ENOMEM;

        INIT_LIST_HEAD(&l2cache_pmu->clusters);

        platform_set_drvdata(pdev, l2cache_pmu);
        l2cache_pmu->pmu = (struct pmu) {
                /* suffix is instance id for future use with multiple sockets */
                .name           = "l2cache_0",
                .task_ctx_nr    = perf_invalid_context,
                .pmu_enable     = l2_cache_pmu_enable,
                .pmu_disable    = l2_cache_pmu_disable,
                .event_init     = l2_cache_event_init,
                .add            = l2_cache_event_add,
                .del            = l2_cache_event_del,
                .start          = l2_cache_event_start,
                .stop           = l2_cache_event_stop,
                .read           = l2_cache_event_read,
                .attr_groups    = l2_cache_pmu_attr_grps,
        };

        l2cache_pmu->num_counters = get_num_counters();
        l2cache_pmu->pdev = pdev;
        l2cache_pmu->pmu_cluster = devm_alloc_percpu(&pdev->dev,
                                                     struct cluster_pmu *);
        if (!l2cache_pmu->pmu_cluster)
                return -ENOMEM;

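        /*
         * Event counters occupy overflow/enable bits [num_counters - 2 : 0];
         * the cycle counter uses its own bit, L2CYCLE_CTR_BIT (31), and is
         * assigned the highest counter index.
         */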
        l2_cycle_ctr_idx = l2cache_pmu->num_counters - 1;
        l2_counter_present_mask = GENMASK(l2cache_pmu->num_counters - 2, 0) |
                BIT(L2CYCLE_CTR_BIT);

        cpumask_clear(&l2cache_pmu->cpumask);

        /* Read cluster info and initialize each cluster */
        err = device_for_each_child(&pdev->dev, l2cache_pmu,
                                    l2_cache_pmu_probe_cluster);
        if (err)
                return err;

        if (l2cache_pmu->num_pmus == 0) {
                dev_err(&pdev->dev, "No hardware L2 cache PMUs found\n");
                return -ENODEV;
        }

        err = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
                                       &l2cache_pmu->node);
        if (err) {
                dev_err(&pdev->dev, "Error %d registering hotplug\n", err);
                return err;
        }

        err = perf_pmu_register(&l2cache_pmu->pmu, l2cache_pmu->pmu.name, -1);
        if (err) {
                dev_err(&pdev->dev, "Error %d registering L2 cache PMU\n", err);
                goto out_unregister;
        }

        dev_info(&pdev->dev, "Registered L2 cache PMU using %d HW PMUs\n",
                 l2cache_pmu->num_pmus);

        return err;

out_unregister:
        cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
                                    &l2cache_pmu->node);
        return err;
}

static int l2_cache_pmu_remove(struct platform_device *pdev)
{
        struct l2cache_pmu *l2cache_pmu =
                to_l2cache_pmu(platform_get_drvdata(pdev));

        perf_pmu_unregister(&l2cache_pmu->pmu);
        cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
                                    &l2cache_pmu->node);
        return 0;
}

static struct platform_driver l2_cache_pmu_driver = {
        .driver = {
                .name = "qcom-l2cache-pmu",
                .acpi_match_table = ACPI_PTR(l2_cache_pmu_acpi_match),
        },
        .probe = l2_cache_pmu_probe,
        .remove = l2_cache_pmu_remove,
};

static int __init register_l2_cache_pmu_driver(void)
{
        int err;

        err = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
                                      "AP_PERF_ARM_QCOM_L2_ONLINE",
                                      l2cache_pmu_online_cpu,
                                      l2cache_pmu_offline_cpu);
        if (err)
                return err;

        return platform_driver_register(&l2_cache_pmu_driver);
}
device_initcall(register_l2_cache_pmu_driver);