linux/arch/nds32/include/asm/pmu.h
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2008-2018 Andes Technology Corporation */

#ifndef __ASM_PMU_H
#define __ASM_PMU_H

#include <linux/interrupt.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <asm/bitfield.h>

/* Has special meaning for perf core implementation */
#define HW_OP_UNSUPPORTED		0x0
#define C(_x)				PERF_COUNT_HW_CACHE_##_x
#define CACHE_OP_UNSUPPORTED		0x0

/* Enough for both software and hardware defined events */
#define SOFTWARE_EVENT_MASK		0xFF

#define PFM_OFFSET_MAGIC_0		2	/* DO NOT START FROM 0 */
#define PFM_OFFSET_MAGIC_1		(PFM_OFFSET_MAGIC_0 + 36)
#define PFM_OFFSET_MAGIC_2		(PFM_OFFSET_MAGIC_1 + 36)

enum { PFMC0, PFMC1, PFMC2, MAX_COUNTERS };

u32 PFM_CTL_OVF[3] = { PFM_CTL_mskOVF0, PFM_CTL_mskOVF1,
		       PFM_CTL_mskOVF2 };
u32 PFM_CTL_EN[3] = { PFM_CTL_mskEN0, PFM_CTL_mskEN1,
		      PFM_CTL_mskEN2 };
u32 PFM_CTL_OFFSEL[3] = { PFM_CTL_offSEL0, PFM_CTL_offSEL1,
			  PFM_CTL_offSEL2 };
u32 PFM_CTL_IE[3] = { PFM_CTL_mskIE0, PFM_CTL_mskIE1, PFM_CTL_mskIE2 };
u32 PFM_CTL_KS[3] = { PFM_CTL_mskKS0, PFM_CTL_mskKS1, PFM_CTL_mskKS2 };
u32 PFM_CTL_KU[3] = { PFM_CTL_mskKU0, PFM_CTL_mskKU1, PFM_CTL_mskKU2 };
u32 PFM_CTL_SEL[3] = { PFM_CTL_mskSEL0, PFM_CTL_mskSEL1, PFM_CTL_mskSEL2 };
/*
 * Perf Events' indices
 */
#define NDS32_IDX_CYCLE_COUNTER		0
#define NDS32_IDX_COUNTER0		1
#define NDS32_IDX_COUNTER1		2

/* The events for a given PMU register set. */
struct pmu_hw_events {
	/*
	 * The events that are active on the PMU for the given index.
	 */
	struct perf_event *events[MAX_COUNTERS];

	/*
	 * A 1 bit for an index indicates that the counter is being used for
	 * an event. A 0 means that the counter can be used.
	 */
	unsigned long used_mask[BITS_TO_LONGS(MAX_COUNTERS)];

	/*
	 * Hardware lock to serialize accesses to PMU registers. Needed for the
	 * read/modify/write sequences.
	 */
	raw_spinlock_t pmu_lock;
};
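
/*
 * Illustrative sketch (not part of the original header): a get_event_idx()
 * implementation can claim a free counter by atomically setting its bit in
 * used_mask. The helper name below is hypothetical.
 *
 *	static int pfm_claim_counter(struct pmu_hw_events *hw_events, int idx)
 *	{
 *		if (test_and_set_bit(idx, hw_events->used_mask))
 *			return -EAGAIN;		// counter already in use
 *		return idx;
 *	}
 */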

struct nds32_pmu {
	struct pmu pmu;
	cpumask_t active_irqs;
	char *name;
	irqreturn_t (*handle_irq)(int irq_num, void *dev);
	void (*enable)(struct perf_event *event);
	void (*disable)(struct perf_event *event);
	int (*get_event_idx)(struct pmu_hw_events *hw_events,
			     struct perf_event *event);
	int (*set_event_filter)(struct hw_perf_event *evt,
				struct perf_event_attr *attr);
	u32 (*read_counter)(struct perf_event *event);
	void (*write_counter)(struct perf_event *event, u32 val);
	void (*start)(struct nds32_pmu *nds32_pmu);
	void (*stop)(struct nds32_pmu *nds32_pmu);
	void (*reset)(void *data);
	int (*request_irq)(struct nds32_pmu *nds32_pmu, irq_handler_t handler);
	void (*free_irq)(struct nds32_pmu *nds32_pmu);
	int (*map_event)(struct perf_event *event);
	int num_events;
	atomic_t active_events;
	u64 max_period;
	struct platform_device *plat_device;
	struct pmu_hw_events *(*get_hw_events)(void);
};
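
/*
 * Illustrative sketch (not part of the original header): a CPU-specific
 * backend is expected to fill in these callbacks before calling
 * nds32_pmu_register(). All names below are hypothetical.
 *
 *	static int device_pmu_init(struct nds32_pmu *cpu_pmu)
 *	{
 *		cpu_pmu->name = "nds32v3-pmu";
 *		cpu_pmu->handle_irq = nds32_pmu_handle_irq;
 *		cpu_pmu->enable = nds32_pmu_enable_event;
 *		cpu_pmu->disable = nds32_pmu_disable_event;
 *		cpu_pmu->read_counter = nds32_pmu_read_counter;
 *		cpu_pmu->write_counter = nds32_pmu_write_counter;
 *		cpu_pmu->get_event_idx = nds32_pmu_get_event_idx;
 *		cpu_pmu->map_event = nds32_spav3_map_event;
 *		cpu_pmu->num_events = MAX_COUNTERS;
 *		cpu_pmu->max_period = 0xFFFFFFFF;	// assuming 32-bit counters
 *		return 0;
 *	}
 */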

#define to_nds32_pmu(p)			(container_of(p, struct nds32_pmu, pmu))

int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type);

u64 nds32_pmu_event_update(struct perf_event *event);

int nds32_pmu_event_set_period(struct perf_event *event);

/*
 * Common NDS32 SPAv3 event types
 *
 * Note: An implementation may not be able to count all of these events,
 * but the encodings are considered to be `reserved' in the case that
 * they are not available.
 *
 * SEL_TOTAL_CYCLES carries an offset because zero is defined as the
 * NOT_SUPPORTED event mapping in the generic perf code. The event-writing
 * implementation must subtract the offset again before programming the
 * hardware (see get_converted_evet_hw_num() below).
 */
enum spav3_counter_0_perf_types {
	SPAV3_0_SEL_BASE = -1 + PFM_OFFSET_MAGIC_0,	/* counting symbol */
	SPAV3_0_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_0,
	SPAV3_0_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_0,
	SPAV3_0_SEL_LAST	/* counting symbol */
};
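
/*
 * Illustrative arithmetic (not part of the original header): with
 * PFM_OFFSET_MAGIC_0 == 2, counter 0's two hardware events land on
 * non-zero encodings, keeping 0 free for HW_OP_UNSUPPORTED:
 *
 *	SPAV3_0_SEL_TOTAL_CYCLES          == 2	(hardware event 0)
 *	SPAV3_0_SEL_COMPLETED_INSTRUCTION == 3	(hardware event 1)
 *
 * Subtracting PFM_OFFSET_MAGIC_0 recovers the raw selector value that
 * goes into the PFM_CTL SEL field.
 */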

enum spav3_counter_1_perf_types {
	SPAV3_1_SEL_BASE = -1 + PFM_OFFSET_MAGIC_1,	/* counting symbol */
	SPAV3_1_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_CONDITIONAL_BRANCH = 2 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_TAKEN_CONDITIONAL_BRANCH = 3 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_PREFETCH_INSTRUCTION = 4 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_RET_INST = 5 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_JR_INST = 6 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_JAL_JRAL_INST = 7 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_NOP_INST = 8 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_SCW_INST = 9 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_ISB_DSB_INST = 10 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_CCTL_INST = 11 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_TAKEN_INTERRUPTS = 12 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LOADS_COMPLETED = 13 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_UITLB_ACCESS = 14 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_UDTLB_ACCESS = 15 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_MTLB_ACCESS = 16 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_CODE_CACHE_ACCESS = 17 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DATA_DEPENDENCY_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DATA_CACHE_MISS_STALL_CYCLES = 19 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DATA_CACHE_ACCESS = 20 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS = 22 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS = 23 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_ILM_ACCESS = 24 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LSU_BIU_CYCLES = 25 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_HPTWK_BIU_CYCLES = 26 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DMA_BIU_CYCLES = 27 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_CODE_CACHE_FILL_BIU_CYCLES = 28 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LEGAL_UNALIGN_DCACHE_ACCESS = 29 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_PUSH25 = 30 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_SYSCALLS_INST = 31 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LAST	/* counting symbol */
};

enum spav3_counter_2_perf_types {
	SPAV3_2_SEL_BASE = -1 + PFM_OFFSET_MAGIC_2,	/* counting symbol */
	SPAV3_2_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_CONDITIONAL_BRANCH_MISPREDICT = 2 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_TAKEN_CONDITIONAL_BRANCH_MISPREDICT =
	    3 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_PREFETCH_INSTRUCTION_CACHE_HIT = 4 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_RET_MISPREDICT = 5 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_IMMEDIATE_J_INST = 6 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_MULTIPLY_INST = 7 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_16_BIT_INST = 8 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_FAILED_SCW_INST = 9 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_LD_AFTER_ST_CONFLICT_REPLAYS = 10 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_TAKEN_EXCEPTIONS = 12 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_STORES_COMPLETED = 13 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_UITLB_MISS = 14 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_UDTLB_MISS = 15 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_MTLB_MISS = 16 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_CODE_CACHE_MISS = 17 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_EMPTY_INST_QUEUE_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_DATA_WRITE_BACK = 19 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_LOAD_DATA_CACHE_MISS = 22 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_STORE_DATA_CACHE_MISS = 23 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_DLM_ACCESS = 24 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_LSU_BIU_REQUEST = 25 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_HPTWK_BIU_REQUEST = 26 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_DMA_BIU_REQUEST = 27 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_CODE_CACHE_FILL_BIU_REQUEST = 28 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_EXTERNAL_EVENTS = 29 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_POP25 = 30 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_LAST	/* counting symbol */
};

/* Get converted event counter index */
static inline int get_converted_event_idx(unsigned long event)
{
	int idx;

	if (event > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST) {
		idx = 0;
	} else if (event > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST) {
		idx = 1;
	} else if (event > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST) {
		idx = 2;
	} else {
		pr_err("GET_CONVERTED_EVENT_IDX PFM counter range error\n");
		return -EPERM;
	}

	return idx;
}
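
/*
 * Illustrative usage (not part of the original header): because each
 * counter's events live in a disjoint offset range, the counter index is
 * recoverable from the encoded event value alone.
 *
 *	get_converted_event_idx(SPAV3_0_SEL_TOTAL_CYCLES);	// 0 (PFMC0)
 *	get_converted_event_idx(SPAV3_1_SEL_DATA_CACHE_ACCESS);	// 1 (PFMC1)
 *	get_converted_event_idx(SPAV3_2_SEL_DATA_CACHE_MISS);	// 2 (PFMC2)
 */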

/* Get converted hardware event number */
static inline u32 get_converted_evet_hw_num(u32 event)
{
	if (event > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST)
		event -= PFM_OFFSET_MAGIC_0;
	else if (event > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST)
		event -= PFM_OFFSET_MAGIC_1;
	else if (event > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST)
		event -= PFM_OFFSET_MAGIC_2;
	else if (event != 0)
		pr_err("GET_CONVERTED_EVENT_HW_NUM PFM counter range error\n");

	return event;
}
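
/*
 * Illustrative usage (not part of the original header): the two helpers
 * together split one encoded value into the (counter, raw selector) pair
 * the hardware needs.
 *
 *	u32 evt = SPAV3_2_SEL_DATA_CACHE_MISS;
 *	int idx = get_converted_event_idx(evt);		// 2 -> PFMC2
 *	u32 sel = get_converted_evet_hw_num(evt);	// 21, raw SEL value
 */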

/*
 * NDS32 HW events mapping
 *
 * The hardware events that we support. We do support cache operations, but
 * we have Harvard (split) caches and no way to combine instruction and data
 * accesses/misses in hardware.
 */
static const unsigned int nds32_pfm_perf_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES] = SPAV3_0_SEL_TOTAL_CYCLES,
	[PERF_COUNT_HW_INSTRUCTIONS] = SPAV3_1_SEL_COMPLETED_INSTRUCTION,
	[PERF_COUNT_HW_CACHE_REFERENCES] = SPAV3_1_SEL_DATA_CACHE_ACCESS,
	[PERF_COUNT_HW_CACHE_MISSES] = SPAV3_2_SEL_DATA_CACHE_MISS,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BRANCH_MISSES] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_REF_CPU_CYCLES] = HW_OP_UNSUPPORTED
};
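
/*
 * Illustrative walk-through (not part of the original header): a
 * "perf stat -e cycles" request arrives as PERF_COUNT_HW_CPU_CYCLES, maps
 * to SPAV3_0_SEL_TOTAL_CYCLES (== 2) in the table above, and the helpers
 * then split it into counter PFMC0 with raw selector 0.
 */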

static const unsigned int nds32_pfm_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
	[PERF_COUNT_HW_CACHE_OP_MAX]
	[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS,
			[C(RESULT_MISS)] = SPAV3_2_SEL_LOAD_DATA_CACHE_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS,
			[C(RESULT_MISS)] = SPAV3_2_SEL_STORE_DATA_CACHE_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = SPAV3_1_SEL_CODE_CACHE_ACCESS,
			[C(RESULT_MISS)] = SPAV3_2_SEL_CODE_CACHE_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = SPAV3_1_SEL_CODE_CACHE_ACCESS,
			[C(RESULT_MISS)] = SPAV3_2_SEL_CODE_CACHE_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	/* TODO: L2CC */
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	/*
	 * The NDS32 PMU cannot count TLB hits/misses separately for reads
	 * and writes; it only counts combined accesses and misses. We
	 * therefore report them on the READ counters only, which is the
	 * best we can do.
	 */
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = SPAV3_1_SEL_UDTLB_ACCESS,
			[C(RESULT_MISS)] = SPAV3_2_SEL_UDTLB_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = SPAV3_1_SEL_UITLB_ACCESS,
			[C(RESULT_MISS)] = SPAV3_2_SEL_UITLB_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(BPU)] = {		/* BPU: branch prediction unit */
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(NODE)] = {		/* NODE: local memory (NUMA node) accesses */
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
};

int nds32_pmu_map_event(struct perf_event *event,
			const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
			const unsigned int (*cache_map)[PERF_COUNT_HW_CACHE_MAX]
			[PERF_COUNT_HW_CACHE_OP_MAX]
			[PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask);
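
/*
 * Illustrative usage (not part of the original header): a map_event
 * callback would typically pass the two tables above together with
 * SOFTWARE_EVENT_MASK as the raw-event mask. The wrapper name below is
 * hypothetical.
 *
 *	static int nds32_spav3_map_event(struct perf_event *event)
 *	{
 *		return nds32_pmu_map_event(event, &nds32_pfm_perf_map,
 *					   &nds32_pfm_perf_cache_map,
 *					   SOFTWARE_EVENT_MASK);
 *	}
 */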

#endif /* __ASM_PMU_H */