linux/arch/sh/kernel/cpu/sh4/perf_event.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Performance events support for SH7750-style performance counters
   4 *
   5 *  Copyright (C) 2009  Paul Mundt
   6 */
   7#include <linux/kernel.h>
   8#include <linux/init.h>
   9#include <linux/io.h>
  10#include <linux/irq.h>
  11#include <linux/perf_event.h>
  12#include <asm/processor.h>
  13
  14#define PM_CR_BASE      0xff000084      /* 16-bit */
  15#define PM_CTR_BASE     0xff100004      /* 32-bit */
  16
  17#define PMCR(n)         (PM_CR_BASE + ((n) * 0x04))
  18#define PMCTRH(n)       (PM_CTR_BASE + 0x00 + ((n) * 0x08))
  19#define PMCTRL(n)       (PM_CTR_BASE + 0x04 + ((n) * 0x08))
  20
  21#define PMCR_PMM_MASK   0x0000003f
  22
  23#define PMCR_CLKF       0x00000100
  24#define PMCR_PMCLR      0x00002000
  25#define PMCR_PMST       0x00004000
  26#define PMCR_PMEN       0x00008000
  27
/*
 * Forward declaration: the enable_all/disable_all helpers below read
 * sh7750_pmu.num_events before the initialised definition appears.
 */
static struct sh_pmu sh7750_pmu;
  29
  30/*
  31 * There are a number of events supported by each counter (33 in total).
  32 * Since we have 2 counters, each counter will take the event code as it
  33 * corresponds to the PMCR PMM setting. Each counter can be configured
  34 * independently.
  35 *
  36 *      Event Code      Description
  37 *      ----------      -----------
  38 *
  39 *      0x01            Operand read access
  40 *      0x02            Operand write access
  41 *      0x03            UTLB miss
  42 *      0x04            Operand cache read miss
  43 *      0x05            Operand cache write miss
  44 *      0x06            Instruction fetch (w/ cache)
  45 *      0x07            Instruction TLB miss
  46 *      0x08            Instruction cache miss
  47 *      0x09            All operand accesses
  48 *      0x0a            All instruction accesses
  49 *      0x0b            OC RAM operand access
  50 *      0x0d            On-chip I/O space access
  51 *      0x0e            Operand access (r/w)
  52 *      0x0f            Operand cache miss (r/w)
  53 *      0x10            Branch instruction
  54 *      0x11            Branch taken
  55 *      0x12            BSR/BSRF/JSR
  56 *      0x13            Instruction execution
  57 *      0x14            Instruction execution in parallel
  58 *      0x15            FPU Instruction execution
  59 *      0x16            Interrupt
  60 *      0x17            NMI
  61 *      0x18            trapa instruction execution
  62 *      0x19            UBCA match
  63 *      0x1a            UBCB match
  64 *      0x21            Instruction cache fill
  65 *      0x22            Operand cache fill
  66 *      0x23            Elapsed time
  67 *      0x24            Pipeline freeze by I-cache miss
  68 *      0x25            Pipeline freeze by D-cache miss
  69 *      0x27            Pipeline freeze by branch instruction
  70 *      0x28            Pipeline freeze by CPU register
  71 *      0x29            Pipeline freeze by FPU
  72 */
  73
  74static const int sh7750_general_events[] = {
  75        [PERF_COUNT_HW_CPU_CYCLES]              = 0x0023,
  76        [PERF_COUNT_HW_INSTRUCTIONS]            = 0x000a,
  77        [PERF_COUNT_HW_CACHE_REFERENCES]        = 0x0006,       /* I-cache */
  78        [PERF_COUNT_HW_CACHE_MISSES]            = 0x0008,       /* I-cache */
  79        [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = 0x0010,
  80        [PERF_COUNT_HW_BRANCH_MISSES]           = -1,
  81        [PERF_COUNT_HW_BUS_CYCLES]              = -1,
  82};
  83
  84#define C(x)    PERF_COUNT_HW_CACHE_##x
  85
  86static const int sh7750_cache_events
  87                        [PERF_COUNT_HW_CACHE_MAX]
  88                        [PERF_COUNT_HW_CACHE_OP_MAX]
  89                        [PERF_COUNT_HW_CACHE_RESULT_MAX] =
  90{
  91        [ C(L1D) ] = {
  92                [ C(OP_READ) ] = {
  93                        [ C(RESULT_ACCESS) ] = 0x0001,
  94                        [ C(RESULT_MISS)   ] = 0x0004,
  95                },
  96                [ C(OP_WRITE) ] = {
  97                        [ C(RESULT_ACCESS) ] = 0x0002,
  98                        [ C(RESULT_MISS)   ] = 0x0005,
  99                },
 100                [ C(OP_PREFETCH) ] = {
 101                        [ C(RESULT_ACCESS) ] = 0,
 102                        [ C(RESULT_MISS)   ] = 0,
 103                },
 104        },
 105
 106        [ C(L1I) ] = {
 107                [ C(OP_READ) ] = {
 108                        [ C(RESULT_ACCESS) ] = 0x0006,
 109                        [ C(RESULT_MISS)   ] = 0x0008,
 110                },
 111                [ C(OP_WRITE) ] = {
 112                        [ C(RESULT_ACCESS) ] = -1,
 113                        [ C(RESULT_MISS)   ] = -1,
 114                },
 115                [ C(OP_PREFETCH) ] = {
 116                        [ C(RESULT_ACCESS) ] = 0,
 117                        [ C(RESULT_MISS)   ] = 0,
 118                },
 119        },
 120
 121        [ C(LL) ] = {
 122                [ C(OP_READ) ] = {
 123                        [ C(RESULT_ACCESS) ] = 0,
 124                        [ C(RESULT_MISS)   ] = 0,
 125                },
 126                [ C(OP_WRITE) ] = {
 127                        [ C(RESULT_ACCESS) ] = 0,
 128                        [ C(RESULT_MISS)   ] = 0,
 129                },
 130                [ C(OP_PREFETCH) ] = {
 131                        [ C(RESULT_ACCESS) ] = 0,
 132                        [ C(RESULT_MISS)   ] = 0,
 133                },
 134        },
 135
 136        [ C(DTLB) ] = {
 137                [ C(OP_READ) ] = {
 138                        [ C(RESULT_ACCESS) ] = 0,
 139                        [ C(RESULT_MISS)   ] = 0x0003,
 140                },
 141                [ C(OP_WRITE) ] = {
 142                        [ C(RESULT_ACCESS) ] = 0,
 143                        [ C(RESULT_MISS)   ] = 0,
 144                },
 145                [ C(OP_PREFETCH) ] = {
 146                        [ C(RESULT_ACCESS) ] = 0,
 147                        [ C(RESULT_MISS)   ] = 0,
 148                },
 149        },
 150
 151        [ C(ITLB) ] = {
 152                [ C(OP_READ) ] = {
 153                        [ C(RESULT_ACCESS) ] = 0,
 154                        [ C(RESULT_MISS)   ] = 0x0007,
 155                },
 156                [ C(OP_WRITE) ] = {
 157                        [ C(RESULT_ACCESS) ] = -1,
 158                        [ C(RESULT_MISS)   ] = -1,
 159                },
 160                [ C(OP_PREFETCH) ] = {
 161                        [ C(RESULT_ACCESS) ] = -1,
 162                        [ C(RESULT_MISS)   ] = -1,
 163                },
 164        },
 165
 166        [ C(BPU) ] = {
 167                [ C(OP_READ) ] = {
 168                        [ C(RESULT_ACCESS) ] = -1,
 169                        [ C(RESULT_MISS)   ] = -1,
 170                },
 171                [ C(OP_WRITE) ] = {
 172                        [ C(RESULT_ACCESS) ] = -1,
 173                        [ C(RESULT_MISS)   ] = -1,
 174                },
 175                [ C(OP_PREFETCH) ] = {
 176                        [ C(RESULT_ACCESS) ] = -1,
 177                        [ C(RESULT_MISS)   ] = -1,
 178                },
 179        },
 180
 181        [ C(NODE) ] = {
 182                [ C(OP_READ) ] = {
 183                        [ C(RESULT_ACCESS) ] = -1,
 184                        [ C(RESULT_MISS)   ] = -1,
 185                },
 186                [ C(OP_WRITE) ] = {
 187                        [ C(RESULT_ACCESS) ] = -1,
 188                        [ C(RESULT_MISS)   ] = -1,
 189                },
 190                [ C(OP_PREFETCH) ] = {
 191                        [ C(RESULT_ACCESS) ] = -1,
 192                        [ C(RESULT_MISS)   ] = -1,
 193                },
 194        },
 195};
 196
 197static int sh7750_event_map(int event)
 198{
 199        return sh7750_general_events[event];
 200}
 201
 202static u64 sh7750_pmu_read(int idx)
 203{
 204        return (u64)((u64)(__raw_readl(PMCTRH(idx)) & 0xffff) << 32) |
 205                           __raw_readl(PMCTRL(idx));
 206}
 207
 208static void sh7750_pmu_disable(struct hw_perf_event *hwc, int idx)
 209{
 210        unsigned int tmp;
 211
 212        tmp = __raw_readw(PMCR(idx));
 213        tmp &= ~(PMCR_PMM_MASK | PMCR_PMEN);
 214        __raw_writew(tmp, PMCR(idx));
 215}
 216
 217static void sh7750_pmu_enable(struct hw_perf_event *hwc, int idx)
 218{
 219        __raw_writew(__raw_readw(PMCR(idx)) | PMCR_PMCLR, PMCR(idx));
 220        __raw_writew(hwc->config | PMCR_PMEN | PMCR_PMST, PMCR(idx));
 221}
 222
 223static void sh7750_pmu_disable_all(void)
 224{
 225        int i;
 226
 227        for (i = 0; i < sh7750_pmu.num_events; i++)
 228                __raw_writew(__raw_readw(PMCR(i)) & ~PMCR_PMEN, PMCR(i));
 229}
 230
 231static void sh7750_pmu_enable_all(void)
 232{
 233        int i;
 234
 235        for (i = 0; i < sh7750_pmu.num_events; i++)
 236                __raw_writew(__raw_readw(PMCR(i)) | PMCR_PMEN, PMCR(i));
 237}
 238
 239static struct sh_pmu sh7750_pmu = {
 240        .name           = "sh7750",
 241        .num_events     = 2,
 242        .event_map      = sh7750_event_map,
 243        .max_events     = ARRAY_SIZE(sh7750_general_events),
 244        .raw_event_mask = PMCR_PMM_MASK,
 245        .cache_events   = &sh7750_cache_events,
 246        .read           = sh7750_pmu_read,
 247        .disable        = sh7750_pmu_disable,
 248        .enable         = sh7750_pmu_enable,
 249        .disable_all    = sh7750_pmu_disable_all,
 250        .enable_all     = sh7750_pmu_enable_all,
 251};
 252
 253static int __init sh7750_pmu_init(void)
 254{
 255        /*
 256         * Make sure this CPU actually has perf counters.
 257         */
 258        if (!(boot_cpu_data.flags & CPU_HAS_PERF_COUNTER)) {
 259                pr_notice("HW perf events unsupported, software events only.\n");
 260                return -ENODEV;
 261        }
 262
 263        return register_sh_pmu(&sh7750_pmu);
 264}
 265early_initcall(sh7750_pmu_init);
 266