linux/arch/sparc/kernel/perf_event.c
/* Performance event support for sparc64.
 *
 * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
 *
 * This code is based almost entirely upon the x86 perf event
 * code, which is:
 *
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2009 Jaswinder Singh Rajput
 *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 */

#include <linux/perf_event.h>
#include <linux/kprobes.h>
#include <linux/kernel.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>

#include <asm/cpudata.h>
#include <asm/atomic.h>
#include <asm/nmi.h>
#include <asm/pcr.h>

/* Sparc64 chips have two performance counters, 32-bits each, with
 * overflow interrupts generated on transition from 0xffffffff to 0.
 * The counters are accessed in one go using a 64-bit register.
 *
 * Both counters are controlled using a single control register.  The
 * only way to stop all sampling is to clear all of the context (user,
 * supervisor, hypervisor) sampling enable bits.  But these bits apply
 * to both counters, thus the two counters can't be enabled/disabled
 * individually.
 *
 * The control register has two event fields, one for each of the two
 * counters.  It's thus nearly impossible to have one counter going
 * while keeping the other one stopped.  Therefore it is possible to
 * get overflow interrupts for counters not currently "in use" and
 * that condition must be checked in the overflow interrupt handler.
 *
 * So we use a hack, in that we program inactive counters with the
 * "sw_count0" and "sw_count1" events.  These count how many times
 * the instruction "sethi %hi(0xfc000), %g0" is executed.  It's an
 * unusual way to encode a NOP and therefore will not trigger in
 * normal code.
 */

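/* A rough sketch of the register layout, going by the shift/mask values
 * in the PMU descriptors below (e.g. ultra3_pmu uses upper_shift = 11,
 * lower_shift = 4, event_mask = 0x3f): the PCR carries the two event
 * select fields plus the trace/enable bits, and the 64-bit %pic holds
 * the upper counter in bits 63:32 and the lower counter in bits 31:0.
 */
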
#define MAX_HWEVENTS                    2
#define MAX_PERIOD                      ((1UL << 32) - 1)

#define PIC_UPPER_INDEX                 0
#define PIC_LOWER_INDEX                 1

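/* Per-cpu bookkeeping: the events currently programmed on each counter,
 * which counter slots are claimed (used_mask) and actually counting
 * (active_mask), a software copy of the PCR control register, and a
 * flag saying whether counting is globally enabled on this cpu.
 */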
struct cpu_hw_events {
        struct perf_event       *events[MAX_HWEVENTS];
        unsigned long           used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
        unsigned long           active_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
        u64                     pcr;
        int                     enabled;
};
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };

struct perf_event_map {
        u16     encoding;
        u8      pic_mask;
#define PIC_NONE        0x00
#define PIC_UPPER       0x01
#define PIC_LOWER       0x02
};

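/* Pack an event's (encoding, pic_mask) pair into a single value.  The
 * packed form is stashed in hw_perf_event::event_base so the scheduling
 * code can later recover which counter(s) the event is allowed to use.
 */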
static unsigned long perf_event_encode(const struct perf_event_map *pmap)
{
        return ((unsigned long) pmap->encoding << 16) | pmap->pic_mask;
}

static void perf_event_decode(unsigned long val, u16 *enc, u8 *msk)
{
        *msk = val & 0xff;
        *enc = val >> 16;
}

#define C(x) PERF_COUNT_HW_CACHE_##x

#define CACHE_OP_UNSUPPORTED    0xfffe
#define CACHE_OP_NONSENSE       0xffff

typedef struct perf_event_map cache_map_t
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX];

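/* Per-chip PMU description:
 *
 *   event_map/cache_map      translate generic perf event ids into
 *                            chip-specific (encoding, pic_mask) pairs
 *   max_events               number of entries in the hardware event map
 *   upper_shift/lower_shift  bit positions of the two event fields in
 *                            the PCR control register
 *   event_mask               width mask of one event field
 *   hv_bit/irq_bit           PCR bits for hypervisor tracing and overflow
 *                            interrupt enable (zero where not supported)
 *   upper_nop/lower_nop      "sw_count" encodings used to idle a counter,
 *                            as described in the comment at the top
 */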
struct sparc_pmu {
        const struct perf_event_map     *(*event_map)(int);
        const cache_map_t               *cache_map;
        int                             max_events;
        int                             upper_shift;
        int                             lower_shift;
        int                             event_mask;
        int                             hv_bit;
        int                             irq_bit;
        int                             upper_nop;
        int                             lower_nop;
};

static const struct perf_event_map ultra3_perfmon_event_map[] = {
        [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER },
        [PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER },
};

static const struct perf_event_map *ultra3_event_map(int event_id)
{
        return &ultra3_perfmon_event_map[event_id];
}

static const cache_map_t ultra3_cache_map = {
[C(L1D)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = { 0x0a, PIC_LOWER },
                [C(RESULT_MISS)] = { 0x0a, PIC_UPPER },
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(L1I)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
                [ C(RESULT_MISS)   ] = { CACHE_OP_NONSENSE },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(LL)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x0c, PIC_UPPER, },
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER },
                [C(RESULT_MISS)] = { 0x0c, PIC_UPPER },
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(DTLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0x12, PIC_UPPER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(ITLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0x11, PIC_UPPER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(BPU)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
};

static const struct sparc_pmu ultra3_pmu = {
        .event_map      = ultra3_event_map,
        .cache_map      = &ultra3_cache_map,
        .max_events     = ARRAY_SIZE(ultra3_perfmon_event_map),
        .upper_shift    = 11,
        .lower_shift    = 4,
        .event_mask     = 0x3f,
        .upper_nop      = 0x1c,
        .lower_nop      = 0x14,
};

/* Niagara1 is very limited.  The upper PIC is hard-locked to count
 * only instructions, so it is free running which creates all kinds of
 * problems.  Some hardware designs make one wonder if the creator
 * even looked at how this stuff gets used by software.
 */
static const struct perf_event_map niagara1_perfmon_event_map[] = {
        [PERF_COUNT_HW_CPU_CYCLES] = { 0x00, PIC_UPPER },
        [PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, PIC_UPPER },
        [PERF_COUNT_HW_CACHE_REFERENCES] = { 0, PIC_NONE },
        [PERF_COUNT_HW_CACHE_MISSES] = { 0x03, PIC_LOWER },
};

static const struct perf_event_map *niagara1_event_map(int event_id)
{
        return &niagara1_perfmon_event_map[event_id];
}

static const cache_map_t niagara1_cache_map = {
[C(L1D)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0x03, PIC_LOWER, },
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0x03, PIC_LOWER, },
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(L1I)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { 0x00, PIC_UPPER },
                [C(RESULT_MISS)] = { 0x02, PIC_LOWER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
                [ C(RESULT_MISS)   ] = { CACHE_OP_NONSENSE },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(LL)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0x07, PIC_LOWER, },
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0x07, PIC_LOWER, },
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(DTLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0x05, PIC_LOWER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(ITLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0x04, PIC_LOWER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(BPU)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
};

static const struct sparc_pmu niagara1_pmu = {
        .event_map      = niagara1_event_map,
        .cache_map      = &niagara1_cache_map,
        .max_events     = ARRAY_SIZE(niagara1_perfmon_event_map),
        .upper_shift    = 0,
        .lower_shift    = 4,
        .event_mask     = 0x7,
        .upper_nop      = 0x0,
        .lower_nop      = 0x0,
};

static const struct perf_event_map niagara2_perfmon_event_map[] = {
        [PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0208, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_CACHE_MISSES] = { 0x0302, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x0201, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_BRANCH_MISSES] = { 0x0202, PIC_UPPER | PIC_LOWER },
};

static const struct perf_event_map *niagara2_event_map(int event_id)
{
        return &niagara2_perfmon_event_map[event_id];
}

static const cache_map_t niagara2_cache_map = {
[C(L1D)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, },
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, },
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(L1I)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { 0x02ff, PIC_UPPER | PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x0301, PIC_UPPER | PIC_LOWER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
                [ C(RESULT_MISS)   ] = { CACHE_OP_NONSENSE },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(LL)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x0330, PIC_UPPER | PIC_LOWER, },
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x0320, PIC_UPPER | PIC_LOWER, },
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(DTLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0x0b08, PIC_UPPER | PIC_LOWER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(ITLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0xb04, PIC_UPPER | PIC_LOWER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(BPU)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
};

static const struct sparc_pmu niagara2_pmu = {
        .event_map      = niagara2_event_map,
        .cache_map      = &niagara2_cache_map,
        .max_events     = ARRAY_SIZE(niagara2_perfmon_event_map),
        .upper_shift    = 19,
        .lower_shift    = 6,
        .event_mask     = 0xfff,
        .hv_bit         = 0x8,
        .irq_bit        = 0x30,
        .upper_nop      = 0x220,
        .lower_nop      = 0x220,
};

static const struct sparc_pmu *sparc_pmu __read_mostly;

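/* Shift a raw value (an event encoding, the event field mask, or a nop
 * encoding) into the PCR field belonging to the given counter index.
 */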
static u64 event_encoding(u64 event_id, int idx)
{
        if (idx == PIC_UPPER_INDEX)
                event_id <<= sparc_pmu->upper_shift;
        else
                event_id <<= sparc_pmu->lower_shift;
        return event_id;
}

static u64 mask_for_index(int idx)
{
        return event_encoding(sparc_pmu->event_mask, idx);
}

static u64 nop_for_index(int idx)
{
        return event_encoding(idx == PIC_UPPER_INDEX ?
                              sparc_pmu->upper_nop :
                              sparc_pmu->lower_nop, idx);
}

static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
{
        u64 val, mask = mask_for_index(idx);

        val = cpuc->pcr;
        val &= ~mask;
        val |= hwc->config;
        cpuc->pcr = val;

        pcr_ops->write(cpuc->pcr);
}

static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
{
        u64 mask = mask_for_index(idx);
        u64 nop = nop_for_index(idx);
        u64 val;

        val = cpuc->pcr;
        val &= ~mask;
        val |= nop;
        cpuc->pcr = val;

        pcr_ops->write(cpuc->pcr);
}

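/* Globally (re-)enable counting on this cpu: OR each active event's
 * enable bits (saved in config_base) back into the software PCR copy
 * and write it out.  hw_perf_disable() below does the reverse by
 * clearing the user/supervisor/hypervisor trace bits, which is the
 * only way to stop both counters at once (see the comment at the top).
 */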
void hw_perf_enable(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        u64 val;
        int i;

        if (cpuc->enabled)
                return;

        cpuc->enabled = 1;
        barrier();

        val = cpuc->pcr;

        for (i = 0; i < MAX_HWEVENTS; i++) {
                struct perf_event *cp = cpuc->events[i];
                struct hw_perf_event *hwc;

                if (!cp)
                        continue;
                hwc = &cp->hw;
                val |= hwc->config_base;
        }

        cpuc->pcr = val;

        pcr_ops->write(cpuc->pcr);
}

void hw_perf_disable(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        u64 val;

        if (!cpuc->enabled)
                return;

        cpuc->enabled = 0;

        val = cpuc->pcr;
        val &= ~(PCR_UTRACE | PCR_STRACE |
                 sparc_pmu->hv_bit | sparc_pmu->irq_bit);
        cpuc->pcr = val;

        pcr_ops->write(cpuc->pcr);
}

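/* Both 32-bit counters live in the single 64-bit %pic register: the
 * upper counter in bits 63:32, the lower counter in bits 31:0.
 * read_pmc() extracts one half; write_pmc() does a read-modify-write
 * so the other counter's value is preserved.
 */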
static u32 read_pmc(int idx)
{
        u64 val;

        read_pic(val);
        if (idx == PIC_UPPER_INDEX)
                val >>= 32;

        return val & 0xffffffff;
}

static void write_pmc(int idx, u64 val)
{
        u64 shift, mask, pic;

        shift = 0;
        if (idx == PIC_UPPER_INDEX)
                shift = 32;

        mask = ((u64) 0xffffffff) << shift;
        val <<= shift;

        read_pic(pic);
        pic &= ~mask;
        pic |= val;
        write_pic(pic);
}

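/* Program the counter so it overflows after "left" more events: the
 * hardware interrupts on the 0xffffffff -> 0 transition, so writing the
 * 32-bit two's complement of the remaining period (-left) makes the
 * counter wrap exactly when the sampling period has elapsed.  The
 * period is clamped to MAX_PERIOD since the counters are only 32 bits.
 */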
static int sparc_perf_event_set_period(struct perf_event *event,
                                       struct hw_perf_event *hwc, int idx)
{
        s64 left = atomic64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        int ret = 0;

        if (unlikely(left <= -period)) {
                left = period;
                atomic64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }

        if (unlikely(left <= 0)) {
                left += period;
                atomic64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }
        if (left > MAX_PERIOD)
                left = MAX_PERIOD;

        atomic64_set(&hwc->prev_count, (u64)-left);

        write_pmc(idx, (u64)(-left) & 0xffffffff);

        perf_event_update_userpage(event);

        return ret;
}

static int sparc_pmu_enable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        if (test_and_set_bit(idx, cpuc->used_mask))
                return -EAGAIN;

        sparc_pmu_disable_event(cpuc, hwc, idx);

        cpuc->events[idx] = event;
        set_bit(idx, cpuc->active_mask);

        sparc_perf_event_set_period(event, hwc, idx);
        sparc_pmu_enable_event(cpuc, hwc, idx);
        perf_event_update_userpage(event);
        return 0;
}

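/* Fold the current hardware count into event->count.  The cmpxchg loop
 * makes this safe against an NMI updating prev_count underneath us, and
 * shifting both values up by 32 bits before subtracting yields a
 * correctly signed delta even when the 32-bit counter has wrapped.
 */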
static u64 sparc_perf_event_update(struct perf_event *event,
                                   struct hw_perf_event *hwc, int idx)
{
        int shift = 64 - 32;
        u64 prev_raw_count, new_raw_count;
        s64 delta;

again:
        prev_raw_count = atomic64_read(&hwc->prev_count);
        new_raw_count = read_pmc(idx);

        if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
                             new_raw_count) != prev_raw_count)
                goto again;

        delta = (new_raw_count << shift) - (prev_raw_count << shift);
        delta >>= shift;

        atomic64_add(delta, &event->count);
        atomic64_sub(delta, &hwc->period_left);

        return new_raw_count;
}

static void sparc_pmu_disable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        clear_bit(idx, cpuc->active_mask);
        sparc_pmu_disable_event(cpuc, hwc, idx);

        barrier();

        sparc_perf_event_update(event, hwc, idx);
        cpuc->events[idx] = NULL;
        clear_bit(idx, cpuc->used_mask);

        perf_event_update_userpage(event);
}

static void sparc_pmu_read(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;

        sparc_perf_event_update(event, hwc, hwc->idx);
}

static void sparc_pmu_unthrottle(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;

        sparc_pmu_enable_event(cpuc, hwc, hwc->idx);
}

static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmc_grab_mutex);

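/* The performance counters are shared with the NMI watchdog, so before
 * the first event is created the watchdog is stopped on every cpu (and
 * the current PCR value snapshotted), and it is restarted again once
 * the last event goes away.
 */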
static void perf_stop_nmi_watchdog(void *unused)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        stop_nmi_watchdog(NULL);
        cpuc->pcr = pcr_ops->read();
}

void perf_event_grab_pmc(void)
{
        if (atomic_inc_not_zero(&active_events))
                return;

        mutex_lock(&pmc_grab_mutex);
        if (atomic_read(&active_events) == 0) {
                if (atomic_read(&nmi_active) > 0) {
                        on_each_cpu(perf_stop_nmi_watchdog, NULL, 1);
                        BUG_ON(atomic_read(&nmi_active) != 0);
                }
                atomic_inc(&active_events);
        }
        mutex_unlock(&pmc_grab_mutex);
}

void perf_event_release_pmc(void)
{
        if (atomic_dec_and_mutex_lock(&active_events, &pmc_grab_mutex)) {
                if (atomic_read(&nmi_active) == 0)
                        on_each_cpu(start_nmi_watchdog, NULL, 1);
                mutex_unlock(&pmc_grab_mutex);
        }
}

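/* The config value of a PERF_TYPE_HW_CACHE event packs the cache level
 * in bits 7:0, the operation in bits 15:8 and the result in bits 23:16.
 * For example (going by the generic perf enums), an L1D read-miss event
 * is type 0 (L1D), op 0 (READ), result 1 (MISS), i.e. config == 0x10000,
 * which indexes the chip's cache_map to get the hardware encoding.
 */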
static const struct perf_event_map *sparc_map_cache_event(u64 config)
{
        unsigned int cache_type, cache_op, cache_result;
        const struct perf_event_map *pmap;

        if (!sparc_pmu->cache_map)
                return ERR_PTR(-ENOENT);

        cache_type = (config >>  0) & 0xff;
        if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
                return ERR_PTR(-EINVAL);

        cache_op = (config >>  8) & 0xff;
        if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
                return ERR_PTR(-EINVAL);

        cache_result = (config >> 16) & 0xff;
        if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                return ERR_PTR(-EINVAL);

        pmap = &((*sparc_pmu->cache_map)[cache_type][cache_op][cache_result]);

        if (pmap->encoding == CACHE_OP_UNSUPPORTED)
                return ERR_PTR(-ENOENT);

        if (pmap->encoding == CACHE_OP_NONSENSE)
                return ERR_PTR(-EINVAL);

        return pmap;
}

static void hw_perf_event_destroy(struct perf_event *event)
{
        perf_event_release_pmc();
}

/* Make sure all events can be scheduled into the hardware at
 * the same time.  This is simplified by the fact that we only
 * need to support 2 simultaneous HW events.
 */
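/* In short, two events fit if at least one of them can run on either
 * counter, or if they are pinned to different counters:
 *
 *   (UPPER|LOWER, UPPER|LOWER)        -> OK
 *   (UPPER or LOWER, UPPER|LOWER)     -> OK (and vice versa)
 *   (UPPER, LOWER) or (LOWER, UPPER)  -> OK
 *   anything else                     -> conflict
 */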
static int sparc_check_constraints(unsigned long *events, int n_ev)
{
        if (n_ev <= perf_max_events) {
                u8 msk1, msk2;
                u16 dummy;

                if (n_ev == 1)
                        return 0;
                BUG_ON(n_ev != 2);
                perf_event_decode(events[0], &dummy, &msk1);
                perf_event_decode(events[1], &dummy, &msk2);

                /* If both events can go on any counter, OK.  */
                if (msk1 == (PIC_UPPER | PIC_LOWER) &&
                    msk2 == (PIC_UPPER | PIC_LOWER))
                        return 0;

                /* If one event is limited to a specific counter,
                 * and the other can go on both, OK.
                 */
                if ((msk1 == PIC_UPPER || msk1 == PIC_LOWER) &&
                    msk2 == (PIC_UPPER | PIC_LOWER))
                        return 0;
                if ((msk2 == PIC_UPPER || msk2 == PIC_LOWER) &&
                    msk1 == (PIC_UPPER | PIC_LOWER))
                        return 0;

                /* If the events are fixed to different counters, OK.  */
                if ((msk1 == PIC_UPPER && msk2 == PIC_LOWER) ||
                    (msk1 == PIC_LOWER && msk2 == PIC_UPPER))
                        return 0;

                /* Otherwise, there is a conflict.  */
        }

        return -1;
}

static int check_excludes(struct perf_event **evts, int n_prev, int n_new)
{
        int eu = 0, ek = 0, eh = 0;
        struct perf_event *event;
        int i, n, first;

        n = n_prev + n_new;
        if (n <= 1)
                return 0;

        first = 1;
        for (i = 0; i < n; i++) {
                event = evts[i];
                if (first) {
                        eu = event->attr.exclude_user;
                        ek = event->attr.exclude_kernel;
                        eh = event->attr.exclude_hv;
                        first = 0;
                } else if (event->attr.exclude_user != eu ||
                           event->attr.exclude_kernel != ek ||
                           event->attr.exclude_hv != eh) {
                        return -EAGAIN;
                }
        }

        return 0;
}

static int collect_events(struct perf_event *group, int max_count,
                          struct perf_event *evts[], unsigned long *events)
{
        struct perf_event *event;
        int n = 0;

        if (!is_software_event(group)) {
                if (n >= max_count)
                        return -1;
                evts[n] = group;
                events[n++] = group->hw.event_base;
        }
        list_for_each_entry(event, &group->sibling_list, group_entry) {
                if (!is_software_event(event) &&
                    event->state != PERF_EVENT_STATE_OFF) {
                        if (n >= max_count)
                                return -1;
                        evts[n] = event;
                        events[n++] = event->hw.event_base;
                }
        }
        return n;
}

static int __hw_perf_event_init(struct perf_event *event)
{
        struct perf_event_attr *attr = &event->attr;
        struct perf_event *evts[MAX_HWEVENTS];
        struct hw_perf_event *hwc = &event->hw;
        unsigned long events[MAX_HWEVENTS];
        const struct perf_event_map *pmap;
        u64 enc;
        int n;

        if (atomic_read(&nmi_active) < 0)
                return -ENODEV;

        if (attr->type == PERF_TYPE_HARDWARE) {
                if (attr->config >= sparc_pmu->max_events)
                        return -EINVAL;
                pmap = sparc_pmu->event_map(attr->config);
        } else if (attr->type == PERF_TYPE_HW_CACHE) {
                pmap = sparc_map_cache_event(attr->config);
                if (IS_ERR(pmap))
                        return PTR_ERR(pmap);
        } else
                return -EOPNOTSUPP;

        /* We save the enable bits in the config_base.  So to
         * turn off sampling just write 'config', and to enable
         * things write 'config | config_base'.
         */
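        /* For example, going by the niagara2_pmu values above: a
         * CPU-cycles event with no exclude bits set ends up with
         * config_base = 0x30 (irq_bit) | PCR_UTRACE | PCR_STRACE | 0x8
         * (hv_bit), while 'config' carries the 0x02ff event encoding
         * shifted into whichever counter field gets chosen below.
         */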
        hwc->config_base = sparc_pmu->irq_bit;
        if (!attr->exclude_user)
                hwc->config_base |= PCR_UTRACE;
        if (!attr->exclude_kernel)
                hwc->config_base |= PCR_STRACE;
        if (!attr->exclude_hv)
                hwc->config_base |= sparc_pmu->hv_bit;

        hwc->event_base = perf_event_encode(pmap);

        enc = pmap->encoding;

        n = 0;
        if (event->group_leader != event) {
                n = collect_events(event->group_leader,
                                   perf_max_events - 1,
                                   evts, events);
                if (n < 0)
                        return -EINVAL;
        }
        events[n] = hwc->event_base;
        evts[n] = event;

        if (check_excludes(evts, n, 1))
                return -EINVAL;

        if (sparc_check_constraints(events, n + 1))
                return -EINVAL;

        /* Try to do all error checking before this point, as unwinding
         * state after grabbing the PMC is difficult.
         */
        perf_event_grab_pmc();
        event->destroy = hw_perf_event_destroy;

        if (!hwc->sample_period) {
                hwc->sample_period = MAX_PERIOD;
                hwc->last_period = hwc->sample_period;
                atomic64_set(&hwc->period_left, hwc->sample_period);
        }

        if (pmap->pic_mask & PIC_UPPER) {
                hwc->idx = PIC_UPPER_INDEX;
                enc <<= sparc_pmu->upper_shift;
        } else {
                hwc->idx = PIC_LOWER_INDEX;
                enc <<= sparc_pmu->lower_shift;
        }

        hwc->config |= enc;
        return 0;
}

static const struct pmu pmu = {
        .enable         = sparc_pmu_enable,
        .disable        = sparc_pmu_disable,
        .read           = sparc_pmu_read,
        .unthrottle     = sparc_pmu_unthrottle,
};

const struct pmu *hw_perf_event_init(struct perf_event *event)
{
        int err = __hw_perf_event_init(event);

        if (err)
                return ERR_PTR(err);
        return &pmu;
}

void perf_event_print_debug(void)
{
        unsigned long flags;
        u64 pcr, pic;
        int cpu;

        if (!sparc_pmu)
                return;

        local_irq_save(flags);

        cpu = smp_processor_id();

        pcr = pcr_ops->read();
        read_pic(pic);

        pr_info("\n");
        pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n",
                cpu, pcr, pic);

        local_irq_restore(flags);
}

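/* Overflow NMI handler.  Every active counter is folded into its event;
 * a counter whose value still has bit 31 set is taken as not having
 * wrapped yet, so it was not the source of this NMI (recall from the
 * comment at the top that a counter can raise an interrupt even when
 * its event is not the one that overflowed) and is skipped.  Events
 * that did overflow get their period reprogrammed, and are disabled if
 * perf_event_overflow() asks for throttling.
 */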
static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
                                            unsigned long cmd, void *__args)
{
        struct die_args *args = __args;
        struct perf_sample_data data;
        struct cpu_hw_events *cpuc;
        struct pt_regs *regs;
        int idx;

        if (!atomic_read(&active_events))
                return NOTIFY_DONE;

        switch (cmd) {
        case DIE_NMI:
                break;

        default:
                return NOTIFY_DONE;
        }

        regs = args->regs;

        data.addr = 0;

        cpuc = &__get_cpu_var(cpu_hw_events);
        for (idx = 0; idx < MAX_HWEVENTS; idx++) {
                struct perf_event *event = cpuc->events[idx];
                struct hw_perf_event *hwc;
                u64 val;

                if (!test_bit(idx, cpuc->active_mask))
                        continue;
                hwc = &event->hw;
                val = sparc_perf_event_update(event, hwc, idx);
                if (val & (1ULL << 31))
                        continue;

                data.period = event->hw.last_period;
                if (!sparc_perf_event_set_period(event, hwc, idx))
                        continue;

                if (perf_event_overflow(event, 1, &data, regs))
                        sparc_pmu_disable_event(cpuc, hwc, idx);
        }

        return NOTIFY_STOP;
}

static __read_mostly struct notifier_block perf_event_nmi_notifier = {
        .notifier_call          = perf_event_nmi_handler,
};

static bool __init supported_pmu(void)
{
        if (!strcmp(sparc_pmu_type, "ultra3") ||
            !strcmp(sparc_pmu_type, "ultra3+") ||
            !strcmp(sparc_pmu_type, "ultra3i") ||
            !strcmp(sparc_pmu_type, "ultra4+")) {
                sparc_pmu = &ultra3_pmu;
                return true;
        }
        if (!strcmp(sparc_pmu_type, "niagara")) {
                sparc_pmu = &niagara1_pmu;
                return true;
        }
        if (!strcmp(sparc_pmu_type, "niagara2")) {
                sparc_pmu = &niagara2_pmu;
                return true;
        }
        return false;
}

void __init init_hw_perf_events(void)
{
        pr_info("Performance events: ");

        if (!supported_pmu()) {
                pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
                return;
        }

        pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);

        /* All sparc64 PMUs currently have 2 events.  But this simple
         * driver only supports one active event at a time.
         */
        perf_max_events = 1;

        register_die_notifier(&perf_event_nmi_notifier);
}