linux/arch/x86/events/amd/core.c
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/perf_event.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/jiffies.h>
#include <asm/apicdef.h>
#include <asm/nmi.h>

#include "../perf_event.h"

static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp);
static unsigned long perf_nmi_window;

/* AMD Event 0xFFF: Merge.  Used with Large Increment per Cycle events */
#define AMD_MERGE_EVENT ((0xFULL << 32) | 0xFFULL)
#define AMD_MERGE_EVENT_ENABLE (AMD_MERGE_EVENT | ARCH_PERFMON_EVENTSEL_ENABLE)
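/*
 * The 12-bit event select 0xFFF is split across the PERF_CTL register:
 * EventSelect[11:8] lives in bits 35:32 and EventSelect[7:0] in bits 7:0,
 * hence (0xF << 32) | 0xFF.
 */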

static __initconst const u64 amd_hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
                [ C(RESULT_MISS)   ] = 0x0141, /* Data Cache Misses          */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = 0,
                [ C(RESULT_MISS)   ] = 0,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
                [ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
        },
 },
 [ C(L1I ) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
                [ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = -1,
                [ C(RESULT_MISS)   ] = -1,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
                [ C(RESULT_MISS)   ] = 0,
        },
 },
 [ C(LL  ) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
                [ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
                [ C(RESULT_MISS)   ] = 0,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = 0,
                [ C(RESULT_MISS)   ] = 0,
        },
 },
 [ C(DTLB) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
                [ C(RESULT_MISS)   ] = 0x0746, /* L1_DTLB_AND_L2_DTLB_MISS.ALL */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = 0,
                [ C(RESULT_MISS)   ] = 0,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = 0,
                [ C(RESULT_MISS)   ] = 0,
        },
 },
 [ C(ITLB) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches        */
                [ C(RESULT_MISS)   ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = -1,
                [ C(RESULT_MISS)   ] = -1,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = -1,
                [ C(RESULT_MISS)   ] = -1,
        },
 },
 [ C(BPU ) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
                [ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = -1,
                [ C(RESULT_MISS)   ] = -1,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = -1,
                [ C(RESULT_MISS)   ] = -1,
        },
 },
 [ C(NODE) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */
                [ C(RESULT_MISS)   ] = 0x98e9, /* CPU Request to Memory, r   */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = -1,
                [ C(RESULT_MISS)   ] = -1,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = -1,
                [ C(RESULT_MISS)   ] = -1,
        },
 },
};

static __initconst const u64 amd_hw_cache_event_ids_f17h
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x0040, /* Data Cache Accesses */
                [C(RESULT_MISS)]   = 0xc860, /* L2$ access from DC Miss */
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = 0,
                [C(RESULT_MISS)]   = 0,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = 0xff5a, /* h/w prefetch DC Fills */
                [C(RESULT_MISS)]   = 0,
        },
},
[C(L1I)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x0080, /* Instruction cache fetches  */
                [C(RESULT_MISS)]   = 0x0081, /* Instruction cache misses   */
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)]   = -1,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = 0,
                [C(RESULT_MISS)]   = 0,
        },
},
[C(LL)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0,
                [C(RESULT_MISS)]   = 0,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = 0,
                [C(RESULT_MISS)]   = 0,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = 0,
                [C(RESULT_MISS)]   = 0,
        },
},
[C(DTLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0xff45, /* All L2 DTLB accesses */
                [C(RESULT_MISS)]   = 0xf045, /* L2 DTLB misses (PT walks) */
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = 0,
                [C(RESULT_MISS)]   = 0,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = 0,
                [C(RESULT_MISS)]   = 0,
        },
},
[C(ITLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x0084, /* L1 ITLB misses, L2 ITLB hits */
                [C(RESULT_MISS)]   = 0xff85, /* L1 ITLB misses, L2 misses */
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)]   = -1,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)]   = -1,
        },
},
[C(BPU)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x00c2, /* Retired Branch Instr.      */
                [C(RESULT_MISS)]   = 0x00c3, /* Retired Mispredicted BI    */
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)]   = -1,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)]   = -1,
        },
},
[C(NODE)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0,
                [C(RESULT_MISS)]   = 0,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)]   = -1,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)]   = -1,
        },
},
};
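/*
 * In the cache event tables above, a zero entry means no suitable event is
 * available for that op/result combination, while -1 marks a combination
 * that is not supported at all.
 */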

/*
 * AMD Performance Monitor K7 and later, up to and including Family 16h:
 */
static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
        [PERF_COUNT_HW_CPU_CYCLES]              = 0x0076,
        [PERF_COUNT_HW_INSTRUCTIONS]            = 0x00c0,
        [PERF_COUNT_HW_CACHE_REFERENCES]        = 0x077d,
        [PERF_COUNT_HW_CACHE_MISSES]            = 0x077e,
        [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = 0x00c2,
        [PERF_COUNT_HW_BRANCH_MISSES]           = 0x00c3,
        [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */
        [PERF_COUNT_HW_STALLED_CYCLES_BACKEND]  = 0x00d1, /* "Dispatch stalls" event */
};

/*
 * AMD Performance Monitor Family 17h and later:
 */
static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
        [PERF_COUNT_HW_CPU_CYCLES]              = 0x0076,
        [PERF_COUNT_HW_INSTRUCTIONS]            = 0x00c0,
        [PERF_COUNT_HW_CACHE_REFERENCES]        = 0xff60,
        [PERF_COUNT_HW_CACHE_MISSES]            = 0x0964,
        [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = 0x00c2,
        [PERF_COUNT_HW_BRANCH_MISSES]           = 0x00c3,
        [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287,
        [PERF_COUNT_HW_STALLED_CYCLES_BACKEND]  = 0x0187,
};

static u64 amd_pmu_event_map(int hw_event)
{
        if (boot_cpu_data.x86 >= 0x17)
                return amd_f17h_perfmon_event_map[hw_event];

        return amd_perfmon_event_map[hw_event];
}

/*
 * Previously calculated offsets
 */
static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;

/*
 * Legacy CPUs:
 *   4 counters starting at 0xc0010000 each offset by 1
 *
 * CPUs with core performance counter extensions:
 *   6 counters starting at 0xc0010200 each offset by 2
 */
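/*
 * For example, counter index 3 maps to MSR 0xc0010003 (evntsel) and
 * 0xc0010007 (perfctr) on legacy CPUs, and to 0xc0010206 / 0xc0010207
 * when the core performance counter extensions are present.
 */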
static inline int amd_pmu_addr_offset(int index, bool eventsel)
{
        int offset;

        if (!index)
                return index;

        if (eventsel)
                offset = event_offsets[index];
        else
                offset = count_offsets[index];

        if (offset)
                return offset;

        if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
                offset = index;
        else
                offset = index << 1;

        if (eventsel)
                event_offsets[index] = offset;
        else
                count_offsets[index] = offset;

        return offset;
}

/*
 * AMD64 events are detected based on their event codes.
 */
static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
{
        return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
}
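/*
 * e.g. a raw config of 0x1000000d6 (EventSelect[11:8] = 0x1 in bits 35:32,
 * EventSelect[7:0] = 0xd6 in bits 7:0) yields event code 0x1d6.
 */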

static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc)
{
        if (!(x86_pmu.flags & PMU_FL_PAIR))
                return false;

        switch (amd_get_event_code(hwc)) {
        case 0x003:     return true;    /* Retired SSE/AVX FLOPs */
        default:        return false;
        }
}

static int amd_core_hw_config(struct perf_event *event)
{
        if (event->attr.exclude_host && event->attr.exclude_guest)
                /*
                 * When HO == GO == 1 the hardware treats that as GO == HO == 0
                 * and will count in both modes. We don't want to count in that
                 * case so we emulate no-counting by setting US = OS = 0.
                 */
                event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
                                      ARCH_PERFMON_EVENTSEL_OS);
        else if (event->attr.exclude_host)
                event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
        else if (event->attr.exclude_guest)
                event->hw.config |= AMD64_EVENTSEL_HOSTONLY;

        if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw))
                event->hw.flags |= PERF_X86_EVENT_PAIR;

        return 0;
}

static inline int amd_is_nb_event(struct hw_perf_event *hwc)
{
        return (hwc->config & 0xe0) == 0xe0;
}

static inline int amd_has_nb(struct cpu_hw_events *cpuc)
{
        struct amd_nb *nb = cpuc->amd_nb;

        return nb && nb->nb_id != -1;
}

static int amd_pmu_hw_config(struct perf_event *event)
{
        int ret;

        /* pass precise event sampling to ibs: */
        if (event->attr.precise_ip && get_ibs_caps())
                return -ENOENT;

        if (has_branch_stack(event))
                return -EOPNOTSUPP;

        ret = x86_pmu_hw_config(event);
        if (ret)
                return ret;

        if (event->attr.type == PERF_TYPE_RAW)
                event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;

        return amd_core_hw_config(event);
}

static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
                                           struct perf_event *event)
{
        struct amd_nb *nb = cpuc->amd_nb;
        int i;

        /*
         * need to scan the whole list because the event may not have
         * been assigned during scheduling
         *
         * no race condition possible because the event can only
         * be removed on one CPU at a time AND the PMU is disabled
         * when we come here
         */
        for (i = 0; i < x86_pmu.num_counters; i++) {
                if (cmpxchg(nb->owners + i, event, NULL) == event)
                        break;
        }
}

 /*
  * AMD64 NorthBridge events need special treatment because
  * counter access needs to be synchronized across all cores
  * of a package. Refer to BKDG section 3.12
  *
  * NB events are events measuring L3 cache and HyperTransport
  * traffic. They are identified by an event code >= 0xe00.
  * They measure events on the NorthBridge, which is shared
  * by all cores on a package. NB events are counted on a
  * shared set of counters. When an NB event is programmed
  * in a counter, the data actually comes from a shared
  * counter. Thus, access to those counters needs to be
  * synchronized.
  *
  * We implement the synchronization such that no two cores
  * can be measuring NB events using the same counters. Thus,
  * we maintain a per-NB allocation table. The available slot
  * is propagated using the event_constraint structure.
  *
  * We provide only one choice for each NB event based on
  * the fact that only NB events have restrictions. Consequently,
  * if a counter is available, there is a guarantee the NB event
  * will be assigned to it. If no slot is available, an empty
  * constraint is returned and scheduling will eventually fail
  * for this event.
  *
  * Note that all cores attached to the same NB compete for the same
  * counters to host NB events, which is why we use atomic ops. Some
  * multi-chip CPUs may have more than one NB.
  *
  * Given that resources are allocated (cmpxchg), they must be
  * eventually freed for others to use. This is accomplished by
  * calling __amd_put_nb_event_constraints()
  *
  * Non-NB events are not impacted by this restriction.
  */
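/*
 * Roughly: nb->owners[] acts as a per-NB claim table with one slot per
 * counter. A core claims slot i for an NB event with
 * cmpxchg(&nb->owners[i], NULL, event), so two cores can never program
 * NB events on the same shared counter at the same time.
 */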
static struct event_constraint *
__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
                               struct event_constraint *c)
{
        struct hw_perf_event *hwc = &event->hw;
        struct amd_nb *nb = cpuc->amd_nb;
        struct perf_event *old;
        int idx, new = -1;

        if (!c)
                c = &unconstrained;

        if (cpuc->is_fake)
                return c;

        /*
         * detect if already present, if so reuse
         *
         * cannot merge with actual allocation
         * because of possible holes
         *
         * event can already be present yet not assigned (in hwc->idx)
         * because of successive calls to x86_schedule_events() from
         * hw_perf_group_sched_in() without hw_perf_enable()
         */
        for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
                if (new == -1 || hwc->idx == idx)
                        /* assign free slot, prefer hwc->idx */
                        old = cmpxchg(nb->owners + idx, NULL, event);
                else if (nb->owners[idx] == event)
                        /* event already present */
                        old = event;
                else
                        continue;

                if (old && old != event)
                        continue;

                /* reassign to this slot */
                if (new != -1)
                        cmpxchg(nb->owners + new, event, NULL);
                new = idx;

                /* already present, reuse */
                if (old == event)
                        break;
        }

        if (new == -1)
                return &emptyconstraint;

        return &nb->event_constraints[new];
}

static struct amd_nb *amd_alloc_nb(int cpu)
{
        struct amd_nb *nb;
        int i;

        nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu));
        if (!nb)
                return NULL;

        nb->nb_id = -1;

        /*
         * initialize all possible NB constraints
         */
        for (i = 0; i < x86_pmu.num_counters; i++) {
                __set_bit(i, nb->event_constraints[i].idxmsk);
                nb->event_constraints[i].weight = 1;
        }
        return nb;
}

static int amd_pmu_cpu_prepare(int cpu)
{
        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

        WARN_ON_ONCE(cpuc->amd_nb);

        if (!x86_pmu.amd_nb_constraints)
                return 0;

        cpuc->amd_nb = amd_alloc_nb(cpu);
        if (!cpuc->amd_nb)
                return -ENOMEM;

        return 0;
}

static void amd_pmu_cpu_starting(int cpu)
{
        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
        void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
        struct amd_nb *nb;
        int i, nb_id;

        cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;

        if (!x86_pmu.amd_nb_constraints)
                return;

        nb_id = topology_die_id(cpu);
        WARN_ON_ONCE(nb_id == BAD_APICID);

        for_each_online_cpu(i) {
                nb = per_cpu(cpu_hw_events, i).amd_nb;
                if (WARN_ON_ONCE(!nb))
                        continue;

                if (nb->nb_id == nb_id) {
                        *onln = cpuc->amd_nb;
                        cpuc->amd_nb = nb;
                        break;
                }
        }

        cpuc->amd_nb->nb_id = nb_id;
        cpuc->amd_nb->refcnt++;
}

static void amd_pmu_cpu_dead(int cpu)
{
        struct cpu_hw_events *cpuhw;

        if (!x86_pmu.amd_nb_constraints)
                return;

        cpuhw = &per_cpu(cpu_hw_events, cpu);

        if (cpuhw->amd_nb) {
                struct amd_nb *nb = cpuhw->amd_nb;

                if (nb->nb_id == -1 || --nb->refcnt == 0)
                        kfree(nb);

                cpuhw->amd_nb = NULL;
        }
}

/*
 * When a PMC counter overflows, an NMI is used to process the event and
 * reset the counter. NMI latency can result in the counter being updated
 * before the NMI can run, which can result in what appear to be spurious
 * NMIs. This function is intended to wait for the NMI to run and reset
 * the counter to avoid possible unhandled NMI messages.
 */
#define OVERFLOW_WAIT_COUNT     50
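/* With udelay(1) per iteration this bounds the wait to roughly 50 microseconds. */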

static void amd_pmu_wait_on_overflow(int idx)
{
        unsigned int i;
        u64 counter;

        /*
         * Wait for the counter to be reset if it has overflowed. This loop
         * should exit very, very quickly, but just in case, don't wait
         * forever...
         */
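        /*
         * An active counter is programmed with a large negative value
         * (-sample period), so its top bit is normally set; right after an
         * overflow it wraps to a small positive value until the NMI handler
         * reprograms it. The top bit being set again therefore means the
         * NMI has run and the counter has been reset.
         */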
        for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) {
                rdmsrl(x86_pmu_event_addr(idx), counter);
                if (counter & (1ULL << (x86_pmu.cntval_bits - 1)))
                        break;

                /* Might be in IRQ context, so can't sleep */
                udelay(1);
        }
}

static void amd_pmu_disable_all(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        int idx;

        x86_pmu_disable_all();

        /*
         * This shouldn't be called from NMI context, but add a safeguard here
         * to return, since if we're in NMI context we can't wait for an NMI
         * to reset an overflowed counter value.
         */
        if (in_nmi())
                return;

        /*
         * Check each counter for overflow and wait for it to be reset by the
         * NMI if it has overflowed. This relies on the fact that all active
         * counters are always enabled when this function is called and
         * ARCH_PERFMON_EVENTSEL_INT is always set.
         */
        for (idx = 0; idx < x86_pmu.num_counters; idx++) {
                if (!test_bit(idx, cpuc->active_mask))
                        continue;

                amd_pmu_wait_on_overflow(idx);
        }
}

static void amd_pmu_disable_event(struct perf_event *event)
{
        x86_pmu_disable_event(event);

        /*
         * This can be called from NMI context (via x86_pmu_stop). The counter
         * may have overflowed, but either way, we'll never see it get reset
         * by the NMI if we're already in the NMI. And the NMI latency support
         * below will take care of any pending NMI that might have been
         * generated by the overflow.
         */
        if (in_nmi())
                return;

        amd_pmu_wait_on_overflow(event->hw.idx);
}

/*
 * Because of NMI latency, if multiple PMC counters are active or other sources
 * of NMIs are received, the perf NMI handler can handle one or more overflowed
 * PMC counters outside of the NMI associated with the PMC overflow. If the NMI
 * doesn't arrive at the LAPIC in time to become a pending NMI, then the kernel
 * back-to-back NMI support won't be active. This PMC handler needs to take into
 * account that this can occur, otherwise this could result in unknown NMI
 * messages being issued. Examples of this are PMC overflow while in the NMI
 * handler when multiple PMCs are active or PMC overflow while handling some
 * other source of an NMI.
 *
 * Attempt to mitigate this by creating an NMI window in which un-handled NMIs
 * received during this window will be claimed. This prevents extending the
 * window past when it is possible that latent NMIs should be received. The
 * per-CPU perf_nmi_tstamp will be set to the window end time whenever perf has
 * handled a counter. When an un-handled NMI is received, it will be claimed
 * only if arriving within that window.
 */
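/*
 * Concretely: with the ~100 ms perf_nmi_window set up in amd_core_pmu_init(),
 * an otherwise unknown NMI that arrives within 100 ms of the last handled
 * counter overflow is treated as a latent PMC NMI and claimed as NMI_HANDLED.
 */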
static int amd_pmu_handle_irq(struct pt_regs *regs)
{
        int handled;

        /* Process any counter overflows */
        handled = x86_pmu_handle_irq(regs);

        /*
         * If a counter was handled, record a timestamp such that un-handled
         * NMIs will be claimed if arriving within that window.
         */
        if (handled) {
                this_cpu_write(perf_nmi_tstamp, jiffies + perf_nmi_window);

                return handled;
        }

        if (time_after(jiffies, this_cpu_read(perf_nmi_tstamp)))
                return NMI_DONE;

        return NMI_HANDLED;
}

static struct event_constraint *
amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
                          struct perf_event *event)
{
        /*
         * if not NB event or no NB, then no constraints
         */
        if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
                return &unconstrained;

        return __amd_get_nb_event_constraints(cpuc, event, NULL);
}

static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
                                      struct perf_event *event)
{
        if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
                __amd_put_nb_event_constraints(cpuc, event);
}

PMU_FORMAT_ATTR(event,  "config:0-7,32-35");
PMU_FORMAT_ATTR(umask,  "config:8-15"   );
PMU_FORMAT_ATTR(edge,   "config:18"     );
PMU_FORMAT_ATTR(inv,    "config:23"     );
PMU_FORMAT_ATTR(cmask,  "config:24-31"  );

static struct attribute *amd_format_attr[] = {
        &format_attr_event.attr,
        &format_attr_umask.attr,
        &format_attr_edge.attr,
        &format_attr_inv.attr,
        &format_attr_cmask.attr,
        NULL,
};
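/*
 * These fields are exported under /sys/bus/event_source/devices/cpu/format/
 * and can be used to build raw events from the perf tool, e.g.:
 *
 *   perf stat -e cpu/event=0x76,umask=0x00/ ...   # CPU cycles (0x76)
 */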

/* AMD Family 15h */

#define AMD_EVENT_TYPE_MASK     0x000000F0ULL

#define AMD_EVENT_FP            0x00000000ULL ... 0x00000010ULL
#define AMD_EVENT_LS            0x00000020ULL ... 0x00000030ULL
#define AMD_EVENT_DC            0x00000040ULL ... 0x00000050ULL
#define AMD_EVENT_CU            0x00000060ULL ... 0x00000070ULL
#define AMD_EVENT_IC_DE         0x00000080ULL ... 0x00000090ULL
#define AMD_EVENT_EX_LS         0x000000C0ULL
#define AMD_EVENT_DE            0x000000D0ULL
#define AMD_EVENT_NB            0x000000E0ULL ... 0x000000F0ULL

/*
 * AMD family 15h event code/PMC mappings:
 *
 * type = event_code & 0x0F0:
 *
 * 0x000        FP      PERF_CTL[5:3]
 * 0x010        FP      PERF_CTL[5:3]
 * 0x020        LS      PERF_CTL[5:0]
 * 0x030        LS      PERF_CTL[5:0]
 * 0x040        DC      PERF_CTL[5:0]
 * 0x050        DC      PERF_CTL[5:0]
 * 0x060        CU      PERF_CTL[2:0]
 * 0x070        CU      PERF_CTL[2:0]
 * 0x080        IC/DE   PERF_CTL[2:0]
 * 0x090        IC/DE   PERF_CTL[2:0]
 * 0x0A0        ---
 * 0x0B0        ---
 * 0x0C0        EX/LS   PERF_CTL[5:0]
 * 0x0D0        DE      PERF_CTL[2:0]
 * 0x0E0        NB      NB_PERF_CTL[3:0]
 * 0x0F0        NB      NB_PERF_CTL[3:0]
 *
 * Exceptions:
 *
 * 0x000        FP      PERF_CTL[3], PERF_CTL[5:3] (*)
 * 0x003        FP      PERF_CTL[3]
 * 0x004        FP      PERF_CTL[3], PERF_CTL[5:3] (*)
 * 0x00B        FP      PERF_CTL[3]
 * 0x00D        FP      PERF_CTL[3]
 * 0x023        DE      PERF_CTL[2:0]
 * 0x02D        LS      PERF_CTL[3]
 * 0x02E        LS      PERF_CTL[3,0]
 * 0x031        LS      PERF_CTL[2:0] (**)
 * 0x043        CU      PERF_CTL[2:0]
 * 0x045        CU      PERF_CTL[2:0]
 * 0x046        CU      PERF_CTL[2:0]
 * 0x054        CU      PERF_CTL[2:0]
 * 0x055        CU      PERF_CTL[2:0]
 * 0x08F        IC      PERF_CTL[0]
 * 0x187        DE      PERF_CTL[0]
 * 0x188        DE      PERF_CTL[0]
 * 0x0DB        EX      PERF_CTL[5:0]
 * 0x0DC        LS      PERF_CTL[5:0]
 * 0x0DD        LS      PERF_CTL[5:0]
 * 0x0DE        LS      PERF_CTL[5:0]
 * 0x0DF        LS      PERF_CTL[5:0]
 * 0x1C0        EX      PERF_CTL[5:3]
 * 0x1D6        EX      PERF_CTL[5:0]
 * 0x1D8        EX      PERF_CTL[5:0]
 *
 * (*)  depending on the umask all FPU counters may be used
 * (**) only one unitmask enabled at a time
 */

static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);
static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
static struct event_constraint amd_f15_PMC3  = EVENT_CONSTRAINT(0, 0x08, 0);
static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
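/*
 * The counter masks above encode the PERF_CTL choices from the table:
 * 0x01 -> PERF_CTL[0], 0x07 -> PERF_CTL[2:0], 0x08 -> PERF_CTL[3],
 * 0x09 -> PERF_CTL[3,0] (overlapping), 0x3F -> PERF_CTL[5:0] and
 * 0x38 -> PERF_CTL[5:3].
 */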

static struct event_constraint *
amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
                               struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        unsigned int event_code = amd_get_event_code(hwc);

        switch (event_code & AMD_EVENT_TYPE_MASK) {
        case AMD_EVENT_FP:
                switch (event_code) {
                case 0x000:
                        if (!(hwc->config & 0x0000F000ULL))
                                break;
                        if (!(hwc->config & 0x00000F00ULL))
                                break;
                        return &amd_f15_PMC3;
                case 0x004:
                        if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
                                break;
                        return &amd_f15_PMC3;
                case 0x003:
                case 0x00B:
                case 0x00D:
                        return &amd_f15_PMC3;
                }
                return &amd_f15_PMC53;
        case AMD_EVENT_LS:
        case AMD_EVENT_DC:
        case AMD_EVENT_EX_LS:
                switch (event_code) {
                case 0x023:
                case 0x043:
                case 0x045:
                case 0x046:
                case 0x054:
                case 0x055:
                        return &amd_f15_PMC20;
                case 0x02D:
                        return &amd_f15_PMC3;
                case 0x02E:
                        return &amd_f15_PMC30;
                case 0x031:
                        if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
                                return &amd_f15_PMC20;
                        return &emptyconstraint;
                case 0x1C0:
                        return &amd_f15_PMC53;
                default:
                        return &amd_f15_PMC50;
                }
        case AMD_EVENT_CU:
        case AMD_EVENT_IC_DE:
        case AMD_EVENT_DE:
                switch (event_code) {
                case 0x08F:
                case 0x187:
                case 0x188:
                        return &amd_f15_PMC0;
                case 0x0DB ... 0x0DF:
                case 0x1D6:
                case 0x1D8:
                        return &amd_f15_PMC50;
                default:
                        return &amd_f15_PMC20;
                }
        case AMD_EVENT_NB:
                /* moved to uncore.c */
                return &emptyconstraint;
        default:
                return &emptyconstraint;
        }
}

static struct event_constraint pair_constraint;

static struct event_constraint *
amd_get_event_constraints_f17h(struct cpu_hw_events *cpuc, int idx,
                               struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;

        if (amd_is_pair_event_code(hwc))
                return &pair_constraint;

        return &unconstrained;
}

static void amd_put_event_constraints_f17h(struct cpu_hw_events *cpuc,
                                           struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;

        if (is_counter_pair(hwc))
                --cpuc->n_pair;
}

static ssize_t amd_event_sysfs_show(char *page, u64 config)
{
        u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
                    (config & AMD64_EVENTSEL_EVENT) >> 24;

        return x86_event_sysfs_show(page, config, event);
}

static __initconst const struct x86_pmu amd_pmu = {
        .name                   = "AMD",
        .handle_irq             = amd_pmu_handle_irq,
        .disable_all            = amd_pmu_disable_all,
        .enable_all             = x86_pmu_enable_all,
        .enable                 = x86_pmu_enable_event,
        .disable                = amd_pmu_disable_event,
        .hw_config              = amd_pmu_hw_config,
        .schedule_events        = x86_schedule_events,
        .eventsel               = MSR_K7_EVNTSEL0,
        .perfctr                = MSR_K7_PERFCTR0,
        .addr_offset            = amd_pmu_addr_offset,
        .event_map              = amd_pmu_event_map,
        .max_events             = ARRAY_SIZE(amd_perfmon_event_map),
        .num_counters           = AMD64_NUM_COUNTERS,
        .cntval_bits            = 48,
        .cntval_mask            = (1ULL << 48) - 1,
        .apic                   = 1,
        /* use highest bit to detect overflow */
        .max_period             = (1ULL << 47) - 1,
        .get_event_constraints  = amd_get_event_constraints,
        .put_event_constraints  = amd_put_event_constraints,

        .format_attrs           = amd_format_attr,
        .events_sysfs_show      = amd_event_sysfs_show,

        .cpu_prepare            = amd_pmu_cpu_prepare,
        .cpu_starting           = amd_pmu_cpu_starting,
        .cpu_dead               = amd_pmu_cpu_dead,

        .amd_nb_constraints     = 1,
};

static int __init amd_core_pmu_init(void)
{
        u64 even_ctr_mask = 0ULL;
        int i;

        if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
                return 0;

        /* Avoid calculating the value each time in the NMI handler */
        perf_nmi_window = msecs_to_jiffies(100);

        /*
         * If core performance counter extensions exist, we must use
         * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
         * amd_pmu_addr_offset().
         */
        x86_pmu.eventsel        = MSR_F15H_PERF_CTL;
        x86_pmu.perfctr         = MSR_F15H_PERF_CTR;
        x86_pmu.num_counters    = AMD64_NUM_COUNTERS_CORE;
        /*
         * AMD Core perfctr has separate MSRs for the NB events, see
         * the amd/uncore.c driver.
         */
        x86_pmu.amd_nb_constraints = 0;

        if (boot_cpu_data.x86 == 0x15) {
                pr_cont("Fam15h ");
                x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
        }
        if (boot_cpu_data.x86 >= 0x17) {
                pr_cont("Fam17h+ ");
                /*
                 * Family 17h and compatibles have constraints for Large
                 * Increment per Cycle events: they may only be assigned an
                 * even numbered counter that has a consecutive adjacent odd
                 * numbered counter following it.
                 */
                for (i = 0; i < x86_pmu.num_counters - 1; i += 2)
                        even_ctr_mask |= 1 << i;
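                /*
                 * e.g. with 6 counters this yields even_ctr_mask = 0x15
                 * (counters 0, 2 and 4), leaving counters 1, 3 and 5 to act
                 * as the odd halves of each pair.
                 */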

                pair_constraint = (struct event_constraint)
                                    __EVENT_CONSTRAINT(0, even_ctr_mask, 0,
                                    x86_pmu.num_counters / 2, 0,
                                    PERF_X86_EVENT_PAIR);

                x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;
                x86_pmu.put_event_constraints = amd_put_event_constraints_f17h;
                x86_pmu.perf_ctr_pair_en = AMD_MERGE_EVENT_ENABLE;
                x86_pmu.flags |= PMU_FL_PAIR;
        }

        pr_cont("core perfctr, ");
        return 0;
}

__init int amd_pmu_init(void)
{
        int ret;

        /* Performance-monitoring supported from K7 and later: */
        if (boot_cpu_data.x86 < 6)
                return -ENODEV;

        x86_pmu = amd_pmu;

        ret = amd_core_pmu_init();
        if (ret)
                return ret;

        if (num_possible_cpus() == 1) {
                /*
                 * No point in allocating data structures to serialize
                 * against other CPUs, when there is only the one CPU.
                 */
                x86_pmu.amd_nb_constraints = 0;
        }

        if (boot_cpu_data.x86 >= 0x17)
                memcpy(hw_cache_event_ids, amd_hw_cache_event_ids_f17h, sizeof(hw_cache_event_ids));
        else
                memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, sizeof(hw_cache_event_ids));

        return 0;
}

void amd_pmu_enable_virt(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        cpuc->perf_ctr_virt_mask = 0;

        /* Reload all events */
        amd_pmu_disable_all();
        x86_pmu_enable_all(0);
}
EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);

void amd_pmu_disable_virt(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        /*
         * We only mask out the Host-only bit so that host-only counting works
         * when SVM is disabled. If someone sets up a guest-only counter when
         * SVM is disabled the Guest-only bit still gets set and the counter
         * will not count anything.
         */
        cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;

        /* Reload all events */
        amd_pmu_disable_all();
        x86_pmu_enable_all(0);
}
EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);