linux/arch/x86/events/intel/ds.c
   1// SPDX-License-Identifier: GPL-2.0
   2#include <linux/bitops.h>
   3#include <linux/types.h>
   4#include <linux/slab.h>
   5
   6#include <asm/cpu_entry_area.h>
   7#include <asm/perf_event.h>
   8#include <asm/tlbflush.h>
   9#include <asm/insn.h>
  10#include <asm/io.h>
  11
  12#include "../perf_event.h"
  13
  14/* Waste a full page so it can be mapped into the cpu_entry_area */
  15DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
  16
  17/* The size of a BTS record in bytes: */
  18#define BTS_RECORD_SIZE         24
  19
  20#define PEBS_FIXUP_SIZE         PAGE_SIZE
  21
  22/*
  23 * pebs_record_32 for p4 and core not supported
  24
  25struct pebs_record_32 {
  26        u32 flags, ip;
  27        u32 ax, bx, cx, dx;
  28        u32 si, di, bp, sp;
  29};
  30
  31 */
  32
  33union intel_x86_pebs_dse {
  34        u64 val;
  35        struct {
  36                unsigned int ld_dse:4;
  37                unsigned int ld_stlb_miss:1;
  38                unsigned int ld_locked:1;
  39                unsigned int ld_data_blk:1;
  40                unsigned int ld_addr_blk:1;
  41                unsigned int ld_reserved:24;
  42        };
  43        struct {
  44                unsigned int st_l1d_hit:1;
  45                unsigned int st_reserved1:3;
  46                unsigned int st_stlb_miss:1;
  47                unsigned int st_locked:1;
  48                unsigned int st_reserved2:26;
  49        };
  50        struct {
  51                unsigned int st_lat_dse:4;
  52                unsigned int st_lat_stlb_miss:1;
  53                unsigned int st_lat_locked:1;
  54                unsigned int ld_reserved3:26;
  55        };
  56};
  57
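    /*
     * Illustrative (made-up) decode of the load layout above: a raw dse
     * value of 0x23 has ld_dse = 0x3 (bits 0-3, an L2 hit in the mapping
     * table below), ld_stlb_miss = 0 (bit 4) and ld_locked = 1 (bit 5),
     * i.e. a locked load that hit the L2 and the second-level TLB.
     */
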
  58
  59/*
  60 * Map PEBS Load Latency Data Source encodings to generic
  61 * memory data source information
  62 */
  63#define P(a, b) PERF_MEM_S(a, b)
  64#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
  65#define LEVEL(x) P(LVLNUM, x)
  66#define REM P(REMOTE, REMOTE)
  67#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
  68
  69/* Version for Sandy Bridge and later */
  70static u64 pebs_data_source[] = {
  71        P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
  72        OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),  /* 0x01: L1 local */
  73        OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
  74        OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, NONE),  /* 0x03: L2 hit */
  75        OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, NONE),  /* 0x04: L3 hit */
  76        OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, MISS),  /* 0x05: L3 hit, snoop miss */
  77        OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HIT),   /* 0x06: L3 hit, snoop hit */
  78        OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HITM),  /* 0x07: L3 hit, snoop hitm */
  79        OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
  80        OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
  81        OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | P(SNOOP, HIT),       /* 0x0a: L3 miss, shared */
  82        OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
  83        OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | SNOOP_NONE_MISS,     /* 0x0c: L3 miss, excl */
  84        OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* 0x0d: L3 miss, excl */
  85        OP_LH | P(LVL, IO)  | LEVEL(NA) | P(SNOOP, NONE), /* 0x0e: I/O */
  86        OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */
  87};
  88
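    /*
     * Each entry above is a pre-composed perf_mem_data_src value: P(a, b)
     * is PERF_MEM_S(a, b), which shifts PERF_MEM_<a>_<b> into its field of
     * the data_src word.  Entry 0x01, for instance, reads as a load that
     * hit the local L1 with no snooping involved.
     */
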
  89/* Patch up minor differences in the bits */
  90void __init intel_pmu_pebs_data_source_nhm(void)
  91{
  92        pebs_data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
  93        pebs_data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
  94        pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
  95}
  96
  97void __init intel_pmu_pebs_data_source_skl(bool pmem)
  98{
  99        u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);
 100
 101        pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
 102        pebs_data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
 103        pebs_data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
 104        pebs_data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
 105        pebs_data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
 106}
 107
 108static u64 precise_store_data(u64 status)
 109{
 110        union intel_x86_pebs_dse dse;
 111        u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
 112
 113        dse.val = status;
 114
 115        /*
 116         * bit 4: TLB access
 117         * 1 = store missed 2nd level TLB
 118         *
 119         * so it either hit the page walker or the OS,
 120         * otherwise it hit the 2nd level TLB
 121         */
 122        if (dse.st_stlb_miss)
 123                val |= P(TLB, MISS);
 124        else
 125                val |= P(TLB, HIT);
 126
 127        /*
 128         * bit 0: hit L1 data cache
 129         * if not set, then all we know is that
 130         * it missed L1D
 131         */
 132        if (dse.st_l1d_hit)
 133                val |= P(LVL, HIT);
 134        else
 135                val |= P(LVL, MISS);
 136
 137        /*
 138         * bit 5: Locked prefix
 139         */
 140        if (dse.st_locked)
 141                val |= P(LOCK, LOCKED);
 142
 143        return val;
 144}
 145
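    /*
     * Worked example (status value invented for illustration): 0x11 sets
     * st_l1d_hit (bit 0) and st_stlb_miss (bit 4), so the function returns
     * P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2) | P(TLB, MISS) |
     * P(LVL, HIT): a store that hit L1D but missed the second-level TLB.
     */
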
 146static u64 precise_datala_hsw(struct perf_event *event, u64 status)
 147{
 148        union perf_mem_data_src dse;
 149
 150        dse.val = PERF_MEM_NA;
 151
 152        if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
 153                dse.mem_op = PERF_MEM_OP_STORE;
 154        else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
 155                dse.mem_op = PERF_MEM_OP_LOAD;
 156
 157        /*
 158         * L1 info only valid for following events:
 159         *
 160         * MEM_UOPS_RETIRED.STLB_MISS_STORES
 161         * MEM_UOPS_RETIRED.LOCK_STORES
 162         * MEM_UOPS_RETIRED.SPLIT_STORES
 163         * MEM_UOPS_RETIRED.ALL_STORES
 164         */
 165        if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
 166                if (status & 1)
 167                        dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
 168                else
 169                        dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
 170        }
 171        return dse.val;
 172}
 173
 174static u64 load_latency_data(u64 status)
 175{
 176        union intel_x86_pebs_dse dse;
 177        u64 val;
 178
 179        dse.val = status;
 180
 181        /*
 182         * use the mapping table for bit 0-3
 183         */
 184        val = pebs_data_source[dse.ld_dse];
 185
 186        /*
 187         * Nehalem models do not support TLB or lock info
 188         */
 189        if (x86_pmu.pebs_no_tlb) {
 190                val |= P(TLB, NA) | P(LOCK, NA);
 191                return val;
 192        }
 193        /*
 194         * bit 4: TLB access
 195         * 0 = did not miss 2nd level TLB
 196         * 1 = missed 2nd level TLB
 197         */
 198        if (dse.ld_stlb_miss)
 199                val |= P(TLB, MISS) | P(TLB, L2);
 200        else
 201                val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
 202
 203        /*
 204         * bit 5: locked prefix
 205         */
 206        if (dse.ld_locked)
 207                val |= P(LOCK, LOCKED);
 208
 209        /*
 210         * Ice Lake and earlier models do not support block info.
 211         */
 212        if (!x86_pmu.pebs_block) {
 213                val |= P(BLK, NA);
 214                return val;
 215        }
 216        /*
 217         * bit 6: load was blocked since its data could not be forwarded
 218         *        from a preceding store
 219         */
 220        if (dse.ld_data_blk)
 221                val |= P(BLK, DATA);
 222
 223        /*
 224         * bit 7: load was blocked due to potential address conflict with
 225         *        a preceding store
 226         */
 227        if (dse.ld_addr_blk)
 228                val |= P(BLK, ADDR);
 229
 230        if (!dse.ld_data_blk && !dse.ld_addr_blk)
 231                val |= P(BLK, NA);
 232
 233        return val;
 234}
 235
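    /*
     * Example (illustrative status value): 0x33 decodes as dse 0x3 (L2 hit),
     * ld_stlb_miss = 1 and ld_locked = 1, so on a part with TLB/lock
     * reporting this yields the table entry for an L2 hit plus
     * P(TLB, MISS) | P(TLB, L2) | P(LOCK, LOCKED), and P(BLK, NA) when the
     * CPU does not report block information.
     */
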
 236static u64 store_latency_data(u64 status)
 237{
 238        union intel_x86_pebs_dse dse;
 239        u64 val;
 240
 241        dse.val = status;
 242
 243        /*
 244         * use the mapping table for bit 0-3
 245         */
 246        val = pebs_data_source[dse.st_lat_dse];
 247
 248        /*
 249         * bit 4: TLB access
 250         * 0 = did not miss 2nd level TLB
 251         * 1 = missed 2nd level TLB
 252         */
 253        if (dse.st_lat_stlb_miss)
 254                val |= P(TLB, MISS) | P(TLB, L2);
 255        else
 256                val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
 257
 258        /*
 259         * bit 5: locked prefix
 260         */
 261        if (dse.st_lat_locked)
 262                val |= P(LOCK, LOCKED);
 263
 264        val |= P(BLK, NA);
 265
 266        return val;
 267}
 268
 269struct pebs_record_core {
 270        u64 flags, ip;
 271        u64 ax, bx, cx, dx;
 272        u64 si, di, bp, sp;
 273        u64 r8,  r9,  r10, r11;
 274        u64 r12, r13, r14, r15;
 275};
 276
 277struct pebs_record_nhm {
 278        u64 flags, ip;
 279        u64 ax, bx, cx, dx;
 280        u64 si, di, bp, sp;
 281        u64 r8,  r9,  r10, r11;
 282        u64 r12, r13, r14, r15;
 283        u64 status, dla, dse, lat;
 284};
 285
 286/*
 287 * Same as pebs_record_nhm, with two additional fields.
 288 */
 289struct pebs_record_hsw {
 290        u64 flags, ip;
 291        u64 ax, bx, cx, dx;
 292        u64 si, di, bp, sp;
 293        u64 r8,  r9,  r10, r11;
 294        u64 r12, r13, r14, r15;
 295        u64 status, dla, dse, lat;
 296        u64 real_ip, tsx_tuning;
 297};
 298
 299union hsw_tsx_tuning {
 300        struct {
 301                u32 cycles_last_block     : 32,
 302                    hle_abort             : 1,
 303                    rtm_abort             : 1,
 304                    instruction_abort     : 1,
 305                    non_instruction_abort : 1,
 306                    retry                 : 1,
 307                    data_conflict         : 1,
 308                    capacity_writes       : 1,
 309                    capacity_reads        : 1;
 310        };
 311        u64         value;
 312};
 313
 314#define PEBS_HSW_TSX_FLAGS      0xff00000000ULL
 315
 316/* Same as HSW, plus TSC */
 317
 318struct pebs_record_skl {
 319        u64 flags, ip;
 320        u64 ax, bx, cx, dx;
 321        u64 si, di, bp, sp;
 322        u64 r8,  r9,  r10, r11;
 323        u64 r12, r13, r14, r15;
 324        u64 status, dla, dse, lat;
 325        u64 real_ip, tsx_tuning;
 326        u64 tsc;
 327};
 328
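    /*
     * For reference, the fixed-layout record sizes implied by the structs
     * above are 144 (core), 176 (nhm), 192 (hsw) and 200 (skl) bytes;
     * intel_ds_init() picks x86_pmu.pebs_record_size from these based on
     * intel_cap.pebs_format, while the adaptive format sizes records
     * dynamically (see adaptive_pebs_record_size_update() below).
     */
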
 329void init_debug_store_on_cpu(int cpu)
 330{
 331        struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
 332
 333        if (!ds)
 334                return;
 335
 336        wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
 337                     (u32)((u64)(unsigned long)ds),
 338                     (u32)((u64)(unsigned long)ds >> 32));
 339}
 340
 341void fini_debug_store_on_cpu(int cpu)
 342{
 343        if (!per_cpu(cpu_hw_events, cpu).ds)
 344                return;
 345
 346        wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
 347}
 348
 349static DEFINE_PER_CPU(void *, insn_buffer);
 350
 351static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
 352{
 353        unsigned long start = (unsigned long)cea;
 354        phys_addr_t pa;
 355        size_t msz = 0;
 356
 357        pa = virt_to_phys(addr);
 358
 359        preempt_disable();
 360        for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
 361                cea_set_pte(cea, pa, prot);
 362
 363        /*
 364         * This is a cross-CPU update of the cpu_entry_area, we must shoot down
 365         * all TLB entries for it.
 366         */
 367        flush_tlb_kernel_range(start, start + size);
 368        preempt_enable();
 369}
 370
 371static void ds_clear_cea(void *cea, size_t size)
 372{
 373        unsigned long start = (unsigned long)cea;
 374        size_t msz = 0;
 375
 376        preempt_disable();
 377        for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
 378                cea_set_pte(cea, 0, PAGE_NONE);
 379
 380        flush_tlb_kernel_range(start, start + size);
 381        preempt_enable();
 382}
 383
 384static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
 385{
 386        unsigned int order = get_order(size);
 387        int node = cpu_to_node(cpu);
 388        struct page *page;
 389
 390        page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
 391        return page ? page_address(page) : NULL;
 392}
 393
 394static void dsfree_pages(const void *buffer, size_t size)
 395{
 396        if (buffer)
 397                free_pages((unsigned long)buffer, get_order(size));
 398}
 399
 400static int alloc_pebs_buffer(int cpu)
 401{
 402        struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
 403        struct debug_store *ds = hwev->ds;
 404        size_t bsiz = x86_pmu.pebs_buffer_size;
 405        int max, node = cpu_to_node(cpu);
 406        void *buffer, *insn_buff, *cea;
 407
 408        if (!x86_pmu.pebs)
 409                return 0;
 410
 411        buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
 412        if (unlikely(!buffer))
 413                return -ENOMEM;
 414
 415        /*
 416         * HSW+ already provides us the eventing ip; no need to allocate this
 417         * buffer then.
 418         */
 419        if (x86_pmu.intel_cap.pebs_format < 2) {
 420                insn_buff = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
 421                if (!insn_buff) {
 422                        dsfree_pages(buffer, bsiz);
 423                        return -ENOMEM;
 424                }
 425                per_cpu(insn_buffer, cpu) = insn_buff;
 426        }
 427        hwev->ds_pebs_vaddr = buffer;
 428        /* Update the cpu entry area mapping */
 429        cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
 430        ds->pebs_buffer_base = (unsigned long) cea;
 431        ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
 432        ds->pebs_index = ds->pebs_buffer_base;
 433        max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
 434        ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
 435        return 0;
 436}
 437
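    /*
     * Note the split bookkeeping above: hwev->ds_pebs_vaddr keeps the
     * direct-map address for freeing, while ds->pebs_buffer_base points at
     * the cpu_entry_area alias, which stays mapped even when the PTI user
     * page tables are active so the hardware can always write records.
     */
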
 438static void release_pebs_buffer(int cpu)
 439{
 440        struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
 441        void *cea;
 442
 443        if (!x86_pmu.pebs)
 444                return;
 445
 446        kfree(per_cpu(insn_buffer, cpu));
 447        per_cpu(insn_buffer, cpu) = NULL;
 448
 449        /* Clear the fixmap */
 450        cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
 451        ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
 452        dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
 453        hwev->ds_pebs_vaddr = NULL;
 454}
 455
 456static int alloc_bts_buffer(int cpu)
 457{
 458        struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
 459        struct debug_store *ds = hwev->ds;
 460        void *buffer, *cea;
 461        int max;
 462
 463        if (!x86_pmu.bts)
 464                return 0;
 465
 466        buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
 467        if (unlikely(!buffer)) {
 468                WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
 469                return -ENOMEM;
 470        }
 471        hwev->ds_bts_vaddr = buffer;
 472        /* Update the fixmap */
 473        cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
 474        ds->bts_buffer_base = (unsigned long) cea;
 475        ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
 476        ds->bts_index = ds->bts_buffer_base;
 477        max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
 478        ds->bts_absolute_maximum = ds->bts_buffer_base +
 479                                        max * BTS_RECORD_SIZE;
 480        ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
 481                                        (max / 16) * BTS_RECORD_SIZE;
 482        return 0;
 483}
 484
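    /*
     * Rough numbers, assuming the usual BTS_BUFFER_SIZE of 16 pages (64KiB
     * with 4KiB pages): max = 65536 / 24 = 2730 records, and the interrupt
     * threshold sits (2730 / 16) * 24 = 4080 bytes below the absolute
     * maximum, i.e. the buffer signals when it is about 15/16 full; that is
     * where the "n <= 2560" figure in intel_pmu_drain_bts_buffer() comes
     * from.
     */
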
 485static void release_bts_buffer(int cpu)
 486{
 487        struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
 488        void *cea;
 489
 490        if (!x86_pmu.bts)
 491                return;
 492
 493        /* Clear the fixmap */
 494        cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
 495        ds_clear_cea(cea, BTS_BUFFER_SIZE);
 496        dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
 497        hwev->ds_bts_vaddr = NULL;
 498}
 499
 500static int alloc_ds_buffer(int cpu)
 501{
 502        struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
 503
 504        memset(ds, 0, sizeof(*ds));
 505        per_cpu(cpu_hw_events, cpu).ds = ds;
 506        return 0;
 507}
 508
 509static void release_ds_buffer(int cpu)
 510{
 511        per_cpu(cpu_hw_events, cpu).ds = NULL;
 512}
 513
 514void release_ds_buffers(void)
 515{
 516        int cpu;
 517
 518        if (!x86_pmu.bts && !x86_pmu.pebs)
 519                return;
 520
 521        for_each_possible_cpu(cpu)
 522                release_ds_buffer(cpu);
 523
 524        for_each_possible_cpu(cpu) {
 525                /*
 526                 * Again, ignore errors from offline CPUs: they no longer
 527                 * observe cpu_hw_events.ds and so will not program the
 528                 * DS_AREA when they come up.
 529                 */
 530                fini_debug_store_on_cpu(cpu);
 531        }
 532
 533        for_each_possible_cpu(cpu) {
 534                release_pebs_buffer(cpu);
 535                release_bts_buffer(cpu);
 536        }
 537}
 538
 539void reserve_ds_buffers(void)
 540{
 541        int bts_err = 0, pebs_err = 0;
 542        int cpu;
 543
 544        x86_pmu.bts_active = 0;
 545        x86_pmu.pebs_active = 0;
 546
 547        if (!x86_pmu.bts && !x86_pmu.pebs)
 548                return;
 549
 550        if (!x86_pmu.bts)
 551                bts_err = 1;
 552
 553        if (!x86_pmu.pebs)
 554                pebs_err = 1;
 555
 556        for_each_possible_cpu(cpu) {
 557                if (alloc_ds_buffer(cpu)) {
 558                        bts_err = 1;
 559                        pebs_err = 1;
 560                }
 561
 562                if (!bts_err && alloc_bts_buffer(cpu))
 563                        bts_err = 1;
 564
 565                if (!pebs_err && alloc_pebs_buffer(cpu))
 566                        pebs_err = 1;
 567
 568                if (bts_err && pebs_err)
 569                        break;
 570        }
 571
 572        if (bts_err) {
 573                for_each_possible_cpu(cpu)
 574                        release_bts_buffer(cpu);
 575        }
 576
 577        if (pebs_err) {
 578                for_each_possible_cpu(cpu)
 579                        release_pebs_buffer(cpu);
 580        }
 581
 582        if (bts_err && pebs_err) {
 583                for_each_possible_cpu(cpu)
 584                        release_ds_buffer(cpu);
 585        } else {
 586                if (x86_pmu.bts && !bts_err)
 587                        x86_pmu.bts_active = 1;
 588
 589                if (x86_pmu.pebs && !pebs_err)
 590                        x86_pmu.pebs_active = 1;
 591
 592                for_each_possible_cpu(cpu) {
 593                        /*
 594                         * Ignore wrmsr_on_cpu() errors for offline CPUs; they
 595                         * will get this call through intel_pmu_cpu_starting().
 596                         */
 597                        init_debug_store_on_cpu(cpu);
 598                }
 599        }
 600}
 601
 602/*
 603 * BTS
 604 */
 605
 606struct event_constraint bts_constraint =
 607        EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);
 608
 609void intel_pmu_enable_bts(u64 config)
 610{
 611        unsigned long debugctlmsr;
 612
 613        debugctlmsr = get_debugctlmsr();
 614
 615        debugctlmsr |= DEBUGCTLMSR_TR;
 616        debugctlmsr |= DEBUGCTLMSR_BTS;
 617        if (config & ARCH_PERFMON_EVENTSEL_INT)
 618                debugctlmsr |= DEBUGCTLMSR_BTINT;
 619
 620        if (!(config & ARCH_PERFMON_EVENTSEL_OS))
 621                debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
 622
 623        if (!(config & ARCH_PERFMON_EVENTSEL_USR))
 624                debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;
 625
 626        update_debugctlmsr(debugctlmsr);
 627}
 628
 629void intel_pmu_disable_bts(void)
 630{
 631        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 632        unsigned long debugctlmsr;
 633
 634        if (!cpuc->ds)
 635                return;
 636
 637        debugctlmsr = get_debugctlmsr();
 638
 639        debugctlmsr &=
 640                ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
 641                  DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
 642
 643        update_debugctlmsr(debugctlmsr);
 644}
 645
 646int intel_pmu_drain_bts_buffer(void)
 647{
 648        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 649        struct debug_store *ds = cpuc->ds;
 650        struct bts_record {
 651                u64     from;
 652                u64     to;
 653                u64     flags;
 654        };
 655        struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
 656        struct bts_record *at, *base, *top;
 657        struct perf_output_handle handle;
 658        struct perf_event_header header;
 659        struct perf_sample_data data;
 660        unsigned long skip = 0;
 661        struct pt_regs regs;
 662
 663        if (!event)
 664                return 0;
 665
 666        if (!x86_pmu.bts_active)
 667                return 0;
 668
 669        base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
 670        top  = (struct bts_record *)(unsigned long)ds->bts_index;
 671
 672        if (top <= base)
 673                return 0;
 674
 675        memset(&regs, 0, sizeof(regs));
 676
 677        ds->bts_index = ds->bts_buffer_base;
 678
 679        perf_sample_data_init(&data, 0, event->hw.last_period);
 680
 681        /*
 682         * BTS leaks kernel addresses in branches across the cpl boundary,
 683         * such as traps or system calls, so unless the user is asking for
 684         * kernel tracing (and right now it's not possible), we'd need to
 685         * filter them out. But first we need to count how many of those we
 686         * have in the current batch. This is an extra O(n) pass; however,
 687         * it's much faster than the other one, especially considering that
 688         * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see
 689         * alloc_bts_buffer()).
 690         */
 691        for (at = base; at < top; at++) {
 692                /*
 693                 * Note that right now *this* BTS code only works if
 694                 * attr::exclude_kernel is set, but let's keep this extra
 695                 * check here in case that changes.
 696                 */
 697                if (event->attr.exclude_kernel &&
 698                    (kernel_ip(at->from) || kernel_ip(at->to)))
 699                        skip++;
 700        }
 701
 702        /*
 703         * Prepare a generic sample, i.e. fill in the invariant fields.
 704         * We will overwrite the from and to address before we output
 705         * the sample.
 706         */
 707        rcu_read_lock();
 708        perf_prepare_sample(&header, &data, event, &regs);
 709
 710        if (perf_output_begin(&handle, &data, event,
 711                              header.size * (top - base - skip)))
 712                goto unlock;
 713
 714        for (at = base; at < top; at++) {
 715                /* Filter out any records that contain kernel addresses. */
 716                if (event->attr.exclude_kernel &&
 717                    (kernel_ip(at->from) || kernel_ip(at->to)))
 718                        continue;
 719
 720                data.ip         = at->from;
 721                data.addr       = at->to;
 722
 723                perf_output_sample(&handle, &header, &data, event);
 724        }
 725
 726        perf_output_end(&handle);
 727
 728        /* There's new data available. */
 729        event->hw.interrupts++;
 730        event->pending_kill = POLL_IN;
 731unlock:
 732        rcu_read_unlock();
 733        return 1;
 734}
 735
 736static inline void intel_pmu_drain_pebs_buffer(void)
 737{
 738        struct perf_sample_data data;
 739
 740        x86_pmu.drain_pebs(NULL, &data);
 741}
 742
 743/*
 744 * PEBS
 745 */
 746struct event_constraint intel_core2_pebs_event_constraints[] = {
 747        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
 748        INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
 749        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
 750        INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
 751        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
 752        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
 753        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
 754        EVENT_CONSTRAINT_END
 755};
 756
 757struct event_constraint intel_atom_pebs_event_constraints[] = {
 758        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
 759        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
 760        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
 761        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
 762        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
 763        /* Allow all events as PEBS with no flags */
 764        INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
 765        EVENT_CONSTRAINT_END
 766};
 767
 768struct event_constraint intel_slm_pebs_event_constraints[] = {
 769        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
 770        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x1),
 771        /* Allow all events as PEBS with no flags */
 772        INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
 773        EVENT_CONSTRAINT_END
 774};
 775
 776struct event_constraint intel_glm_pebs_event_constraints[] = {
 777        /* Allow all events as PEBS with no flags */
 778        INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
 779        EVENT_CONSTRAINT_END
 780};
 781
 782struct event_constraint intel_grt_pebs_event_constraints[] = {
 783        /* Allow all events as PEBS with no flags */
 784        INTEL_PLD_CONSTRAINT(0x5d0, 0xf),
 785        INTEL_PSD_CONSTRAINT(0x6d0, 0xf),
 786        EVENT_CONSTRAINT_END
 787};
 788
 789struct event_constraint intel_nehalem_pebs_event_constraints[] = {
 790        INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
 791        INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
 792        INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
 793        INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
 794        INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
 795        INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 796        INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
 797        INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
 798        INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
 799        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
 800        INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
 801        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
 802        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
 803        EVENT_CONSTRAINT_END
 804};
 805
 806struct event_constraint intel_westmere_pebs_event_constraints[] = {
 807        INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
 808        INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
 809        INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
 810        INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
 811        INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
 812        INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 813        INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
 814        INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
 815        INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
 816        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
 817        INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
 818        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
 819        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
 820        EVENT_CONSTRAINT_END
 821};
 822
 823struct event_constraint intel_snb_pebs_event_constraints[] = {
 824        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
 825        INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
 826        INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
 827        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
 828        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
 829        INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 830        INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 831        INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
 832        INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
 833        /* Allow all events as PEBS with no flags */
 834        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
 835        EVENT_CONSTRAINT_END
 836};
 837
 838struct event_constraint intel_ivb_pebs_event_constraints[] = {
 839        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
 840        INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
 841        INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
 842        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
 843        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
 844        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
 845        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
 846        INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 847        INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 848        INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
 849        INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
 850        /* Allow all events as PEBS with no flags */
 851        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
 852        EVENT_CONSTRAINT_END
 853};
 854
 855struct event_constraint intel_hsw_pebs_event_constraints[] = {
 856        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
 857        INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
 858        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
 859        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
 860        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
 861        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
 862        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
 863        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
 864        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
 865        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
 866        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
 867        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
 868        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
 869        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
 870        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 871        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
 872        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
 873        /* Allow all events as PEBS with no flags */
 874        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
 875        EVENT_CONSTRAINT_END
 876};
 877
 878struct event_constraint intel_bdw_pebs_event_constraints[] = {
 879        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
 880        INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
 881        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
 882        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
 883        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
 884        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
 885        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
 886        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
 887        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
 888        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
 889        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
 890        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
 891        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
 892        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
 893        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 894        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
 895        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
 896        /* Allow all events as PEBS with no flags */
 897        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
 898        EVENT_CONSTRAINT_END
 899};
 900
 901
 902struct event_constraint intel_skl_pebs_event_constraints[] = {
 903        INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2),      /* INST_RETIRED.PREC_DIST */
 904        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
 905        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
 906        /* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
 907        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
 908        INTEL_PLD_CONSTRAINT(0x1cd, 0xf),                     /* MEM_TRANS_RETIRED.* */
 909        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
 910        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
 911        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
 912        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
 913        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
 914        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
 915        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
 916        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
 917        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_RETIRED.* */
 918        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_L3_HIT_RETIRED.* */
 919        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_L3_MISS_RETIRED.* */
 920        /* Allow all events as PEBS with no flags */
 921        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
 922        EVENT_CONSTRAINT_END
 923};
 924
 925struct event_constraint intel_icl_pebs_event_constraints[] = {
 926        INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL),   /* INST_RETIRED.PREC_DIST */
 927        INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),  /* SLOTS */
 928
 929        INTEL_PLD_CONSTRAINT(0x1cd, 0xff),                      /* MEM_TRANS_RETIRED.LOAD_LATENCY */
 930        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),    /* MEM_INST_RETIRED.LOAD */
 931        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),    /* MEM_INST_RETIRED.STORE */
 932
 933        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */
 934
 935        INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),                /* MEM_INST_RETIRED.* */
 936
 937        /*
 938         * Everything else is handled by PMU_FL_PEBS_ALL, because we
 939         * need the full constraints from the main table.
 940         */
 941
 942        EVENT_CONSTRAINT_END
 943};
 944
 945struct event_constraint intel_spr_pebs_event_constraints[] = {
 946        INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL),
 947        INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
 948
 949        INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe),
 950        INTEL_PLD_CONSTRAINT(0x1cd, 0xfe),
 951        INTEL_PSD_CONSTRAINT(0x2cd, 0x1),
 952        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),
 953        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),
 954
 955        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),
 956
 957        INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),
 958
 959        /*
 960         * Everything else is handled by PMU_FL_PEBS_ALL, because we
 961         * need the full constraints from the main table.
 962         */
 963
 964        EVENT_CONSTRAINT_END
 965};
 966
 967struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 968{
 969        struct event_constraint *pebs_constraints = hybrid(event->pmu, pebs_constraints);
 970        struct event_constraint *c;
 971
 972        if (!event->attr.precise_ip)
 973                return NULL;
 974
 975        if (pebs_constraints) {
 976                for_each_event_constraint(c, pebs_constraints) {
 977                        if (constraint_match(c, event->hw.config)) {
 978                                event->hw.flags |= c->flags;
 979                                return c;
 980                        }
 981                }
 982        }
 983
 984        /*
 985         * Extended PEBS support
 986         * Makes the PEBS code search the normal constraints.
 987         */
 988        if (x86_pmu.flags & PMU_FL_PEBS_ALL)
 989                return NULL;
 990
 991        return &emptyconstraint;
 992}
 993
 994/*
 995 * We need the sched_task callback even for per-cpu events when we use
 996 * the large interrupt threshold, such that we can provide PID and TID
 997 * to PEBS samples.
 998 */
 999static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
1000{
1001        if (cpuc->n_pebs == cpuc->n_pebs_via_pt)
1002                return false;
1003
1004        return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
1005}
1006
1007void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
1008{
1009        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1010
1011        if (!sched_in && pebs_needs_sched_cb(cpuc))
1012                intel_pmu_drain_pebs_buffer();
1013}
1014
1015static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
1016{
1017        struct debug_store *ds = cpuc->ds;
1018        int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
1019        int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
1020        u64 threshold;
1021        int reserved;
1022
1023        if (cpuc->n_pebs_via_pt)
1024                return;
1025
1026        if (x86_pmu.flags & PMU_FL_PEBS_ALL)
1027                reserved = max_pebs_events + num_counters_fixed;
1028        else
1029                reserved = max_pebs_events;
1030
1031        if (cpuc->n_pebs == cpuc->n_large_pebs) {
1032                threshold = ds->pebs_absolute_maximum -
1033                        reserved * cpuc->pebs_record_size;
1034        } else {
1035                threshold = ds->pebs_buffer_base + cpuc->pebs_record_size;
1036        }
1037
1038        ds->pebs_interrupt_threshold = threshold;
1039}
1040
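    /*
     * Illustrative numbers: with 8 reserved counters and a 200 byte record,
     * large-PEBS mode arms the threshold 8 * 200 = 1600 bytes below
     * pebs_absolute_maximum, whereas the normal case interrupts after a
     * single record (pebs_buffer_base + one record size).
     */
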
1041static void adaptive_pebs_record_size_update(void)
1042{
1043        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1044        u64 pebs_data_cfg = cpuc->pebs_data_cfg;
1045        int sz = sizeof(struct pebs_basic);
1046
1047        if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
1048                sz += sizeof(struct pebs_meminfo);
1049        if (pebs_data_cfg & PEBS_DATACFG_GP)
1050                sz += sizeof(struct pebs_gprs);
1051        if (pebs_data_cfg & PEBS_DATACFG_XMMS)
1052                sz += sizeof(struct pebs_xmm);
1053        if (pebs_data_cfg & PEBS_DATACFG_LBRS)
1054                sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry);
1055
1056        cpuc->pebs_record_size = sz;
1057}
1058
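    /*
     * For example, a pebs_data_cfg asking for MEMINFO and GP registers makes
     * the record sizeof(pebs_basic) + sizeof(pebs_meminfo) + sizeof(pebs_gprs)
     * bytes; LBR data adds a further x86_pmu.lbr_nr * sizeof(struct lbr_entry)
     * on top.
     */
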
1059#define PERF_PEBS_MEMINFO_TYPE  (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC |   \
1060                                PERF_SAMPLE_PHYS_ADDR |                      \
1061                                PERF_SAMPLE_WEIGHT_TYPE |                    \
1062                                PERF_SAMPLE_TRANSACTION |                    \
1063                                PERF_SAMPLE_DATA_PAGE_SIZE)
1064
1065static u64 pebs_update_adaptive_cfg(struct perf_event *event)
1066{
1067        struct perf_event_attr *attr = &event->attr;
1068        u64 sample_type = attr->sample_type;
1069        u64 pebs_data_cfg = 0;
1070        bool gprs, tsx_weight;
1071
1072        if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) &&
1073            attr->precise_ip > 1)
1074                return pebs_data_cfg;
1075
1076        if (sample_type & PERF_PEBS_MEMINFO_TYPE)
1077                pebs_data_cfg |= PEBS_DATACFG_MEMINFO;
1078
1079        /*
1080         * We need GPRs when:
1081         * + the user requested them
1082         * + precise_ip < 2 (the non-event IP is taken from the GPR group)
1083         * + RTM TSX weight sampling, which needs GPRs for the abort code.
1084         */
1085        gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
1086               (attr->sample_regs_intr & PEBS_GP_REGS);
1087
1088        tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
1089                     ((attr->config & INTEL_ARCH_EVENT_MASK) ==
1090                      x86_pmu.rtm_abort_event);
1091
1092        if (gprs || (attr->precise_ip < 2) || tsx_weight)
1093                pebs_data_cfg |= PEBS_DATACFG_GP;
1094
1095        if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
1096            (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK))
1097                pebs_data_cfg |= PEBS_DATACFG_XMMS;
1098
1099        if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
1100                /*
1101                 * For now always log all LBRs. Could configure this
1102                 * later.
1103                 */
1104                pebs_data_cfg |= PEBS_DATACFG_LBRS |
1105                        ((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT);
1106        }
1107
1108        return pebs_data_cfg;
1109}
1110
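    /*
     * Concrete example: a cycles:pp style event (precise_ip == 2) that only
     * samples IP and TIME returns 0 via the early check above, so the basic
     * group suffices; adding PERF_SAMPLE_DATA_SRC turns on
     * PEBS_DATACFG_MEMINFO, and PERF_SAMPLE_REGS_INTR with general purpose
     * registers in sample_regs_intr turns on PEBS_DATACFG_GP as well.
     */
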
1111static void
1112pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
1113                  struct perf_event *event, bool add)
1114{
1115        struct pmu *pmu = event->ctx->pmu;
1116        /*
1117         * Make sure we get updated with the first PEBS
1118         * event. It will trigger also during removal, but
1119         * that does not hurt:
1120         */
1121        bool update = cpuc->n_pebs == 1;
1122
1123        if (needed_cb != pebs_needs_sched_cb(cpuc)) {
1124                if (!needed_cb)
1125                        perf_sched_cb_inc(pmu);
1126                else
1127                        perf_sched_cb_dec(pmu);
1128
1129                update = true;
1130        }
1131
1132        /*
1133         * The PEBS record doesn't shrink on pmu::del(). Doing so would require
1134         * iterating all remaining PEBS events to reconstruct the config.
1135         */
1136        if (x86_pmu.intel_cap.pebs_baseline && add) {
1137                u64 pebs_data_cfg;
1138
1139                /* Clear pebs_data_cfg and pebs_record_size for first PEBS. */
1140                if (cpuc->n_pebs == 1) {
1141                        cpuc->pebs_data_cfg = 0;
1142                        cpuc->pebs_record_size = sizeof(struct pebs_basic);
1143                }
1144
1145                pebs_data_cfg = pebs_update_adaptive_cfg(event);
1146
1147                /* Update pebs_record_size if new event requires more data. */
1148                if (pebs_data_cfg & ~cpuc->pebs_data_cfg) {
1149                        cpuc->pebs_data_cfg |= pebs_data_cfg;
1150                        adaptive_pebs_record_size_update();
1151                        update = true;
1152                }
1153        }
1154
1155        if (update)
1156                pebs_update_threshold(cpuc);
1157}
1158
1159void intel_pmu_pebs_add(struct perf_event *event)
1160{
1161        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1162        struct hw_perf_event *hwc = &event->hw;
1163        bool needed_cb = pebs_needs_sched_cb(cpuc);
1164
1165        cpuc->n_pebs++;
1166        if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
1167                cpuc->n_large_pebs++;
1168        if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
1169                cpuc->n_pebs_via_pt++;
1170
1171        pebs_update_state(needed_cb, cpuc, event, true);
1172}
1173
1174static void intel_pmu_pebs_via_pt_disable(struct perf_event *event)
1175{
1176        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1177
1178        if (!is_pebs_pt(event))
1179                return;
1180
1181        if (!(cpuc->pebs_enabled & ~PEBS_VIA_PT_MASK))
1182                cpuc->pebs_enabled &= ~PEBS_VIA_PT_MASK;
1183}
1184
1185static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
1186{
1187        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1188        struct hw_perf_event *hwc = &event->hw;
1189        struct debug_store *ds = cpuc->ds;
1190        u64 value = ds->pebs_event_reset[hwc->idx];
1191        u32 base = MSR_RELOAD_PMC0;
1192        unsigned int idx = hwc->idx;
1193
1194        if (!is_pebs_pt(event))
1195                return;
1196
1197        if (!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))
1198                cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD;
1199
1200        cpuc->pebs_enabled |= PEBS_OUTPUT_PT;
1201
1202        if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
1203                base = MSR_RELOAD_FIXED_CTR0;
1204                idx = hwc->idx - INTEL_PMC_IDX_FIXED;
1205                value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx];
1206        }
1207        wrmsrl(base + idx, value);
1208}
1209
1210void intel_pmu_pebs_enable(struct perf_event *event)
1211{
1212        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1213        struct hw_perf_event *hwc = &event->hw;
1214        struct debug_store *ds = cpuc->ds;
1215        unsigned int idx = hwc->idx;
1216
1217        hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
1218
1219        cpuc->pebs_enabled |= 1ULL << hwc->idx;
1220
1221        if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
1222                cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
1223        else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
1224                cpuc->pebs_enabled |= 1ULL << 63;
1225
1226        if (x86_pmu.intel_cap.pebs_baseline) {
1227                hwc->config |= ICL_EVENTSEL_ADAPTIVE;
1228                if (cpuc->pebs_data_cfg != cpuc->active_pebs_data_cfg) {
1229                        wrmsrl(MSR_PEBS_DATA_CFG, cpuc->pebs_data_cfg);
1230                        cpuc->active_pebs_data_cfg = cpuc->pebs_data_cfg;
1231                }
1232        }
1233
1234        if (idx >= INTEL_PMC_IDX_FIXED)
1235                idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
1236
1237        /*
1238         * Use auto-reload if possible to save an MSR write in the PMI.
1239         * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD can change the period.
1240         */
1241        if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
1242                ds->pebs_event_reset[idx] =
1243                        (u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
1244        } else {
1245                ds->pebs_event_reset[idx] = 0;
1246        }
1247
1248        intel_pmu_pebs_via_pt_enable(event);
1249}
1250
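    /*
     * The auto-reload value written above is the usual "negative period":
     * the counter counts up, so re-arming it to -sample_period (masked to
     * the counter width) makes the next PEBS record fire after another
     * sample_period events without an MSR write from the PMI handler.
     */
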
1251void intel_pmu_pebs_del(struct perf_event *event)
1252{
1253        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1254        struct hw_perf_event *hwc = &event->hw;
1255        bool needed_cb = pebs_needs_sched_cb(cpuc);
1256
1257        cpuc->n_pebs--;
1258        if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
1259                cpuc->n_large_pebs--;
1260        if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
1261                cpuc->n_pebs_via_pt--;
1262
1263        pebs_update_state(needed_cb, cpuc, event, false);
1264}
1265
1266void intel_pmu_pebs_disable(struct perf_event *event)
1267{
1268        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1269        struct hw_perf_event *hwc = &event->hw;
1270
1271        if (cpuc->n_pebs == cpuc->n_large_pebs &&
1272            cpuc->n_pebs != cpuc->n_pebs_via_pt)
1273                intel_pmu_drain_pebs_buffer();
1274
1275        cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
1276
1277        if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
1278            (x86_pmu.version < 5))
1279                cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
1280        else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
1281                cpuc->pebs_enabled &= ~(1ULL << 63);
1282
1283        intel_pmu_pebs_via_pt_disable(event);
1284
1285        if (cpuc->enabled)
1286                wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
1287
1288        hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
1289}
1290
1291void intel_pmu_pebs_enable_all(void)
1292{
1293        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1294
1295        if (cpuc->pebs_enabled)
1296                wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
1297}
1298
1299void intel_pmu_pebs_disable_all(void)
1300{
1301        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1302
1303        if (cpuc->pebs_enabled)
1304                wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
1305}
1306
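    /*
     * Best-effort correction of the PEBS off-by-1 skid on CPUs that lack the
     * eventing IP: starting from the branch target recorded in LBR entry 0,
     * decode forward until the instruction just before the interrupted IP is
     * found and report that as the sample IP; returning 1 lets the caller
     * mark the sample PERF_EFLAGS_EXACT.
     */
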
1307static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
1308{
1309        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1310        unsigned long from = cpuc->lbr_entries[0].from;
1311        unsigned long old_to, to = cpuc->lbr_entries[0].to;
1312        unsigned long ip = regs->ip;
1313        int is_64bit = 0;
1314        void *kaddr;
1315        int size;
1316
1317        /*
1318         * We don't need to fix up if the PEBS assist is fault-like
1319         */
1320        if (!x86_pmu.intel_cap.pebs_trap)
1321                return 1;
1322
1323        /*
1324         * No LBR entry, no basic block, no rewinding
1325         */
1326        if (!cpuc->lbr_stack.nr || !from || !to)
1327                return 0;
1328
1329        /*
1330         * Basic blocks should never cross user/kernel boundaries
1331         */
1332        if (kernel_ip(ip) != kernel_ip(to))
1333                return 0;
1334
1335        /*
1336         * unsigned math, either ip is before the start (impossible) or
1337         * the basic block is larger than 1 page (sanity)
1338         */
1339        if ((ip - to) > PEBS_FIXUP_SIZE)
1340                return 0;
1341
1342        /*
1343         * We sampled a branch insn, rewind using the LBR stack
1344         */
1345        if (ip == to) {
1346                set_linear_ip(regs, from);
1347                return 1;
1348        }
1349
1350        size = ip - to;
1351        if (!kernel_ip(ip)) {
1352                int bytes;
1353                u8 *buf = this_cpu_read(insn_buffer);
1354
1355                /* 'size' must fit our buffer, see above */
1356                bytes = copy_from_user_nmi(buf, (void __user *)to, size);
1357                if (bytes != 0)
1358                        return 0;
1359
1360                kaddr = buf;
1361        } else {
1362                kaddr = (void *)to;
1363        }
1364
1365        do {
1366                struct insn insn;
1367
1368                old_to = to;
1369
1370#ifdef CONFIG_X86_64
1371                is_64bit = kernel_ip(to) || any_64bit_mode(regs);
1372#endif
1373                insn_init(&insn, kaddr, size, is_64bit);
1374
1375                /*
1376                 * Make sure there was not a problem decoding the instruction.
1377                 * This is doubly important because we have an infinite loop if
1378                 * insn.length=0.
1379                 */
1380                if (insn_get_length(&insn))
1381                        break;
1382
1383                to += insn.length;
1384                kaddr += insn.length;
1385                size -= insn.length;
1386        } while (to < ip);
1387
1388        if (to == ip) {
1389                set_linear_ip(regs, old_to);
1390                return 1;
1391        }
1392
1393        /*
1394         * Even though we decoded the basic block, the instruction stream
1395         * never matched the given IP; either the TO or the IP got corrupted.
1396         */
1397        return 0;
1398}
1399
1400static inline u64 intel_get_tsx_weight(u64 tsx_tuning)
1401{
1402        if (tsx_tuning) {
1403                union hsw_tsx_tuning tsx = { .value = tsx_tuning };
1404                return tsx.cycles_last_block;
1405        }
1406        return 0;
1407}
1408
1409static inline u64 intel_get_tsx_transaction(u64 tsx_tuning, u64 ax)
1410{
1411        u64 txn = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
1412
1413        /* For RTM XABORTs also log the abort code from AX */
1414        if ((txn & PERF_TXN_TRANSACTION) && (ax & 1))
1415                txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
1416        return txn;
1417}
1418
1419static inline u64 get_pebs_status(void *n)
1420{
1421        if (x86_pmu.intel_cap.pebs_format < 4)
1422                return ((struct pebs_record_nhm *)n)->status;
1423        return ((struct pebs_basic *)n)->applicable_counters;
1424}
1425
1426#define PERF_X86_EVENT_PEBS_HSW_PREC \
1427                (PERF_X86_EVENT_PEBS_ST_HSW | \
1428                 PERF_X86_EVENT_PEBS_LD_HSW | \
1429                 PERF_X86_EVENT_PEBS_NA_HSW)
1430
1431static u64 get_data_src(struct perf_event *event, u64 aux)
1432{
1433        u64 val = PERF_MEM_NA;
1434        int fl = event->hw.flags;
1435        bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
1436
1437        if (fl & PERF_X86_EVENT_PEBS_LDLAT)
1438                val = load_latency_data(aux);
1439        else if (fl & PERF_X86_EVENT_PEBS_STLAT)
1440                val = store_latency_data(aux);
1441        else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
1442                val = precise_datala_hsw(event, aux);
1443        else if (fst)
1444                val = precise_store_data(aux);
1445        return val;
1446}
1447
1448#define PERF_SAMPLE_ADDR_TYPE   (PERF_SAMPLE_ADDR |             \
1449                                 PERF_SAMPLE_PHYS_ADDR |        \
1450                                 PERF_SAMPLE_DATA_PAGE_SIZE)
1451
1452static void setup_pebs_fixed_sample_data(struct perf_event *event,
1453                                   struct pt_regs *iregs, void *__pebs,
1454                                   struct perf_sample_data *data,
1455                                   struct pt_regs *regs)
1456{
1457        /*
1458         * We cast to the biggest pebs_record but are careful not to
1459         * unconditionally access the 'extra' entries.
1460         */
1461        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1462        struct pebs_record_skl *pebs = __pebs;
1463        u64 sample_type;
1464        int fll;
1465
1466        if (pebs == NULL)
1467                return;
1468
1469        sample_type = event->attr.sample_type;
1470        fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
1471
1472        perf_sample_data_init(data, 0, event->hw.last_period);
1473
1474        data->period = event->hw.last_period;
1475
1476        /*
1477         * Use latency for weight (only avail with PEBS-LL)
1478         * Use latency for weight (only available with PEBS-LL)
1479        if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE))
1480                data->weight.full = pebs->lat;
1481
1482        /*
1483         * data->data_src encodes the data source
1484         */
1485        if (sample_type & PERF_SAMPLE_DATA_SRC)
1486                data->data_src.val = get_data_src(event, pebs->dse);
1487
1488        /*
1489         * We must however always use iregs for the unwinder to stay sane; the
1490         * record BP,SP,IP can point into thin air when the record is from a
1491         * previous PMI context or an (I)RET happened between the record and
1492         * PMI.
1493         */
1494        if (sample_type & PERF_SAMPLE_CALLCHAIN)
1495                data->callchain = perf_callchain(event, iregs);
1496
1497        /*
1498         * We use the interrupt regs as a base because the PEBS record does
1499         * not contain a full regs set; specifically, it seems to lack the
1500         * segment registers, which are used by things like user_mode().
1501         *
1502         * In the simple case fix up only the IP for PERF_SAMPLE_IP.
1503         */
1504        *regs = *iregs;
1505
1506        /*
1507         * Initialize regs->flags from the PEBS record and clear the
1508         * exact bit (which uses x86 EFLAGS reserved bit 3), i.e. do
1509         * not rely on it being zero:
1510         */
1511        regs->flags = pebs->flags & ~PERF_EFLAGS_EXACT;
1512
1513        if (sample_type & PERF_SAMPLE_REGS_INTR) {
1514                regs->ax = pebs->ax;
1515                regs->bx = pebs->bx;
1516                regs->cx = pebs->cx;
1517                regs->dx = pebs->dx;
1518                regs->si = pebs->si;
1519                regs->di = pebs->di;
1520
1521                regs->bp = pebs->bp;
1522                regs->sp = pebs->sp;
1523
1524#ifndef CONFIG_X86_32
1525                regs->r8 = pebs->r8;
1526                regs->r9 = pebs->r9;
1527                regs->r10 = pebs->r10;
1528                regs->r11 = pebs->r11;
1529                regs->r12 = pebs->r12;
1530                regs->r13 = pebs->r13;
1531                regs->r14 = pebs->r14;
1532                regs->r15 = pebs->r15;
1533#endif
1534        }
1535
1536        if (event->attr.precise_ip > 1) {
1537                /*
1538                 * Haswell and later processors have an 'eventing IP'
1539                 * (real IP) which fixes the off-by-1 skid in hardware.
1540                 * Use it when precise_ip >= 2:
1541                 */
1542                if (x86_pmu.intel_cap.pebs_format >= 2) {
1543                        set_linear_ip(regs, pebs->real_ip);
1544                        regs->flags |= PERF_EFLAGS_EXACT;
1545                } else {
1546                        /* Otherwise, use PEBS off-by-1 IP: */
1547                        set_linear_ip(regs, pebs->ip);
1548
1549                        /*
1550                         * With precise_ip >= 2, try to fix up the off-by-1 IP
1551                         * using the LBR. If successful, the fixup function
1552                         * corrects regs->ip and calls set_linear_ip() on regs:
1553                         */
1554                        if (intel_pmu_pebs_fixup_ip(regs))
1555                                regs->flags |= PERF_EFLAGS_EXACT;
1556                }
1557        } else {
1558                /*
1559                 * When precise_ip == 1, use the PEBS off-by-1 IP
1560                 * without attempting a fixup:
1561                 */
1562                set_linear_ip(regs, pebs->ip);
1563        }
1564
1565
1566        if ((sample_type & PERF_SAMPLE_ADDR_TYPE) &&
1567            x86_pmu.intel_cap.pebs_format >= 1)
1568                data->addr = pebs->dla;
1569
1570        if (x86_pmu.intel_cap.pebs_format >= 2) {
1571                /* Only set the TSX weight when there is no memory weight. */
1572                if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll)
1573                        data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);
1574
1575                if (sample_type & PERF_SAMPLE_TRANSACTION)
1576                        data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
1577                                                              pebs->ax);
1578        }
1579
1580        /*
1581         * PEBS v3 and later supply an accurate TSC time stamp, so use
1582         * it as the sample time.
1583         *
1584         * We can only do this for the default trace clock.
1585         */
1586        if (x86_pmu.intel_cap.pebs_format >= 3 &&
1587                event->attr.use_clockid == 0)
1588                data->time = native_sched_clock_from_tsc(pebs->tsc);
1589
1590        if (has_branch_stack(event))
1591                data->br_stack = &cpuc->lbr_stack;
1592}
1593
1594static void adaptive_pebs_save_regs(struct pt_regs *regs,
1595                                    struct pebs_gprs *gprs)
1596{
1597        regs->ax = gprs->ax;
1598        regs->bx = gprs->bx;
1599        regs->cx = gprs->cx;
1600        regs->dx = gprs->dx;
1601        regs->si = gprs->si;
1602        regs->di = gprs->di;
1603        regs->bp = gprs->bp;
1604        regs->sp = gprs->sp;
1605#ifndef CONFIG_X86_32
1606        regs->r8 = gprs->r8;
1607        regs->r9 = gprs->r9;
1608        regs->r10 = gprs->r10;
1609        regs->r11 = gprs->r11;
1610        regs->r12 = gprs->r12;
1611        regs->r13 = gprs->r13;
1612        regs->r14 = gprs->r14;
1613        regs->r15 = gprs->r15;
1614#endif
1615}
1616
1617#define PEBS_LATENCY_MASK                       0xffff
1618#define PEBS_CACHE_LATENCY_OFFSET               32
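/*
 * As consumed below, meminfo::latency appears to pack the instruction
 * latency in the low 16 bits and the cache latency starting at bit 32
 * when the PMU has PMU_FL_INSTR_LATENCY; otherwise the whole value is
 * the load latency.
 */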
1619
1620/*
1621 * With adaptive PEBS the layout depends on what fields are configured.
1622 */
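/*
 * A rough sketch of the record layout this parser assumes, in the order
 * the optional groups are consumed below:
 *
 *   struct pebs_basic              always present
 *   struct pebs_meminfo            if PEBS_DATACFG_MEMINFO
 *   struct pebs_gprs               if PEBS_DATACFG_GP
 *   struct pebs_xmm                if PEBS_DATACFG_XMMS
 *   struct lbr_entry[num_lbr]      if PEBS_DATACFG_LBRS
 *
 * The upper 16 bits of pebs_basic::format_size give the total record
 * size as reported by hardware.
 */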
1623
1624static void setup_pebs_adaptive_sample_data(struct perf_event *event,
1625                                            struct pt_regs *iregs, void *__pebs,
1626                                            struct perf_sample_data *data,
1627                                            struct pt_regs *regs)
1628{
1629        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1630        struct pebs_basic *basic = __pebs;
1631        void *next_record = basic + 1;
1632        u64 sample_type;
1633        u64 format_size;
1634        struct pebs_meminfo *meminfo = NULL;
1635        struct pebs_gprs *gprs = NULL;
1636        struct x86_perf_regs *perf_regs;
1637
1638        if (basic == NULL)
1639                return;
1640
1641        perf_regs = container_of(regs, struct x86_perf_regs, regs);
1642        perf_regs->xmm_regs = NULL;
1643
1644        sample_type = event->attr.sample_type;
1645        format_size = basic->format_size;
1646        perf_sample_data_init(data, 0, event->hw.last_period);
1647        data->period = event->hw.last_period;
1648
1649        if (event->attr.use_clockid == 0)
1650                data->time = native_sched_clock_from_tsc(basic->tsc);
1651
1652        /*
1653         * We must however always use iregs for the unwinder to stay sane; the
1654         * record BP,SP,IP can point into thin air when the record is from a
1655         * previous PMI context or an (I)RET happened between the record and
1656         * PMI.
1657         */
1658        if (sample_type & PERF_SAMPLE_CALLCHAIN)
1659                data->callchain = perf_callchain(event, iregs);
1660
1661        *regs = *iregs;
1662        /* The ip in basic is EventingIP */
1663        set_linear_ip(regs, basic->ip);
1664        regs->flags = PERF_EFLAGS_EXACT;
1665
1666        /*
1667         * The MEMINFO group comes before the GP group, but
1668         * PERF_SAMPLE_TRANSACTION needs gprs->ax. Save the pointer
1669         * here and process it later.
1670         */
1671        if (format_size & PEBS_DATACFG_MEMINFO) {
1672                meminfo = next_record;
1673                next_record = meminfo + 1;
1674        }
1675
1676        if (format_size & PEBS_DATACFG_GP) {
1677                gprs = next_record;
1678                next_record = gprs + 1;
1679
1680                if (event->attr.precise_ip < 2) {
1681                        set_linear_ip(regs, gprs->ip);
1682                        regs->flags &= ~PERF_EFLAGS_EXACT;
1683                }
1684
1685                if (sample_type & PERF_SAMPLE_REGS_INTR)
1686                        adaptive_pebs_save_regs(regs, gprs);
1687        }
1688
1689        if (format_size & PEBS_DATACFG_MEMINFO) {
1690                if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
1691                        u64 weight = meminfo->latency;
1692
1693                        if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) {
1694                                data->weight.var2_w = weight & PEBS_LATENCY_MASK;
1695                                weight >>= PEBS_CACHE_LATENCY_OFFSET;
1696                        }
1697
1698                        /*
1699                         * Although meminfo::latency is defined as a u64,
1700                         * only the lower 32 bits contain valid data in
1701                         * practice on Ice Lake and earlier platforms.
1702                         */
1703                        if (sample_type & PERF_SAMPLE_WEIGHT) {
1704                                data->weight.full = weight ?:
1705                                        intel_get_tsx_weight(meminfo->tsx_tuning);
1706                        } else {
1707                                data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
1708                                        intel_get_tsx_weight(meminfo->tsx_tuning);
1709                        }
1710                }
1711
1712                if (sample_type & PERF_SAMPLE_DATA_SRC)
1713                        data->data_src.val = get_data_src(event, meminfo->aux);
1714
1715                if (sample_type & PERF_SAMPLE_ADDR_TYPE)
1716                        data->addr = meminfo->address;
1717
1718                if (sample_type & PERF_SAMPLE_TRANSACTION)
1719                        data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
1720                                                          gprs ? gprs->ax : 0);
1721        }
1722
1723        if (format_size & PEBS_DATACFG_XMMS) {
1724                struct pebs_xmm *xmm = next_record;
1725
1726                next_record = xmm + 1;
1727                perf_regs->xmm_regs = xmm->xmm;
1728        }
1729
1730        if (format_size & PEBS_DATACFG_LBRS) {
1731                struct lbr_entry *lbr = next_record;
1732                int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT)
1733                                        & 0xff) + 1;
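                /*
                 * The +1 accounts for the data cfg encoding the LBR count
                 * minus one; e.g. a request for 32 LBR entries is read
                 * back as 31 here, giving num_lbr == 32 (assuming that
                 * minus-one encoding).
                 */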
1734                next_record = next_record + num_lbr * sizeof(struct lbr_entry);
1735
1736                if (has_branch_stack(event)) {
1737                        intel_pmu_store_pebs_lbrs(lbr);
1738                        data->br_stack = &cpuc->lbr_stack;
1739                }
1740        }
1741
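        /*
         * Sanity check: the amount of record data consumed above must
         * match the record size that hardware reports in the upper 16
         * bits of format_size.
         */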
1742        WARN_ONCE(next_record != __pebs + (format_size >> 48),
1743                        "PEBS record size %llu, expected %llu, config %llx\n",
1744                        format_size >> 48,
1745                        (u64)(next_record - __pebs),
1746                        basic->format_size);
1747}
1748
1749static inline void *
1750get_next_pebs_record_by_bit(void *base, void *top, int bit)
1751{
1752        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1753        void *at;
1754        u64 pebs_status;
1755
1756        /*
1757         * fmt0 does not have a status bitfield (does not use
1758         * the pebs_record_nhm format)
1759         */
1760        if (x86_pmu.intel_cap.pebs_format < 1)
1761                return base;
1762
1763        if (base == NULL)
1764                return NULL;
1765
1766        for (at = base; at < top; at += cpuc->pebs_record_size) {
1767                unsigned long status = get_pebs_status(at);
1768
1769                if (test_bit(bit, (unsigned long *)&status)) {
1770                        /* PEBS v3 has accurate status bits */
1771                        if (x86_pmu.intel_cap.pebs_format >= 3)
1772                                return at;
1773
1774                        if (status == (1 << bit))
1775                                return at;
1776
1777                        /* clear non-PEBS bits and re-check */
1778                        pebs_status = status & cpuc->pebs_enabled;
1779                        pebs_status &= PEBS_COUNTER_MASK;
1780                        if (pebs_status == (1 << bit))
1781                                return at;
1782                }
1783        }
1784        return NULL;
1785}
1786
1787void intel_pmu_auto_reload_read(struct perf_event *event)
1788{
1789        WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD));
1790
1791        perf_pmu_disable(event->pmu);
1792        intel_pmu_drain_pebs_buffer();
1793        perf_pmu_enable(event->pmu);
1794}
1795
1796/*
1797 * Special variant of intel_pmu_save_and_restart() for auto-reload.
1798 */
1799static int
1800intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
1801{
1802        struct hw_perf_event *hwc = &event->hw;
1803        int shift = 64 - x86_pmu.cntval_bits;
1804        u64 period = hwc->sample_period;
1805        u64 prev_raw_count, new_raw_count;
1806        s64 new, old;
1807
1808        WARN_ON(!period);
1809
1810        /*
1811         * drain_pebs() only happens when the PMU is disabled.
1812         */
1813        WARN_ON(this_cpu_read(cpu_hw_events.enabled));
1814
1815        prev_raw_count = local64_read(&hwc->prev_count);
1816        rdpmcl(hwc->event_base_rdpmc, new_raw_count);
1817        local64_set(&hwc->prev_count, new_raw_count);
1818
1819        /*
1820         * Since the counter increments a negative counter value and
1821         * overflows on the sign switch, giving the interval:
1822         *
1823         *   [-period, 0]
1824         *
1825         * the difference between two consecutive reads is:
1826         *
1827         *   A) value2 - value1;
1828         *      when no overflows have happened in between,
1829         *
1830         *   B) (0 - value1) + (value2 - (-period));
1831         *      when one overflow happened in between,
1832         *
1833         *   C) (0 - value1) + (n - 1) * (period) + (value2 - (-period));
1834         *      when @n overflows happened in between.
1835         *
1836         * Here A) is the obvious difference, B) is the extension to the
1837         * discrete interval, where the first term is to the top of the
1838         * interval and the second term is from the bottom of the next
1839         * interval and C) the extension to multiple intervals, where the
1840         * middle term is the whole intervals covered.
1841         *
1842         * An equivalent of C, by reduction, is:
1843         *
1844         *   value2 - value1 + n * period
1845         */
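        /*
         * Worked example (illustrative numbers): with period = 100, a
         * previous read of value1 = -40 and a current read of value2 = -90
         * after one overflow, case B gives (0 - -40) + (-90 - -100) = 50,
         * and the reduced form gives -90 - -40 + 1 * 100 = 50 as well.
         */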
1846        new = ((s64)(new_raw_count << shift) >> shift);
1847        old = ((s64)(prev_raw_count << shift) >> shift);
1848        local64_add(new - old + count * period, &event->count);
1849
1850        local64_set(&hwc->period_left, -new);
1851
1852        perf_event_update_userpage(event);
1853
1854        return 0;
1855}
1856
1857static __always_inline void
1858__intel_pmu_pebs_event(struct perf_event *event,
1859                       struct pt_regs *iregs,
1860                       struct perf_sample_data *data,
1861                       void *base, void *top,
1862                       int bit, int count,
1863                       void (*setup_sample)(struct perf_event *,
1864                                            struct pt_regs *,
1865                                            void *,
1866                                            struct perf_sample_data *,
1867                                            struct pt_regs *))
1868{
1869        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1870        struct hw_perf_event *hwc = &event->hw;
1871        struct x86_perf_regs perf_regs;
1872        struct pt_regs *regs = &perf_regs.regs;
1873        void *at = get_next_pebs_record_by_bit(base, top, bit);
1874        static struct pt_regs dummy_iregs;
1875
1876        if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
1877                /*
1878                 * Auto-reload is currently only enabled in fixed period
1879                 * mode, so the reload value is always hwc->sample_period.
1880                 * This may need to change if auto-reload is ever enabled
1881                 * in freq mode.
1882                 */
1883                intel_pmu_save_and_restart_reload(event, count);
1884        } else if (!intel_pmu_save_and_restart(event))
1885                return;
1886
1887        if (!iregs)
1888                iregs = &dummy_iregs;
1889
1890        while (count > 1) {
1891                setup_sample(event, iregs, at, data, regs);
1892                perf_event_output(event, data, regs);
1893                at += cpuc->pebs_record_size;
1894                at = get_next_pebs_record_by_bit(at, top, bit);
1895                count--;
1896        }
1897
1898        setup_sample(event, iregs, at, data, regs);
1899        if (iregs == &dummy_iregs) {
1900                /*
1901                 * The PEBS records may be drained in a non-overflow context,
1902                 * e.g., large PEBS + context switch. Perf should treat the
1903                 * last record the same as the other PEBS records and not
1904                 * invoke the generic overflow handler.
1905                 */
1906                perf_event_output(event, data, regs);
1907        } else {
1908                /*
1909                 * All but the last record were processed above. The last
1910                 * one is left so that the overflow handler can be invoked.
1911                 */
1912                if (perf_event_overflow(event, data, regs))
1913                        x86_pmu_stop(event, 0);
1914        }
1915}
1916
1917static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data)
1918{
1919        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1920        struct debug_store *ds = cpuc->ds;
1921        struct perf_event *event = cpuc->events[0]; /* PMC0 only */
1922        struct pebs_record_core *at, *top;
1923        int n;
1924
1925        if (!x86_pmu.pebs_active)
1926                return;
1927
1928        at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
1929        top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
1930
1931        /*
1932         * Whatever else happens, drain the thing
1933         */
1934        ds->pebs_index = ds->pebs_buffer_base;
1935
1936        if (!test_bit(0, cpuc->active_mask))
1937                return;
1938
1939        WARN_ON_ONCE(!event);
1940
1941        if (!event->attr.precise_ip)
1942                return;
1943
1944        n = top - at;
1945        if (n <= 0) {
1946                if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
1947                        intel_pmu_save_and_restart_reload(event, 0);
1948                return;
1949        }
1950
1951        __intel_pmu_pebs_event(event, iregs, data, at, top, 0, n,
1952                               setup_pebs_fixed_sample_data);
1953}
1954
1955static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size)
1956{
1957        struct perf_event *event;
1958        int bit;
1959
1960        /*
1961         * drain_pebs() can be called twice in a short period for an
1962         * auto-reload event via pmu::read(), with no overflow having
1963         * happened in between. intel_pmu_save_and_restart_reload()
1964         * still needs to be called here to update event->count for
1965         * that case.
1966         */
1967        for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) {
1968                event = cpuc->events[bit];
1969                if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
1970                        intel_pmu_save_and_restart_reload(event, 0);
1971        }
1972}
1973
1974static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data)
1975{
1976        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1977        struct debug_store *ds = cpuc->ds;
1978        struct perf_event *event;
1979        void *base, *at, *top;
1980        short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
1981        short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
1982        int bit, i, size;
1983        u64 mask;
1984
1985        if (!x86_pmu.pebs_active)
1986                return;
1987
1988        base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
1989        top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
1990
1991        ds->pebs_index = ds->pebs_buffer_base;
1992
1993        mask = (1ULL << x86_pmu.max_pebs_events) - 1;
1994        size = x86_pmu.max_pebs_events;
1995        if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
1996                mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
1997                size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
1998        }
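        /*
         * For example, on a hypothetical CPU with 4 general purpose PEBS
         * counters and 3 fixed counters with PMU_FL_PEBS_ALL set, mask
         * ends up as 0xf | (0x7ULL << 32) and size as 35, since
         * INTEL_PMC_IDX_FIXED is 32.
         */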
1999
2000        if (unlikely(base >= top)) {
2001                intel_pmu_pebs_event_update_no_drain(cpuc, size);
2002                return;
2003        }
2004
2005        for (at = base; at < top; at += x86_pmu.pebs_record_size) {
2006                struct pebs_record_nhm *p = at;
2007                u64 pebs_status;
2008
2009                pebs_status = p->status & cpuc->pebs_enabled;
2010                pebs_status &= mask;
2011
2012                /* PEBS v3 has more accurate status bits */
2013                if (x86_pmu.intel_cap.pebs_format >= 3) {
2014                        for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
2015                                counts[bit]++;
2016
2017                        continue;
2018                }
2019
2020                /*
2021                 * On some CPUs the PEBS status can be zero when PEBS is
2022                 * racing with clearing of GLOBAL_STATUS.
2023                 *
2024                 * Normally we would drop that record, but in the
2025                 * case when there is only a single active PEBS event
2026                 * we can assume it's for that event.
2027                 */
2028                if (!pebs_status && cpuc->pebs_enabled &&
2029                        !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
2030                        pebs_status = p->status = cpuc->pebs_enabled;
2031
2032                bit = find_first_bit((unsigned long *)&pebs_status,
2033                                        x86_pmu.max_pebs_events);
2034                if (bit >= x86_pmu.max_pebs_events)
2035                        continue;
2036
2037                /*
2038                 * The PEBS hardware does not deal well with events that
2039                 * happen close to each other and set multiple status bits,
2040                 * but this should be rare.
2041                 *
2042                 * If these events include one PEBS and multiple non-PEBS
2043                 * events, the PEBS record is unaffected and is handled
2044                 * normally (slow path).
2045                 *
2046                 * If these events include two or more PEBS events, their
2047                 * records can be collapsed into a single one, and it is
2048                 * not possible to reconstruct all the events that caused
2049                 * the PEBS record. This is called a collision, and when
2050                 * it happens the record is dropped.
2051                 */
2052                if (pebs_status != (1ULL << bit)) {
2053                        for_each_set_bit(i, (unsigned long *)&pebs_status, size)
2054                                error[i]++;
2055                        continue;
2056                }
2057
2058                counts[bit]++;
2059        }
2060
2061        for_each_set_bit(bit, (unsigned long *)&mask, size) {
2062                if ((counts[bit] == 0) && (error[bit] == 0))
2063                        continue;
2064
2065                event = cpuc->events[bit];
2066                if (WARN_ON_ONCE(!event))
2067                        continue;
2068
2069                if (WARN_ON_ONCE(!event->attr.precise_ip))
2070                        continue;
2071
2072                /* log the number of dropped samples */
2073                if (error[bit]) {
2074                        perf_log_lost_samples(event, error[bit]);
2075
2076                        if (iregs && perf_event_account_interrupt(event))
2077                                x86_pmu_stop(event, 0);
2078                }
2079
2080                if (counts[bit]) {
2081                        __intel_pmu_pebs_event(event, iregs, data, base,
2082                                               top, bit, counts[bit],
2083                                               setup_pebs_fixed_sample_data);
2084                }
2085        }
2086}
2087
2088static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
2089{
2090        short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
2091        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2092        int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
2093        int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
2094        struct debug_store *ds = cpuc->ds;
2095        struct perf_event *event;
2096        void *base, *at, *top;
2097        int bit, size;
2098        u64 mask;
2099
2100        if (!x86_pmu.pebs_active)
2101                return;
2102
2103        base = (struct pebs_basic *)(unsigned long)ds->pebs_buffer_base;
2104        top = (struct pebs_basic *)(unsigned long)ds->pebs_index;
2105
2106        ds->pebs_index = ds->pebs_buffer_base;
2107
2108        mask = ((1ULL << max_pebs_events) - 1) |
2109               (((1ULL << num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED);
2110        size = INTEL_PMC_IDX_FIXED + num_counters_fixed;
2111
2112        if (unlikely(base >= top)) {
2113                intel_pmu_pebs_event_update_no_drain(cpuc, size);
2114                return;
2115        }
2116
2117        for (at = base; at < top; at += cpuc->pebs_record_size) {
2118                u64 pebs_status;
2119
2120                pebs_status = get_pebs_status(at) & cpuc->pebs_enabled;
2121                pebs_status &= mask;
2122
2123                for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
2124                        counts[bit]++;
2125        }
2126
2127        for_each_set_bit(bit, (unsigned long *)&mask, size) {
2128                if (counts[bit] == 0)
2129                        continue;
2130
2131                event = cpuc->events[bit];
2132                if (WARN_ON_ONCE(!event))
2133                        continue;
2134
2135                if (WARN_ON_ONCE(!event->attr.precise_ip))
2136                        continue;
2137
2138                __intel_pmu_pebs_event(event, iregs, data, base,
2139                                       top, bit, counts[bit],
2140                                       setup_pebs_adaptive_sample_data);
2141        }
2142}
2143
2144/*
2145 * BTS, PEBS probe and setup
2146 */
2147
2148void __init intel_ds_init(void)
2149{
2150        /*
2151         * No support for 32-bit formats
2152         */
2153        if (!boot_cpu_has(X86_FEATURE_DTES64))
2154                return;
2155
2156        x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
2157        x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
2158        x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
2159        if (x86_pmu.version <= 4)
2160                x86_pmu.pebs_no_isolation = 1;
2161
2162        if (x86_pmu.pebs) {
2163                char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
2164                char *pebs_qual = "";
2165                int format = x86_pmu.intel_cap.pebs_format;
2166
2167                if (format < 4)
2168                        x86_pmu.intel_cap.pebs_baseline = 0;
2169
2170                switch (format) {
2171                case 0:
2172                        pr_cont("PEBS fmt0%c, ", pebs_type);
2173                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
2174                        /*
2175                         * Using >PAGE_SIZE buffers makes the WRMSR to
2176                         * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
2177                         * mysteriously hang on Core2.
2178                         *
2179                         * As a workaround, we don't do this.
2180                         */
2181                        x86_pmu.pebs_buffer_size = PAGE_SIZE;
2182                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
2183                        break;
2184
2185                case 1:
2186                        pr_cont("PEBS fmt1%c, ", pebs_type);
2187                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
2188                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
2189                        break;
2190
2191                case 2:
2192                        pr_cont("PEBS fmt2%c, ", pebs_type);
2193                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
2194                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
2195                        break;
2196
2197                case 3:
2198                        pr_cont("PEBS fmt3%c, ", pebs_type);
2199                        x86_pmu.pebs_record_size =
2200                                                sizeof(struct pebs_record_skl);
2201                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
2202                        x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
2203                        break;
2204
2205                case 4:
2206                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
2207                        x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
2208                        if (x86_pmu.intel_cap.pebs_baseline) {
2209                                x86_pmu.large_pebs_flags |=
2210                                        PERF_SAMPLE_BRANCH_STACK |
2211                                        PERF_SAMPLE_TIME;
2212                                x86_pmu.flags |= PMU_FL_PEBS_ALL;
2213                                pebs_qual = "-baseline";
2214                                x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
2215                        } else {
2216                                /* Only basic record supported */
2217                                x86_pmu.large_pebs_flags &=
2218                                        ~(PERF_SAMPLE_ADDR |
2219                                          PERF_SAMPLE_TIME |
2220                                          PERF_SAMPLE_DATA_SRC |
2221                                          PERF_SAMPLE_TRANSACTION |
2222                                          PERF_SAMPLE_REGS_USER |
2223                                          PERF_SAMPLE_REGS_INTR);
2224                        }
2225                        pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);
2226
2227                        if (!is_hybrid() && x86_pmu.intel_cap.pebs_output_pt_available) {
2228                                pr_cont("PEBS-via-PT, ");
2229                                x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
2230                        }
2231
2232                        break;
2233
2234                default:
2235                        pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
2236                        x86_pmu.pebs = 0;
2237                }
2238        }
2239}
2240
2241void perf_restore_debug_store(void)
2242{
2243        struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
2244
2245        if (!x86_pmu.bts && !x86_pmu.pebs)
2246                return;
2247
2248        wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
2249}
2250