linux/arch/x86/events/intel/ds.c
   1// SPDX-License-Identifier: GPL-2.0
   2#include <linux/bitops.h>
   3#include <linux/types.h>
   4#include <linux/slab.h>
   5
   6#include <asm/cpu_entry_area.h>
   7#include <asm/perf_event.h>
   8#include <asm/tlbflush.h>
   9#include <asm/insn.h>
  10#include <asm/io.h>
  11
  12#include "../perf_event.h"
  13
  14/* Waste a full page so it can be mapped into the cpu_entry_area */
  15DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
  16
  17/* The size of a BTS record in bytes: */
  18#define BTS_RECORD_SIZE         24
  19
  20#define PEBS_FIXUP_SIZE         PAGE_SIZE
  21
  22/*
  23 * pebs_record_32 for p4 and core not supported
  24
  25struct pebs_record_32 {
  26        u32 flags, ip;
   27        u32 ax, bx, cx, dx;
  28        u32 si, di, bp, sp;
  29};
  30
  31 */
  32
  33union intel_x86_pebs_dse {
  34        u64 val;
  35        struct {
  36                unsigned int ld_dse:4;
  37                unsigned int ld_stlb_miss:1;
  38                unsigned int ld_locked:1;
  39                unsigned int ld_data_blk:1;
  40                unsigned int ld_addr_blk:1;
  41                unsigned int ld_reserved:24;
  42        };
  43        struct {
  44                unsigned int st_l1d_hit:1;
  45                unsigned int st_reserved1:3;
  46                unsigned int st_stlb_miss:1;
  47                unsigned int st_locked:1;
  48                unsigned int st_reserved2:26;
  49        };
  50        struct {
  51                unsigned int st_lat_dse:4;
  52                unsigned int st_lat_stlb_miss:1;
  53                unsigned int st_lat_locked:1;
  54                unsigned int ld_reserved3:26;
  55        };
  56};
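/*
 * The three anonymous bitfield structs above are alternative decodings of
 * the same low bits of the PEBS data-source (DSE) qword: ld_* for
 * load-latency records, st_* for precise-store records and st_lat_* for
 * store-latency records. For example, a load-latency DSE of 0x25 decodes
 * as ld_dse = 5 (see the table below), ld_stlb_miss = 0, ld_locked = 1.
 */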
  57
  58
  59/*
  60 * Map PEBS Load Latency Data Source encodings to generic
  61 * memory data source information
  62 */
  63#define P(a, b) PERF_MEM_S(a, b)
  64#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
  65#define LEVEL(x) P(LVLNUM, x)
  66#define REM P(REMOTE, REMOTE)
  67#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
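/*
 * A rough expansion sketch: entry 0x01 of the table below,
 *	OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE)
 * becomes PERF_MEM_S(OP, LOAD) | PERF_MEM_S(LVL, HIT) | PERF_MEM_S(LVL, L1) |
 * PERF_MEM_S(LVLNUM, L1) | PERF_MEM_S(SNOOP, NONE), i.e. a load that hit the
 * local L1 with no snooping involved.
 */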
  68
  69/* Version for Sandy Bridge and later */
  70static u64 pebs_data_source[] = {
  71        P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
  72        OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),  /* 0x01: L1 local */
  73        OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
  74        OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, NONE),  /* 0x03: L2 hit */
  75        OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, NONE),  /* 0x04: L3 hit */
  76        OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, MISS),  /* 0x05: L3 hit, snoop miss */
  77        OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HIT),   /* 0x06: L3 hit, snoop hit */
  78        OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HITM),  /* 0x07: L3 hit, snoop hitm */
  79        OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
  80        OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
  81        OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | P(SNOOP, HIT),       /* 0x0a: L3 miss, shared */
  82        OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
  83        OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | SNOOP_NONE_MISS,     /* 0x0c: L3 miss, excl */
  84        OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* 0x0d: L3 miss, excl */
  85        OP_LH | P(LVL, IO)  | LEVEL(NA) | P(SNOOP, NONE), /* 0x0e: I/O */
  86        OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */
  87};
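/*
 * The table is indexed by the low 4 bits of the PEBS data-source field
 * (ld_dse / st_lat_dse above). intel_pmu_pebs_data_source_nhm() and
 * intel_pmu_pebs_data_source_skl() below patch the entries whose meaning
 * differs on those generations.
 */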
  88
  89/* Patch up minor differences in the bits */
  90void __init intel_pmu_pebs_data_source_nhm(void)
  91{
  92        pebs_data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
  93        pebs_data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
  94        pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
  95}
  96
  97void __init intel_pmu_pebs_data_source_skl(bool pmem)
  98{
  99        u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);
 100
 101        pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
 102        pebs_data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
 103        pebs_data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
 104        pebs_data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
 105        pebs_data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
 106}
 107
 108static u64 precise_store_data(u64 status)
 109{
 110        union intel_x86_pebs_dse dse;
 111        u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
 112
 113        dse.val = status;
 114
 115        /*
 116         * bit 4: TLB access
  117         * 1 = store missed 2nd level TLB
  118         *
  119         * so it either hit the page walker or faulted to the OS;
  120         * otherwise it hit the 2nd level TLB
 121         */
 122        if (dse.st_stlb_miss)
 123                val |= P(TLB, MISS);
 124        else
 125                val |= P(TLB, HIT);
 126
 127        /*
 128         * bit 0: hit L1 data cache
 129         * if not set, then all we know is that
 130         * it missed L1D
 131         */
 132        if (dse.st_l1d_hit)
 133                val |= P(LVL, HIT);
 134        else
 135                val |= P(LVL, MISS);
 136
 137        /*
 138         * bit 5: Locked prefix
 139         */
 140        if (dse.st_locked)
 141                val |= P(LOCK, LOCKED);
 142
 143        return val;
 144}
 145
 146static u64 precise_datala_hsw(struct perf_event *event, u64 status)
 147{
 148        union perf_mem_data_src dse;
 149
 150        dse.val = PERF_MEM_NA;
 151
 152        if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
 153                dse.mem_op = PERF_MEM_OP_STORE;
 154        else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
 155                dse.mem_op = PERF_MEM_OP_LOAD;
 156
 157        /*
  158         * L1 info only valid for the following events:
 159         *
 160         * MEM_UOPS_RETIRED.STLB_MISS_STORES
 161         * MEM_UOPS_RETIRED.LOCK_STORES
 162         * MEM_UOPS_RETIRED.SPLIT_STORES
 163         * MEM_UOPS_RETIRED.ALL_STORES
 164         */
 165        if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
 166                if (status & 1)
 167                        dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
 168                else
 169                        dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
 170        }
 171        return dse.val;
 172}
 173
 174static u64 load_latency_data(u64 status)
 175{
 176        union intel_x86_pebs_dse dse;
 177        u64 val;
 178
 179        dse.val = status;
 180
 181        /*
  182         * use the mapping table for bits 0-3
 183         */
 184        val = pebs_data_source[dse.ld_dse];
 185
 186        /*
  187         * Nehalem models do not support TLB or lock info
 188         */
 189        if (x86_pmu.pebs_no_tlb) {
 190                val |= P(TLB, NA) | P(LOCK, NA);
 191                return val;
 192        }
 193        /*
 194         * bit 4: TLB access
 195         * 0 = did not miss 2nd level TLB
 196         * 1 = missed 2nd level TLB
 197         */
 198        if (dse.ld_stlb_miss)
 199                val |= P(TLB, MISS) | P(TLB, L2);
 200        else
 201                val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
 202
 203        /*
 204         * bit 5: locked prefix
 205         */
 206        if (dse.ld_locked)
 207                val |= P(LOCK, LOCKED);
 208
 209        /*
  210         * Ice Lake and earlier models do not support block info.
 211         */
 212        if (!x86_pmu.pebs_block) {
 213                val |= P(BLK, NA);
 214                return val;
 215        }
 216        /*
 217         * bit 6: load was blocked since its data could not be forwarded
 218         *        from a preceding store
 219         */
 220        if (dse.ld_data_blk)
 221                val |= P(BLK, DATA);
 222
 223        /*
 224         * bit 7: load was blocked due to potential address conflict with
 225         *        a preceding store
 226         */
 227        if (dse.ld_addr_blk)
 228                val |= P(BLK, ADDR);
 229
 230        if (!dse.ld_data_blk && !dse.ld_addr_blk)
 231                val |= P(BLK, NA);
 232
 233        return val;
 234}
 235
 236static u64 store_latency_data(u64 status)
 237{
 238        union intel_x86_pebs_dse dse;
 239        u64 val;
 240
 241        dse.val = status;
 242
 243        /*
  244         * use the mapping table for bits 0-3
 245         */
 246        val = pebs_data_source[dse.st_lat_dse];
 247
 248        /*
 249         * bit 4: TLB access
 250         * 0 = did not miss 2nd level TLB
 251         * 1 = missed 2nd level TLB
 252         */
 253        if (dse.st_lat_stlb_miss)
 254                val |= P(TLB, MISS) | P(TLB, L2);
 255        else
 256                val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
 257
 258        /*
 259         * bit 5: locked prefix
 260         */
 261        if (dse.st_lat_locked)
 262                val |= P(LOCK, LOCKED);
 263
 264        val |= P(BLK, NA);
 265
 266        return val;
 267}
 268
 269struct pebs_record_core {
 270        u64 flags, ip;
 271        u64 ax, bx, cx, dx;
 272        u64 si, di, bp, sp;
 273        u64 r8,  r9,  r10, r11;
 274        u64 r12, r13, r14, r15;
 275};
 276
 277struct pebs_record_nhm {
 278        u64 flags, ip;
 279        u64 ax, bx, cx, dx;
 280        u64 si, di, bp, sp;
 281        u64 r8,  r9,  r10, r11;
 282        u64 r12, r13, r14, r15;
 283        u64 status, dla, dse, lat;
 284};
 285
 286/*
 287 * Same as pebs_record_nhm, with two additional fields.
 288 */
 289struct pebs_record_hsw {
 290        u64 flags, ip;
 291        u64 ax, bx, cx, dx;
 292        u64 si, di, bp, sp;
 293        u64 r8,  r9,  r10, r11;
 294        u64 r12, r13, r14, r15;
 295        u64 status, dla, dse, lat;
 296        u64 real_ip, tsx_tuning;
 297};
 298
 299union hsw_tsx_tuning {
 300        struct {
 301                u32 cycles_last_block     : 32,
 302                    hle_abort             : 1,
 303                    rtm_abort             : 1,
 304                    instruction_abort     : 1,
 305                    non_instruction_abort : 1,
 306                    retry                 : 1,
 307                    data_conflict         : 1,
 308                    capacity_writes       : 1,
 309                    capacity_reads        : 1;
 310        };
 311        u64         value;
 312};
 313
 314#define PEBS_HSW_TSX_FLAGS      0xff00000000ULL
 315
 316/* Same as HSW, plus TSC */
 317
 318struct pebs_record_skl {
 319        u64 flags, ip;
 320        u64 ax, bx, cx, dx;
 321        u64 si, di, bp, sp;
 322        u64 r8,  r9,  r10, r11;
 323        u64 r12, r13, r14, r15;
 324        u64 status, dla, dse, lat;
 325        u64 real_ip, tsx_tuning;
 326        u64 tsc;
 327};
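/*
 * These fixed layouts roughly track intel_cap.pebs_format: 0 (core),
 * 1 (nhm), 2 (hsw) and 3 (skl). Format >= 4 uses the adaptive,
 * variable-length record built from struct pebs_basic plus optional
 * groups; see adaptive_pebs_record_size_update() below.
 */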
 328
 329void init_debug_store_on_cpu(int cpu)
 330{
 331        struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
 332
 333        if (!ds)
 334                return;
 335
 336        wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
 337                     (u32)((u64)(unsigned long)ds),
 338                     (u32)((u64)(unsigned long)ds >> 32));
 339}
 340
 341void fini_debug_store_on_cpu(int cpu)
 342{
 343        if (!per_cpu(cpu_hw_events, cpu).ds)
 344                return;
 345
 346        wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
 347}
 348
 349static DEFINE_PER_CPU(void *, insn_buffer);
 350
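/*
 * The DS, PEBS and BTS buffers are written by the CPU through the linear
 * addresses programmed into the DS area, so they must be mapped in whatever
 * page table is live when the assist fires. Mapping them into the per-CPU
 * cpu_entry_area (which is also present in the user page tables under PTI)
 * guarantees that; ds_update_cea()/ds_clear_cea() maintain those aliases
 * page by page.
 */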
 351static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
 352{
 353        unsigned long start = (unsigned long)cea;
 354        phys_addr_t pa;
 355        size_t msz = 0;
 356
 357        pa = virt_to_phys(addr);
 358
 359        preempt_disable();
 360        for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
 361                cea_set_pte(cea, pa, prot);
 362
 363        /*
 364         * This is a cross-CPU update of the cpu_entry_area, we must shoot down
 365         * all TLB entries for it.
 366         */
 367        flush_tlb_kernel_range(start, start + size);
 368        preempt_enable();
 369}
 370
 371static void ds_clear_cea(void *cea, size_t size)
 372{
 373        unsigned long start = (unsigned long)cea;
 374        size_t msz = 0;
 375
 376        preempt_disable();
 377        for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
 378                cea_set_pte(cea, 0, PAGE_NONE);
 379
 380        flush_tlb_kernel_range(start, start + size);
 381        preempt_enable();
 382}
 383
 384static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
 385{
 386        unsigned int order = get_order(size);
 387        int node = cpu_to_node(cpu);
 388        struct page *page;
 389
 390        page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
 391        return page ? page_address(page) : NULL;
 392}
 393
 394static void dsfree_pages(const void *buffer, size_t size)
 395{
 396        if (buffer)
 397                free_pages((unsigned long)buffer, get_order(size));
 398}
 399
 400static int alloc_pebs_buffer(int cpu)
 401{
 402        struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
 403        struct debug_store *ds = hwev->ds;
 404        size_t bsiz = x86_pmu.pebs_buffer_size;
 405        int max, node = cpu_to_node(cpu);
 406        void *buffer, *ibuffer, *cea;
 407
 408        if (!x86_pmu.pebs)
 409                return 0;
 410
 411        buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
 412        if (unlikely(!buffer))
 413                return -ENOMEM;
 414
 415        /*
 416         * HSW+ already provides us the eventing ip; no need to allocate this
 417         * buffer then.
 418         */
 419        if (x86_pmu.intel_cap.pebs_format < 2) {
 420                ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
 421                if (!ibuffer) {
 422                        dsfree_pages(buffer, bsiz);
 423                        return -ENOMEM;
 424                }
 425                per_cpu(insn_buffer, cpu) = ibuffer;
 426        }
 427        hwev->ds_pebs_vaddr = buffer;
 428        /* Update the cpu entry area mapping */
 429        cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
 430        ds->pebs_buffer_base = (unsigned long) cea;
 431        ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
 432        ds->pebs_index = ds->pebs_buffer_base;
 433        max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
 434        ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
 435        return 0;
 436}
 437
 438static void release_pebs_buffer(int cpu)
 439{
 440        struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
 441        void *cea;
 442
 443        if (!x86_pmu.pebs)
 444                return;
 445
 446        kfree(per_cpu(insn_buffer, cpu));
 447        per_cpu(insn_buffer, cpu) = NULL;
 448
 449        /* Clear the fixmap */
 450        cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
 451        ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
 452        dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
 453        hwev->ds_pebs_vaddr = NULL;
 454}
 455
 456static int alloc_bts_buffer(int cpu)
 457{
 458        struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
 459        struct debug_store *ds = hwev->ds;
 460        void *buffer, *cea;
 461        int max;
 462
 463        if (!x86_pmu.bts)
 464                return 0;
 465
 466        buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
 467        if (unlikely(!buffer)) {
 468                WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
 469                return -ENOMEM;
 470        }
 471        hwev->ds_bts_vaddr = buffer;
 472        /* Update the fixmap */
 473        cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
 474        ds->bts_buffer_base = (unsigned long) cea;
 475        ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
 476        ds->bts_index = ds->bts_buffer_base;
 477        max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
 478        ds->bts_absolute_maximum = ds->bts_buffer_base +
 479                                        max * BTS_RECORD_SIZE;
 480        ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
 481                                        (max / 16) * BTS_RECORD_SIZE;
 482        return 0;
 483}
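/*
 * Worked example, assuming BTS_BUFFER_SIZE is 16 pages as defined in
 * ../perf_event.h: 65536 / 24 = 2730 records fit, and the interrupt
 * threshold sits 2730/16 = 170 records before the absolute maximum,
 * i.e. after roughly 2560 records -- which is where the "n <= 2560"
 * bound quoted in intel_pmu_drain_bts_buffer() comes from.
 */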
 484
 485static void release_bts_buffer(int cpu)
 486{
 487        struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
 488        void *cea;
 489
 490        if (!x86_pmu.bts)
 491                return;
 492
 493        /* Clear the fixmap */
 494        cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
 495        ds_clear_cea(cea, BTS_BUFFER_SIZE);
 496        dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
 497        hwev->ds_bts_vaddr = NULL;
 498}
 499
 500static int alloc_ds_buffer(int cpu)
 501{
 502        struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
 503
 504        memset(ds, 0, sizeof(*ds));
 505        per_cpu(cpu_hw_events, cpu).ds = ds;
 506        return 0;
 507}
 508
 509static void release_ds_buffer(int cpu)
 510{
 511        per_cpu(cpu_hw_events, cpu).ds = NULL;
 512}
 513
 514void release_ds_buffers(void)
 515{
 516        int cpu;
 517
 518        if (!x86_pmu.bts && !x86_pmu.pebs)
 519                return;
 520
 521        for_each_possible_cpu(cpu)
 522                release_ds_buffer(cpu);
 523
 524        for_each_possible_cpu(cpu) {
 525                /*
  526                 * Again, ignore errors from offline CPUs: they will no longer
  527                 * observe cpu_hw_events.ds and thus will not program the DS_AREA
  528                 * when they come up.
 529                 */
 530                fini_debug_store_on_cpu(cpu);
 531        }
 532
 533        for_each_possible_cpu(cpu) {
 534                release_pebs_buffer(cpu);
 535                release_bts_buffer(cpu);
 536        }
 537}
 538
 539void reserve_ds_buffers(void)
 540{
 541        int bts_err = 0, pebs_err = 0;
 542        int cpu;
 543
 544        x86_pmu.bts_active = 0;
 545        x86_pmu.pebs_active = 0;
 546
 547        if (!x86_pmu.bts && !x86_pmu.pebs)
 548                return;
 549
 550        if (!x86_pmu.bts)
 551                bts_err = 1;
 552
 553        if (!x86_pmu.pebs)
 554                pebs_err = 1;
 555
 556        for_each_possible_cpu(cpu) {
 557                if (alloc_ds_buffer(cpu)) {
 558                        bts_err = 1;
 559                        pebs_err = 1;
 560                }
 561
 562                if (!bts_err && alloc_bts_buffer(cpu))
 563                        bts_err = 1;
 564
 565                if (!pebs_err && alloc_pebs_buffer(cpu))
 566                        pebs_err = 1;
 567
 568                if (bts_err && pebs_err)
 569                        break;
 570        }
 571
 572        if (bts_err) {
 573                for_each_possible_cpu(cpu)
 574                        release_bts_buffer(cpu);
 575        }
 576
 577        if (pebs_err) {
 578                for_each_possible_cpu(cpu)
 579                        release_pebs_buffer(cpu);
 580        }
 581
 582        if (bts_err && pebs_err) {
 583                for_each_possible_cpu(cpu)
 584                        release_ds_buffer(cpu);
 585        } else {
 586                if (x86_pmu.bts && !bts_err)
 587                        x86_pmu.bts_active = 1;
 588
 589                if (x86_pmu.pebs && !pebs_err)
 590                        x86_pmu.pebs_active = 1;
 591
 592                for_each_possible_cpu(cpu) {
 593                        /*
  594                         * Ignore wrmsr_on_cpu() errors for offline CPUs; they
  595                         * will get this call through intel_pmu_cpu_starting().
 596                         */
 597                        init_debug_store_on_cpu(cpu);
 598                }
 599        }
 600}
 601
 602/*
 603 * BTS
 604 */
 605
 606struct event_constraint bts_constraint =
 607        EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);
 608
 609void intel_pmu_enable_bts(u64 config)
 610{
 611        unsigned long debugctlmsr;
 612
 613        debugctlmsr = get_debugctlmsr();
 614
 615        debugctlmsr |= DEBUGCTLMSR_TR;
 616        debugctlmsr |= DEBUGCTLMSR_BTS;
 617        if (config & ARCH_PERFMON_EVENTSEL_INT)
 618                debugctlmsr |= DEBUGCTLMSR_BTINT;
 619
 620        if (!(config & ARCH_PERFMON_EVENTSEL_OS))
 621                debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
 622
 623        if (!(config & ARCH_PERFMON_EVENTSEL_USR))
 624                debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;
 625
 626        update_debugctlmsr(debugctlmsr);
 627}
 628
 629void intel_pmu_disable_bts(void)
 630{
 631        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 632        unsigned long debugctlmsr;
 633
 634        if (!cpuc->ds)
 635                return;
 636
 637        debugctlmsr = get_debugctlmsr();
 638
 639        debugctlmsr &=
 640                ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
 641                  DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
 642
 643        update_debugctlmsr(debugctlmsr);
 644}
 645
 646int intel_pmu_drain_bts_buffer(void)
 647{
 648        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 649        struct debug_store *ds = cpuc->ds;
 650        struct bts_record {
 651                u64     from;
 652                u64     to;
 653                u64     flags;
 654        };
 655        struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
 656        struct bts_record *at, *base, *top;
 657        struct perf_output_handle handle;
 658        struct perf_event_header header;
 659        struct perf_sample_data data;
 660        unsigned long skip = 0;
 661        struct pt_regs regs;
 662
 663        if (!event)
 664                return 0;
 665
 666        if (!x86_pmu.bts_active)
 667                return 0;
 668
 669        base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
 670        top  = (struct bts_record *)(unsigned long)ds->bts_index;
 671
 672        if (top <= base)
 673                return 0;
 674
 675        memset(&regs, 0, sizeof(regs));
 676
 677        ds->bts_index = ds->bts_buffer_base;
 678
 679        perf_sample_data_init(&data, 0, event->hw.last_period);
 680
 681        /*
 682         * BTS leaks kernel addresses in branches across the cpl boundary,
 683         * such as traps or system calls, so unless the user is asking for
 684         * kernel tracing (and right now it's not possible), we'd need to
 685         * filter them out. But first we need to count how many of those we
  686          * have in the current batch. This is an extra O(n) pass; however,
  687          * it's much faster than the output pass, especially considering that
 688         * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the
 689         * alloc_bts_buffer()).
 690         */
 691        for (at = base; at < top; at++) {
 692                /*
 693                 * Note that right now *this* BTS code only works if
 694                 * attr::exclude_kernel is set, but let's keep this extra
 695                 * check here in case that changes.
 696                 */
 697                if (event->attr.exclude_kernel &&
 698                    (kernel_ip(at->from) || kernel_ip(at->to)))
 699                        skip++;
 700        }
 701
 702        /*
 703         * Prepare a generic sample, i.e. fill in the invariant fields.
 704         * We will overwrite the from and to address before we output
 705         * the sample.
 706         */
 707        rcu_read_lock();
 708        perf_prepare_sample(&header, &data, event, &regs);
 709
 710        if (perf_output_begin(&handle, &data, event,
 711                              header.size * (top - base - skip)))
 712                goto unlock;
 713
 714        for (at = base; at < top; at++) {
 715                /* Filter out any records that contain kernel addresses. */
 716                if (event->attr.exclude_kernel &&
 717                    (kernel_ip(at->from) || kernel_ip(at->to)))
 718                        continue;
 719
 720                data.ip         = at->from;
 721                data.addr       = at->to;
 722
 723                perf_output_sample(&handle, &header, &data, event);
 724        }
 725
 726        perf_output_end(&handle);
 727
 728        /* There's new data available. */
 729        event->hw.interrupts++;
 730        event->pending_kill = POLL_IN;
 731unlock:
 732        rcu_read_unlock();
 733        return 1;
 734}
 735
 736static inline void intel_pmu_drain_pebs_buffer(void)
 737{
 738        struct perf_sample_data data;
 739
 740        x86_pmu.drain_pebs(NULL, &data);
 741}
 742
 743/*
 744 * PEBS
 745 */
 746struct event_constraint intel_core2_pebs_event_constraints[] = {
 747        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
 748        INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
 749        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
  750        INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
 751        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
 752        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
 753        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
 754        EVENT_CONSTRAINT_END
 755};
 756
 757struct event_constraint intel_atom_pebs_event_constraints[] = {
 758        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
 759        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
 760        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
 761        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
 762        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
 763        /* Allow all events as PEBS with no flags */
 764        INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
 765        EVENT_CONSTRAINT_END
 766};
 767
 768struct event_constraint intel_slm_pebs_event_constraints[] = {
 769        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
 770        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x1),
 771        /* Allow all events as PEBS with no flags */
 772        INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
 773        EVENT_CONSTRAINT_END
 774};
 775
 776struct event_constraint intel_glm_pebs_event_constraints[] = {
 777        /* Allow all events as PEBS with no flags */
 778        INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
 779        EVENT_CONSTRAINT_END
 780};
 781
 782struct event_constraint intel_grt_pebs_event_constraints[] = {
  783        /* Load latency and store latency events */
 784        INTEL_PLD_CONSTRAINT(0x5d0, 0xf),
 785        INTEL_PSD_CONSTRAINT(0x6d0, 0xf),
 786        EVENT_CONSTRAINT_END
 787};
 788
 789struct event_constraint intel_nehalem_pebs_event_constraints[] = {
 790        INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
 791        INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
 792        INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
 793        INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
 794        INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
 795        INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 796        INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
 797        INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
 798        INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
 799        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
 800        INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
 801        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
 802        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
 803        EVENT_CONSTRAINT_END
 804};
 805
 806struct event_constraint intel_westmere_pebs_event_constraints[] = {
 807        INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
 808        INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
 809        INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
 810        INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
 811        INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
 812        INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 813        INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
 814        INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
 815        INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
 816        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
 817        INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
 818        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
 819        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
 820        EVENT_CONSTRAINT_END
 821};
 822
 823struct event_constraint intel_snb_pebs_event_constraints[] = {
  824        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
 825        INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
 826        INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
 827        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
 828        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
 829        INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 830        INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 831        INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
 832        INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
 833        /* Allow all events as PEBS with no flags */
 834        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
 835        EVENT_CONSTRAINT_END
 836};
 837
 838struct event_constraint intel_ivb_pebs_event_constraints[] = {
  839        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
 840        INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
 841        INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
 842        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
 843        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
 844        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
 845        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
 846        INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 847        INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 848        INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
 849        INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
 850        /* Allow all events as PEBS with no flags */
 851        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
 852        EVENT_CONSTRAINT_END
 853};
 854
 855struct event_constraint intel_hsw_pebs_event_constraints[] = {
  856        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
 857        INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
 858        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
 859        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
 860        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
 861        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
 862        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
 863        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
 864        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
 865        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
 866        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
 867        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
 868        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
 869        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
 870        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 871        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
 872        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
 873        /* Allow all events as PEBS with no flags */
 874        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
 875        EVENT_CONSTRAINT_END
 876};
 877
 878struct event_constraint intel_bdw_pebs_event_constraints[] = {
  879        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
 880        INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
 881        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
 882        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
 883        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
 884        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
 885        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
 886        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
 887        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
 888        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
 889        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
 890        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
 891        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
 892        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
 893        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 894        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
 895        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
 896        /* Allow all events as PEBS with no flags */
 897        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
 898        EVENT_CONSTRAINT_END
 899};
 900
 901
 902struct event_constraint intel_skl_pebs_event_constraints[] = {
 903        INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2),      /* INST_RETIRED.PREC_DIST */
 904        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
 905        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
 906        /* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
 907        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
 908        INTEL_PLD_CONSTRAINT(0x1cd, 0xf),                     /* MEM_TRANS_RETIRED.* */
 909        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
 910        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
 911        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
 912        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
 913        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
 914        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
 915        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
 916        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
 917        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_RETIRED.* */
 918        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_L3_HIT_RETIRED.* */
 919        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_L3_MISS_RETIRED.* */
 920        /* Allow all events as PEBS with no flags */
 921        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
 922        EVENT_CONSTRAINT_END
 923};
 924
 925struct event_constraint intel_icl_pebs_event_constraints[] = {
 926        INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL),   /* INST_RETIRED.PREC_DIST */
 927        INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),  /* SLOTS */
 928
 929        INTEL_PLD_CONSTRAINT(0x1cd, 0xff),                      /* MEM_TRANS_RETIRED.LOAD_LATENCY */
 930        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),    /* MEM_INST_RETIRED.LOAD */
 931        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),    /* MEM_INST_RETIRED.STORE */
 932
 933        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */
 934
 935        INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),                /* MEM_INST_RETIRED.* */
 936
 937        /*
 938         * Everything else is handled by PMU_FL_PEBS_ALL, because we
 939         * need the full constraints from the main table.
 940         */
 941
 942        EVENT_CONSTRAINT_END
 943};
 944
 945struct event_constraint intel_spr_pebs_event_constraints[] = {
 946        INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL),
 947        INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
 948
 949        INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe),
 950        INTEL_PLD_CONSTRAINT(0x1cd, 0xfe),
 951        INTEL_PSD_CONSTRAINT(0x2cd, 0x1),
 952        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),
 953        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),
 954
 955        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),
 956
 957        INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),
 958
 959        /*
 960         * Everything else is handled by PMU_FL_PEBS_ALL, because we
 961         * need the full constraints from the main table.
 962         */
 963
 964        EVENT_CONSTRAINT_END
 965};
 966
 967struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 968{
 969        struct event_constraint *pebs_constraints = hybrid(event->pmu, pebs_constraints);
 970        struct event_constraint *c;
 971
 972        if (!event->attr.precise_ip)
 973                return NULL;
 974
 975        if (pebs_constraints) {
 976                for_each_event_constraint(c, pebs_constraints) {
 977                        if (constraint_match(c, event->hw.config)) {
 978                                event->hw.flags |= c->flags;
 979                                return c;
 980                        }
 981                }
 982        }
 983
 984        /*
 985         * Extended PEBS support
 986         * Makes the PEBS code search the normal constraints.
 987         */
 988        if (x86_pmu.flags & PMU_FL_PEBS_ALL)
 989                return NULL;
 990
 991        return &emptyconstraint;
 992}
 993
 994/*
 995 * We need the sched_task callback even for per-cpu events when we use
 996 * the large interrupt threshold, such that we can provide PID and TID
 997 * to PEBS samples.
 998 */
 999static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
1000{
1001        if (cpuc->n_pebs == cpuc->n_pebs_via_pt)
1002                return false;
1003
1004        return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
1005}
1006
1007void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
1008{
1009        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1010
1011        if (!sched_in && pebs_needs_sched_cb(cpuc))
1012                intel_pmu_drain_pebs_buffer();
1013}
1014
1015static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
1016{
1017        struct debug_store *ds = cpuc->ds;
1018        int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
1019        int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
1020        u64 threshold;
1021        int reserved;
1022
1023        if (cpuc->n_pebs_via_pt)
1024                return;
1025
1026        if (x86_pmu.flags & PMU_FL_PEBS_ALL)
1027                reserved = max_pebs_events + num_counters_fixed;
1028        else
1029                reserved = max_pebs_events;
1030
1031        if (cpuc->n_pebs == cpuc->n_large_pebs) {
1032                threshold = ds->pebs_absolute_maximum -
1033                        reserved * cpuc->pebs_record_size;
1034        } else {
1035                threshold = ds->pebs_buffer_base + cpuc->pebs_record_size;
1036        }
1037
1038        ds->pebs_interrupt_threshold = threshold;
1039}
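/*
 * In short: when every PEBS event on this CPU uses large PEBS, the
 * interrupt threshold is pushed near the end of the buffer, keeping
 * 'reserved' record slots of headroom (roughly one per counter that may
 * still write a record after the threshold is crossed); otherwise a PMI
 * is requested after every single record.
 */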
1040
1041static void adaptive_pebs_record_size_update(void)
1042{
1043        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1044        u64 pebs_data_cfg = cpuc->pebs_data_cfg;
1045        int sz = sizeof(struct pebs_basic);
1046
1047        if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
1048                sz += sizeof(struct pebs_meminfo);
1049        if (pebs_data_cfg & PEBS_DATACFG_GP)
1050                sz += sizeof(struct pebs_gprs);
1051        if (pebs_data_cfg & PEBS_DATACFG_XMMS)
1052                sz += sizeof(struct pebs_xmm);
1053        if (pebs_data_cfg & PEBS_DATACFG_LBRS)
1054                sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry);
1055
1056        cpuc->pebs_record_size = sz;
1057}
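/*
 * Rough size example, assuming the group layouts from asm/perf_event.h:
 * with PEBS_DATACFG_MEMINFO and PEBS_DATACFG_GP set, a record is
 * sizeof(pebs_basic) + sizeof(pebs_meminfo) + sizeof(pebs_gprs) =
 * 0x20 + 0x20 + 0x90 = 0xd0 bytes; PEBS_DATACFG_LBRS adds another
 * x86_pmu.lbr_nr * sizeof(struct lbr_entry) (24 bytes each) on top.
 */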
1058
1059#define PERF_PEBS_MEMINFO_TYPE  (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC |   \
1060                                PERF_SAMPLE_PHYS_ADDR | \
1061                                PERF_SAMPLE_WEIGHT_TYPE |                    \
1062                                PERF_SAMPLE_TRANSACTION)
1063
1064static u64 pebs_update_adaptive_cfg(struct perf_event *event)
1065{
1066        struct perf_event_attr *attr = &event->attr;
1067        u64 sample_type = attr->sample_type;
1068        u64 pebs_data_cfg = 0;
1069        bool gprs, tsx_weight;
1070
1071        if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) &&
1072            attr->precise_ip > 1)
1073                return pebs_data_cfg;
1074
1075        if (sample_type & PERF_PEBS_MEMINFO_TYPE)
1076                pebs_data_cfg |= PEBS_DATACFG_MEMINFO;
1077
1078        /*
1079         * We need GPRs when:
1080         * + user requested them
1081         * + precise_ip < 2, since the regular (non-eventing) IP is taken from the GPRs
1082         * + For RTM TSX weight we need GPRs for the abort code.
1083         */
1084        gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
1085               (attr->sample_regs_intr & PEBS_GP_REGS);
1086
1087        tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
1088                     ((attr->config & INTEL_ARCH_EVENT_MASK) ==
1089                      x86_pmu.rtm_abort_event);
1090
1091        if (gprs || (attr->precise_ip < 2) || tsx_weight)
1092                pebs_data_cfg |= PEBS_DATACFG_GP;
1093
1094        if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
1095            (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK))
1096                pebs_data_cfg |= PEBS_DATACFG_XMMS;
1097
1098        if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
1099                /*
1100                 * For now always log all LBRs. Could configure this
1101                 * later.
1102                 */
1103                pebs_data_cfg |= PEBS_DATACFG_LBRS |
1104                        ((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT);
1105        }
1106
1107        return pebs_data_cfg;
1108}
1109
1110static void
1111pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
1112                  struct perf_event *event, bool add)
1113{
1114        struct pmu *pmu = event->ctx->pmu;
1115        /*
1116         * Make sure we get updated with the first PEBS
1117         * event. It also triggers during removal, but
1118         * that does not hurt:
1119         */
1120        bool update = cpuc->n_pebs == 1;
1121
1122        if (needed_cb != pebs_needs_sched_cb(cpuc)) {
1123                if (!needed_cb)
1124                        perf_sched_cb_inc(pmu);
1125                else
1126                        perf_sched_cb_dec(pmu);
1127
1128                update = true;
1129        }
1130
1131        /*
1132         * The PEBS record doesn't shrink on pmu::del(). Doing so would require
1133         * iterating all remaining PEBS events to reconstruct the config.
1134         */
1135        if (x86_pmu.intel_cap.pebs_baseline && add) {
1136                u64 pebs_data_cfg;
1137
1138                /* Clear pebs_data_cfg and pebs_record_size for first PEBS. */
1139                if (cpuc->n_pebs == 1) {
1140                        cpuc->pebs_data_cfg = 0;
1141                        cpuc->pebs_record_size = sizeof(struct pebs_basic);
1142                }
1143
1144                pebs_data_cfg = pebs_update_adaptive_cfg(event);
1145
1146                /* Update pebs_record_size if new event requires more data. */
1147                if (pebs_data_cfg & ~cpuc->pebs_data_cfg) {
1148                        cpuc->pebs_data_cfg |= pebs_data_cfg;
1149                        adaptive_pebs_record_size_update();
1150                        update = true;
1151                }
1152        }
1153
1154        if (update)
1155                pebs_update_threshold(cpuc);
1156}
1157
1158void intel_pmu_pebs_add(struct perf_event *event)
1159{
1160        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1161        struct hw_perf_event *hwc = &event->hw;
1162        bool needed_cb = pebs_needs_sched_cb(cpuc);
1163
1164        cpuc->n_pebs++;
1165        if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
1166                cpuc->n_large_pebs++;
1167        if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
1168                cpuc->n_pebs_via_pt++;
1169
1170        pebs_update_state(needed_cb, cpuc, event, true);
1171}
1172
1173static void intel_pmu_pebs_via_pt_disable(struct perf_event *event)
1174{
1175        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1176
1177        if (!is_pebs_pt(event))
1178                return;
1179
1180        if (!(cpuc->pebs_enabled & ~PEBS_VIA_PT_MASK))
1181                cpuc->pebs_enabled &= ~PEBS_VIA_PT_MASK;
1182}
1183
1184static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
1185{
1186        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1187        struct hw_perf_event *hwc = &event->hw;
1188        struct debug_store *ds = cpuc->ds;
1189        u64 value = ds->pebs_event_reset[hwc->idx];
1190        u32 base = MSR_RELOAD_PMC0;
1191        unsigned int idx = hwc->idx;
1192
1193        if (!is_pebs_pt(event))
1194                return;
1195
1196        if (!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))
1197                cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD;
1198
1199        cpuc->pebs_enabled |= PEBS_OUTPUT_PT;
1200
1201        if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
1202                base = MSR_RELOAD_FIXED_CTR0;
1203                idx = hwc->idx - INTEL_PMC_IDX_FIXED;
1204                value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx];
1205        }
1206        wrmsrl(base + idx, value);
1207}
1208
1209void intel_pmu_pebs_enable(struct perf_event *event)
1210{
1211        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1212        struct hw_perf_event *hwc = &event->hw;
1213        struct debug_store *ds = cpuc->ds;
1214        unsigned int idx = hwc->idx;
1215
1216        hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
1217
1218        cpuc->pebs_enabled |= 1ULL << hwc->idx;
1219
1220        if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
1221                cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
1222        else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
1223                cpuc->pebs_enabled |= 1ULL << 63;
1224
1225        if (x86_pmu.intel_cap.pebs_baseline) {
1226                hwc->config |= ICL_EVENTSEL_ADAPTIVE;
1227                if (cpuc->pebs_data_cfg != cpuc->active_pebs_data_cfg) {
1228                        wrmsrl(MSR_PEBS_DATA_CFG, cpuc->pebs_data_cfg);
1229                        cpuc->active_pebs_data_cfg = cpuc->pebs_data_cfg;
1230                }
1231        }
1232
1233        if (idx >= INTEL_PMC_IDX_FIXED)
1234                idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
1235
1236        /*
1237         * Use auto-reload if possible to save an MSR write in the PMI.
1238         * This must be done in pmu::start(); PERF_EVENT_IOC_PERIOD can change the period at any time.
1239         */
1240        if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
1241                ds->pebs_event_reset[idx] =
1242                        (u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
1243        } else {
1244                ds->pebs_event_reset[idx] = 0;
1245        }
1246
1247        intel_pmu_pebs_via_pt_enable(event);
1248}
1249
1250void intel_pmu_pebs_del(struct perf_event *event)
1251{
1252        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1253        struct hw_perf_event *hwc = &event->hw;
1254        bool needed_cb = pebs_needs_sched_cb(cpuc);
1255
1256        cpuc->n_pebs--;
1257        if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
1258                cpuc->n_large_pebs--;
1259        if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
1260                cpuc->n_pebs_via_pt--;
1261
1262        pebs_update_state(needed_cb, cpuc, event, false);
1263}
1264
1265void intel_pmu_pebs_disable(struct perf_event *event)
1266{
1267        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1268        struct hw_perf_event *hwc = &event->hw;
1269
1270        if (cpuc->n_pebs == cpuc->n_large_pebs &&
1271            cpuc->n_pebs != cpuc->n_pebs_via_pt)
1272                intel_pmu_drain_pebs_buffer();
1273
1274        cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
1275
1276        if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
1277            (x86_pmu.version < 5))
1278                cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
1279        else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
1280                cpuc->pebs_enabled &= ~(1ULL << 63);
1281
1282        intel_pmu_pebs_via_pt_disable(event);
1283
1284        if (cpuc->enabled)
1285                wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
1286
1287        hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
1288}
1289
1290void intel_pmu_pebs_enable_all(void)
1291{
1292        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1293
1294        if (cpuc->pebs_enabled)
1295                wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
1296}
1297
1298void intel_pmu_pebs_disable_all(void)
1299{
1300        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1301
1302        if (cpuc->pebs_enabled)
1303                wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
1304}
1305
1306static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
1307{
1308        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1309        unsigned long from = cpuc->lbr_entries[0].from;
1310        unsigned long old_to, to = cpuc->lbr_entries[0].to;
1311        unsigned long ip = regs->ip;
1312        int is_64bit = 0;
1313        void *kaddr;
1314        int size;
1315
1316        /*
1317         * We don't need to fix up anything if the PEBS assist is fault-like
1318         */
1319        if (!x86_pmu.intel_cap.pebs_trap)
1320                return 1;
1321
1322        /*
1323         * No LBR entry, no basic block, no rewinding
1324         */
1325        if (!cpuc->lbr_stack.nr || !from || !to)
1326                return 0;
1327
1328        /*
1329         * Basic blocks should never cross user/kernel boundaries
1330         */
1331        if (kernel_ip(ip) != kernel_ip(to))
1332                return 0;
1333
1334        /*
1335         * unsigned math, either ip is before the start (impossible) or
1336         * the basic block is larger than 1 page (sanity)
1337         */
1338        if ((ip - to) > PEBS_FIXUP_SIZE)
1339                return 0;
1340
1341        /*
1342         * We sampled a branch insn, rewind using the LBR stack
1343         */
1344        if (ip == to) {
1345                set_linear_ip(regs, from);
1346                return 1;
1347        }
1348
1349        size = ip - to;
1350        if (!kernel_ip(ip)) {
1351                int bytes;
1352                u8 *buf = this_cpu_read(insn_buffer);
1353
1354                /* 'size' must fit our buffer, see above */
1355                bytes = copy_from_user_nmi(buf, (void __user *)to, size);
1356                if (bytes != 0)
1357                        return 0;
1358
1359                kaddr = buf;
1360        } else {
1361                kaddr = (void *)to;
1362        }
1363
1364        do {
1365                struct insn insn;
1366
1367                old_to = to;
1368
1369#ifdef CONFIG_X86_64
1370                is_64bit = kernel_ip(to) || any_64bit_mode(regs);
1371#endif
1372                insn_init(&insn, kaddr, size, is_64bit);
1373                insn_get_length(&insn);
1374                /*
1375                 * Make sure there was not a problem decoding the
1376                 * instruction and getting the length.  This is
1377                 * doubly important because we have an infinite
1378                 * loop if insn.length=0.
1379                 */
1380                if (!insn.length)
1381                        break;
1382
1383                to += insn.length;
1384                kaddr += insn.length;
1385                size -= insn.length;
1386        } while (to < ip);
1387
1388        if (to == ip) {
1389                set_linear_ip(regs, old_to);
1390                return 1;
1391        }
1392
1393        /*
1394         * Even though we decoded the basic block, the instruction stream
1395         * never matched the given IP; either the TO or the IP got corrupted.
1396         */
1397        return 0;
1398}
1399
1400static inline u64 intel_get_tsx_weight(u64 tsx_tuning)
1401{
1402        if (tsx_tuning) {
1403                union hsw_tsx_tuning tsx = { .value = tsx_tuning };
1404                return tsx.cycles_last_block;
1405        }
1406        return 0;
1407}
1408
1409static inline u64 intel_get_tsx_transaction(u64 tsx_tuning, u64 ax)
1410{
1411        u64 txn = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
1412
1413        /* For RTM XABORTs also log the abort code from AX */
1414        if ((txn & PERF_TXN_TRANSACTION) && (ax & 1))
1415                txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
1416        return txn;
1417}
1418
1419static inline u64 get_pebs_status(void *n)
1420{
1421        if (x86_pmu.intel_cap.pebs_format < 4)
1422                return ((struct pebs_record_nhm *)n)->status;
1423        return ((struct pebs_basic *)n)->applicable_counters;
1424}
1425
1426#define PERF_X86_EVENT_PEBS_HSW_PREC \
1427                (PERF_X86_EVENT_PEBS_ST_HSW | \
1428                 PERF_X86_EVENT_PEBS_LD_HSW | \
1429                 PERF_X86_EVENT_PEBS_NA_HSW)
1430
1431static u64 get_data_src(struct perf_event *event, u64 aux)
1432{
1433        u64 val = PERF_MEM_NA;
1434        int fl = event->hw.flags;
1435        bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
1436
1437        if (fl & PERF_X86_EVENT_PEBS_LDLAT)
1438                val = load_latency_data(aux);
1439        else if (fl & PERF_X86_EVENT_PEBS_STLAT)
1440                val = store_latency_data(aux);
1441        else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
1442                val = precise_datala_hsw(event, aux);
1443        else if (fst)
1444                val = precise_store_data(aux);
1445        return val;
1446}
1447
1448static void setup_pebs_fixed_sample_data(struct perf_event *event,
1449                                   struct pt_regs *iregs, void *__pebs,
1450                                   struct perf_sample_data *data,
1451                                   struct pt_regs *regs)
1452{
1453        /*
1454         * We cast to the biggest pebs_record but are careful not to
1455         * unconditionally access the 'extra' entries.
1456         */
1457        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1458        struct pebs_record_skl *pebs = __pebs;
1459        u64 sample_type;
1460        int fll;
1461
1462        if (pebs == NULL)
1463                return;
1464
1465        sample_type = event->attr.sample_type;
1466        fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
1467
1468        perf_sample_data_init(data, 0, event->hw.last_period);
1469
1470        data->period = event->hw.last_period;
1471
1472        /*
1473         * Use latency for weight (only avail with PEBS-LL)
1474         */
1475        if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE))
1476                data->weight.full = pebs->lat;
1477
1478        /*
1479         * data.data_src encodes the data source
1480         */
1481        if (sample_type & PERF_SAMPLE_DATA_SRC)
1482                data->data_src.val = get_data_src(event, pebs->dse);
1483
1484        /*
1485         * We must however always use iregs for the unwinder to stay sane; the
1486         * record BP,SP,IP can point into thin air when the record is from a
1487         * previous PMI context or an (I)RET happened between the record and
1488         * PMI.
1489         */
1490        if (sample_type & PERF_SAMPLE_CALLCHAIN)
1491                data->callchain = perf_callchain(event, iregs);
1492
1493        /*
1494         * We use the interrupt regs as a base because the PEBS record does not
1495         * contain a full regs set; specifically, it seems to lack segment
1496         * descriptors, which get used by things like user_mode().
1497         *
1498         * In the simple case fix up only the IP for PERF_SAMPLE_IP.
1499         */
1500        *regs = *iregs;
1501
1502        /*
1503         * Initialize regs->flags from PEBS and clear the exact bit
1504         * (which uses x86 EFLAGS Reserved bit 3),
1505         * i.e., do not rely on it being zero:
1506         */
1507        regs->flags = pebs->flags & ~PERF_EFLAGS_EXACT;
1508
1509        if (sample_type & PERF_SAMPLE_REGS_INTR) {
1510                regs->ax = pebs->ax;
1511                regs->bx = pebs->bx;
1512                regs->cx = pebs->cx;
1513                regs->dx = pebs->dx;
1514                regs->si = pebs->si;
1515                regs->di = pebs->di;
1516
1517                regs->bp = pebs->bp;
1518                regs->sp = pebs->sp;
1519
1520#ifndef CONFIG_X86_32
1521                regs->r8 = pebs->r8;
1522                regs->r9 = pebs->r9;
1523                regs->r10 = pebs->r10;
1524                regs->r11 = pebs->r11;
1525                regs->r12 = pebs->r12;
1526                regs->r13 = pebs->r13;
1527                regs->r14 = pebs->r14;
1528                regs->r15 = pebs->r15;
1529#endif
1530        }
1531
1532        if (event->attr.precise_ip > 1) {
1533                /*
1534                 * Haswell and later processors have an 'eventing IP'
1535                 * (real IP) which fixes the off-by-1 skid in hardware.
1536                 * Use it when precise_ip >= 2:
1537                 */
1538                if (x86_pmu.intel_cap.pebs_format >= 2) {
1539                        set_linear_ip(regs, pebs->real_ip);
1540                        regs->flags |= PERF_EFLAGS_EXACT;
1541                } else {
1542                        /* Otherwise, use PEBS off-by-1 IP: */
1543                        set_linear_ip(regs, pebs->ip);
1544
1545                        /*
1546                         * With precise_ip >= 2, try to fix up the off-by-1 IP
1547                         * using the LBR. If successful, the fixup function
1548                         * corrects regs->ip and calls set_linear_ip() on regs:
1549                         */
1550                        if (intel_pmu_pebs_fixup_ip(regs))
1551                                regs->flags |= PERF_EFLAGS_EXACT;
1552                }
1553        } else {
1554                /*
1555                 * When precise_ip == 1, return the PEBS off-by-1 IP,
1556                 * no fixup attempted:
1557                 */
1558                set_linear_ip(regs, pebs->ip);
1559        }
1560
1561
1562        if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) &&
1563            x86_pmu.intel_cap.pebs_format >= 1)
1564                data->addr = pebs->dla;
1565
1566        if (x86_pmu.intel_cap.pebs_format >= 2) {
1567                /* Only set the TSX weight when no memory weight. */
1568                if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll)
1569                        data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);
1570
1571                if (sample_type & PERF_SAMPLE_TRANSACTION)
1572                        data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
1573                                                              pebs->ax);
1574        }
1575
1576        /*
1577         * v3 supplies an accurate time stamp, so we use that
1578         * as the sample time.
1579         *
1580         * We can only do this for the default trace clock.
1581         */
1582        if (x86_pmu.intel_cap.pebs_format >= 3 &&
1583                event->attr.use_clockid == 0)
1584                data->time = native_sched_clock_from_tsc(pebs->tsc);
1585
1586        if (has_branch_stack(event))
1587                data->br_stack = &cpuc->lbr_stack;
1588}
1589
1590static void adaptive_pebs_save_regs(struct pt_regs *regs,
1591                                    struct pebs_gprs *gprs)
1592{
1593        regs->ax = gprs->ax;
1594        regs->bx = gprs->bx;
1595        regs->cx = gprs->cx;
1596        regs->dx = gprs->dx;
1597        regs->si = gprs->si;
1598        regs->di = gprs->di;
1599        regs->bp = gprs->bp;
1600        regs->sp = gprs->sp;
1601#ifndef CONFIG_X86_32
1602        regs->r8 = gprs->r8;
1603        regs->r9 = gprs->r9;
1604        regs->r10 = gprs->r10;
1605        regs->r11 = gprs->r11;
1606        regs->r12 = gprs->r12;
1607        regs->r13 = gprs->r13;
1608        regs->r14 = gprs->r14;
1609        regs->r15 = gprs->r15;
1610#endif
1611}
1612
1613#define PEBS_LATENCY_MASK                       0xffff
1614#define PEBS_CACHE_LATENCY_OFFSET               32
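    /*
     * When a weight is sampled on PMU_FL_INSTR_LATENCY platforms, the code
     * below splits meminfo::latency in two: bits 15:0 are stored in
     * data->weight.var2_w and the value shifted down by
     * PEBS_CACHE_LATENCY_OFFSET is used as the main weight. For example
     * (a made-up value), latency = 0x0000002d00000010 yields var2_w = 0x10
     * and a remaining weight of 0x2d.
     */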
1615
1616/*
1617 * With adaptive PEBS the layout depends on what fields are configured.
1618 */
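    /*
     * For example (one illustrative configuration, not the only one): with
     * PEBS_DATACFG_MEMINFO, PEBS_DATACFG_GP and PEBS_DATACFG_XMMS set, a
     * single record is consumed below in this order:
     *
     *   struct pebs_basic    - always present; format_size >> 48 holds the
     *                          total record size
     *   struct pebs_meminfo  - because PEBS_DATACFG_MEMINFO is set
     *   struct pebs_gprs     - because PEBS_DATACFG_GP is set
     *   struct pebs_xmm      - because PEBS_DATACFG_XMMS is set
     *
     * LBR entries, if PEBS_DATACFG_LBRS is also set, would follow at the
     * end of the record.
     */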
1619
1620static void setup_pebs_adaptive_sample_data(struct perf_event *event,
1621                                            struct pt_regs *iregs, void *__pebs,
1622                                            struct perf_sample_data *data,
1623                                            struct pt_regs *regs)
1624{
1625        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1626        struct pebs_basic *basic = __pebs;
1627        void *next_record = basic + 1;
1628        u64 sample_type;
1629        u64 format_size;
1630        struct pebs_meminfo *meminfo = NULL;
1631        struct pebs_gprs *gprs = NULL;
1632        struct x86_perf_regs *perf_regs;
1633
1634        if (basic == NULL)
1635                return;
1636
1637        perf_regs = container_of(regs, struct x86_perf_regs, regs);
1638        perf_regs->xmm_regs = NULL;
1639
1640        sample_type = event->attr.sample_type;
1641        format_size = basic->format_size;
1642        perf_sample_data_init(data, 0, event->hw.last_period);
1643        data->period = event->hw.last_period;
1644
1645        if (event->attr.use_clockid == 0)
1646                data->time = native_sched_clock_from_tsc(basic->tsc);
1647
1648        /*
1649         * We must however always use iregs for the unwinder to stay sane; the
1650         * record BP,SP,IP can point into thin air when the record is from a
1651         * previous PMI context or an (I)RET happened between the record and
1652         * PMI.
1653         */
1654        if (sample_type & PERF_SAMPLE_CALLCHAIN)
1655                data->callchain = perf_callchain(event, iregs);
1656
1657        *regs = *iregs;
1658        /* The ip in basic is EventingIP */
1659        set_linear_ip(regs, basic->ip);
1660        regs->flags = PERF_EFLAGS_EXACT;
1661
1662        /*
1663         * The MEMINFO record comes before the GP record, but
1664         * PERF_SAMPLE_TRANSACTION needs gprs->ax.
1665         * Save the pointer here and process it later.
1666         */
1667        if (format_size & PEBS_DATACFG_MEMINFO) {
1668                meminfo = next_record;
1669                next_record = meminfo + 1;
1670        }
1671
1672        if (format_size & PEBS_DATACFG_GP) {
1673                gprs = next_record;
1674                next_record = gprs + 1;
1675
1676                if (event->attr.precise_ip < 2) {
1677                        set_linear_ip(regs, gprs->ip);
1678                        regs->flags &= ~PERF_EFLAGS_EXACT;
1679                }
1680
1681                if (sample_type & PERF_SAMPLE_REGS_INTR)
1682                        adaptive_pebs_save_regs(regs, gprs);
1683        }
1684
1685        if (format_size & PEBS_DATACFG_MEMINFO) {
1686                if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
1687                        u64 weight = meminfo->latency;
1688
1689                        if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) {
1690                                data->weight.var2_w = weight & PEBS_LATENCY_MASK;
1691                                weight >>= PEBS_CACHE_LATENCY_OFFSET;
1692                        }
1693
1694                        /*
1695                         * Although meminfo::latency is defined as a u64,
1696                         * only the lower 32 bits include the valid data
1697                         * in practice on Ice Lake and earlier platforms.
1698                         */
1699                        if (sample_type & PERF_SAMPLE_WEIGHT) {
1700                                data->weight.full = weight ?:
1701                                        intel_get_tsx_weight(meminfo->tsx_tuning);
1702                        } else {
1703                                data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
1704                                        intel_get_tsx_weight(meminfo->tsx_tuning);
1705                        }
1706                }
1707
1708                if (sample_type & PERF_SAMPLE_DATA_SRC)
1709                        data->data_src.val = get_data_src(event, meminfo->aux);
1710
1711                if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
1712                        data->addr = meminfo->address;
1713
1714                if (sample_type & PERF_SAMPLE_TRANSACTION)
1715                        data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
1716                                                          gprs ? gprs->ax : 0);
1717        }
1718
1719        if (format_size & PEBS_DATACFG_XMMS) {
1720                struct pebs_xmm *xmm = next_record;
1721
1722                next_record = xmm + 1;
1723                perf_regs->xmm_regs = xmm->xmm;
1724        }
1725
1726        if (format_size & PEBS_DATACFG_LBRS) {
1727                struct lbr_entry *lbr = next_record;
1728                int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT)
1729                                        & 0xff) + 1;
1730                next_record = next_record + num_lbr * sizeof(struct lbr_entry);
1731
1732                if (has_branch_stack(event)) {
1733                        intel_pmu_store_pebs_lbrs(lbr);
1734                        data->br_stack = &cpuc->lbr_stack;
1735                }
1736        }
1737
1738        WARN_ONCE(next_record != __pebs + (format_size >> 48),
1739                        "PEBS record size %llu, expected %llu, config %llx\n",
1740                        format_size >> 48,
1741                        (u64)(next_record - __pebs),
1742                        basic->format_size);
1743}
1744
1745static inline void *
1746get_next_pebs_record_by_bit(void *base, void *top, int bit)
1747{
1748        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1749        void *at;
1750        u64 pebs_status;
1751
1752        /*
1753         * fmt0 does not have a status bitfield (does not use
1754         * perf_record_nhm format)
1755         * pebs_record_nhm format)
1756        if (x86_pmu.intel_cap.pebs_format < 1)
1757                return base;
1758
1759        if (base == NULL)
1760                return NULL;
1761
1762        for (at = base; at < top; at += cpuc->pebs_record_size) {
1763                unsigned long status = get_pebs_status(at);
1764
1765                if (test_bit(bit, (unsigned long *)&status)) {
1766                        /* PEBS v3 has accurate status bits */
1767                        if (x86_pmu.intel_cap.pebs_format >= 3)
1768                                return at;
1769
1770                        if (status == (1 << bit))
1771                                return at;
1772
1773                        /* clear non-PEBS bit and re-check */
1774                        pebs_status = status & cpuc->pebs_enabled;
1775                        pebs_status &= PEBS_COUNTER_MASK;
1776                        if (pebs_status == (1 << bit))
1777                                return at;
1778                }
1779        }
1780        return NULL;
1781}
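    /*
     * A sketch of the pre-v3 re-check above, using made-up values: suppose a
     * record carries status = 0x5 (bits 0 and 2 set), we are looking for
     * bit 2, and only counter 2 is PEBS-enabled (cpuc->pebs_enabled = 0x4).
     * Then status & cpuc->pebs_enabled == 0x4 == (1 << 2), so the record is
     * attributed to counter 2. Had both bits been PEBS-enabled, the masked
     * status would still have two bits set and the record would be skipped.
     */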
1782
1783void intel_pmu_auto_reload_read(struct perf_event *event)
1784{
1785        WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD));
1786
1787        perf_pmu_disable(event->pmu);
1788        intel_pmu_drain_pebs_buffer();
1789        perf_pmu_enable(event->pmu);
1790}
1791
1792/*
1793 * Special variant of intel_pmu_save_and_restart() for auto-reload.
1794 */
1795static int
1796intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
1797{
1798        struct hw_perf_event *hwc = &event->hw;
1799        int shift = 64 - x86_pmu.cntval_bits;
1800        u64 period = hwc->sample_period;
1801        u64 prev_raw_count, new_raw_count;
1802        s64 new, old;
1803
1804        WARN_ON(!period);
1805
1806        /*
1807         * drain_pebs() only happens when the PMU is disabled.
1808         */
1809        WARN_ON(this_cpu_read(cpu_hw_events.enabled));
1810
1811        prev_raw_count = local64_read(&hwc->prev_count);
1812        rdpmcl(hwc->event_base_rdpmc, new_raw_count);
1813        local64_set(&hwc->prev_count, new_raw_count);
1814
1815        /*
1816         * Since the counter increments a negative counter value and
1817         * overflows on the sign switch, giving the interval:
1818         *
1819         *   [-period, 0]
1820         *
1821         * the difference between two consecutive reads is:
1822         *
1823         *   A) value2 - value1;
1824         *      when no overflows have happened in between,
1825         *
1826         *   B) (0 - value1) + (value2 - (-period));
1827         *      when one overflow happened in between,
1828         *
1829         *   C) (0 - value1) + (n - 1) * (period) + (value2 - (-period));
1830         *      when @n overflows happened in between.
1831         *
1832         * Here A) is the obvious difference, B) is the extension to the
1833         * discrete interval, where the first term is to the top of the
1834         * interval and the second term is from the bottom of the next
1835         * interval, and C) the extension to multiple intervals, where the
1836         * middle term accounts for the whole intervals covered.
1837         *
1838         * An equivalent of C, by reduction, is:
1839         *
1840         *   value2 - value1 + n * period
1841         */
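            /*
             * A worked example with made-up numbers: period = 65536, one
             * overflow between the reads (n = 1), value1 = -100 and
             * value2 = -65000:
             *
             *   (0 - value1) + (value2 - (-period))
             *     = 100 + (-65000 + 65536) = 100 + 536 = 636
             *
             * which matches value2 - value1 + n * period
             *     = -65000 - (-100) + 1 * 65536 = 636
             */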
1842        new = ((s64)(new_raw_count << shift) >> shift);
1843        old = ((s64)(prev_raw_count << shift) >> shift);
1844        local64_add(new - old + count * period, &event->count);
1845
1846        local64_set(&hwc->period_left, -new);
1847
1848        perf_event_update_userpage(event);
1849
1850        return 0;
1851}
1852
1853static __always_inline void
1854__intel_pmu_pebs_event(struct perf_event *event,
1855                       struct pt_regs *iregs,
1856                       struct perf_sample_data *data,
1857                       void *base, void *top,
1858                       int bit, int count,
1859                       void (*setup_sample)(struct perf_event *,
1860                                            struct pt_regs *,
1861                                            void *,
1862                                            struct perf_sample_data *,
1863                                            struct pt_regs *))
1864{
1865        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1866        struct hw_perf_event *hwc = &event->hw;
1867        struct x86_perf_regs perf_regs;
1868        struct pt_regs *regs = &perf_regs.regs;
1869        void *at = get_next_pebs_record_by_bit(base, top, bit);
1870        static struct pt_regs dummy_iregs;
1871
1872        if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
1873                /*
1874                 * Auto-reload is currently only enabled in fixed period mode.
1875                 * The reload value is always hwc->sample_period.
1876                 * This may need to change if auto-reload is ever enabled
1877                 * in freq mode.
1878                 */
1879                intel_pmu_save_and_restart_reload(event, count);
1880        } else if (!intel_pmu_save_and_restart(event))
1881                return;
1882
1883        if (!iregs)
1884                iregs = &dummy_iregs;
1885
1886        while (count > 1) {
1887                setup_sample(event, iregs, at, data, regs);
1888                perf_event_output(event, data, regs);
1889                at += cpuc->pebs_record_size;
1890                at = get_next_pebs_record_by_bit(at, top, bit);
1891                count--;
1892        }
1893
1894        setup_sample(event, iregs, at, data, regs);
1895        if (iregs == &dummy_iregs) {
1896                /*
1897                 * The PEBS records may be drained in the non-overflow context,
1898                 * e.g., large PEBS + context switch. Perf should treat the
1899                 * last record the same as other PEBS records, and doesn't
1900                 * invoke the generic overflow handler.
1901                 */
1902                perf_event_output(event, data, regs);
1903        } else {
1904                /*
1905                 * All but the last records are processed.
1906                 * The last one is left to be able to call the overflow handler.
1907                 */
1908                if (perf_event_overflow(event, data, regs))
1909                        x86_pmu_stop(event, 0);
1910        }
1911}
1912
1913static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data)
1914{
1915        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1916        struct debug_store *ds = cpuc->ds;
1917        struct perf_event *event = cpuc->events[0]; /* PMC0 only */
1918        struct pebs_record_core *at, *top;
1919        int n;
1920
1921        if (!x86_pmu.pebs_active)
1922                return;
1923
1924        at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
1925        top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
1926
1927        /*
1928         * Whatever else happens, drain the thing
1929         */
1930        ds->pebs_index = ds->pebs_buffer_base;
1931
1932        if (!test_bit(0, cpuc->active_mask))
1933                return;
1934
1935        WARN_ON_ONCE(!event);
1936
1937        if (!event->attr.precise_ip)
1938                return;
1939
1940        n = top - at;
1941        if (n <= 0) {
1942                if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
1943                        intel_pmu_save_and_restart_reload(event, 0);
1944                return;
1945        }
1946
1947        __intel_pmu_pebs_event(event, iregs, data, at, top, 0, n,
1948                               setup_pebs_fixed_sample_data);
1949}
1950
1951static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size)
1952{
1953        struct perf_event *event;
1954        int bit;
1955
1956        /*
1957         * drain_pebs() can be called twice in a short period for an
1958         * auto-reload event in pmu::read(), with no overflows having
1959         * happened in between. In that case,
1960         * intel_pmu_save_and_restart_reload() must still be called to
1961         * update event->count.
1962         */
1963        for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) {
1964                event = cpuc->events[bit];
1965                if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
1966                        intel_pmu_save_and_restart_reload(event, 0);
1967        }
1968}
1969
1970static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data)
1971{
1972        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1973        struct debug_store *ds = cpuc->ds;
1974        struct perf_event *event;
1975        void *base, *at, *top;
1976        short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
1977        short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
1978        int bit, i, size;
1979        u64 mask;
1980
1981        if (!x86_pmu.pebs_active)
1982                return;
1983
1984        base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
1985        top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
1986
1987        ds->pebs_index = ds->pebs_buffer_base;
1988
1989        mask = (1ULL << x86_pmu.max_pebs_events) - 1;
1990        size = x86_pmu.max_pebs_events;
1991        if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
1992                mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
1993                size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
1994        }
1995
1996        if (unlikely(base >= top)) {
1997                intel_pmu_pebs_event_update_no_drain(cpuc, size);
1998                return;
1999        }
2000
2001        for (at = base; at < top; at += x86_pmu.pebs_record_size) {
2002                struct pebs_record_nhm *p = at;
2003                u64 pebs_status;
2004
2005                pebs_status = p->status & cpuc->pebs_enabled;
2006                pebs_status &= mask;
2007
2008                /* PEBS v3 has more accurate status bits */
2009                if (x86_pmu.intel_cap.pebs_format >= 3) {
2010                        for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
2011                                counts[bit]++;
2012
2013                        continue;
2014                }
2015
2016                /*
2017                 * On some CPUs the PEBS status can be zero when PEBS is
2018                 * racing with clearing of GLOBAL_STATUS.
2019                 *
2020                 * Normally we would drop that record, but in the
2021                 * case when there is only a single active PEBS event
2022                 * we can assume it's for that event.
2023                 */
2024                if (!pebs_status && cpuc->pebs_enabled &&
2025                        !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
2026                        pebs_status = p->status = cpuc->pebs_enabled;
2027
2028                bit = find_first_bit((unsigned long *)&pebs_status,
2029                                        x86_pmu.max_pebs_events);
2030                if (bit >= x86_pmu.max_pebs_events)
2031                        continue;
2032
2033                /*
2034                 * The PEBS hardware does not deal well with the situation
2035                 * when events happen close to each other and multiple bits
2036                 * are set, but this should happen rarely.
2037                 *
2038                 * If these events include one PEBS and multiple non-PEBS
2039                 * events, the PEBS record is not affected and will be
2040                 * handled normally (slow path).
2041                 *
2042                 * If these events include two or more PEBS events, the
2043                 * records for the events can be collapsed into a single
2044                 * one, and it's not possible to reconstruct all events
2045                 * that caused the PEBS record. This is called a collision.
2046                 * If a collision happens, the record is dropped.
2047                 */
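                    /*
                     * For instance (hypothetical bits): with PEBS events on
                     * counters 0 and 3, a collided record may carry
                     * p->status = 0x9. find_first_bit() picks bit 0, but
                     * pebs_status (0x9) != 1ULL << 0, so error[0] and
                     * error[3] are incremented and no sample is emitted for
                     * this record.
                     */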
2048                if (pebs_status != (1ULL << bit)) {
2049                        for_each_set_bit(i, (unsigned long *)&pebs_status, size)
2050                                error[i]++;
2051                        continue;
2052                }
2053
2054                counts[bit]++;
2055        }
2056
2057        for_each_set_bit(bit, (unsigned long *)&mask, size) {
2058                if ((counts[bit] == 0) && (error[bit] == 0))
2059                        continue;
2060
2061                event = cpuc->events[bit];
2062                if (WARN_ON_ONCE(!event))
2063                        continue;
2064
2065                if (WARN_ON_ONCE(!event->attr.precise_ip))
2066                        continue;
2067
2068                /* log the number of dropped samples */
2069                if (error[bit]) {
2070                        perf_log_lost_samples(event, error[bit]);
2071
2072                        if (iregs && perf_event_account_interrupt(event))
2073                                x86_pmu_stop(event, 0);
2074                }
2075
2076                if (counts[bit]) {
2077                        __intel_pmu_pebs_event(event, iregs, data, base,
2078                                               top, bit, counts[bit],
2079                                               setup_pebs_fixed_sample_data);
2080                }
2081        }
2082}
2083
2084static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
2085{
2086        short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
2087        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2088        int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
2089        int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
2090        struct debug_store *ds = cpuc->ds;
2091        struct perf_event *event;
2092        void *base, *at, *top;
2093        int bit, size;
2094        u64 mask;
2095
2096        if (!x86_pmu.pebs_active)
2097                return;
2098
2099        base = (struct pebs_basic *)(unsigned long)ds->pebs_buffer_base;
2100        top = (struct pebs_basic *)(unsigned long)ds->pebs_index;
2101
2102        ds->pebs_index = ds->pebs_buffer_base;
2103
2104        mask = ((1ULL << max_pebs_events) - 1) |
2105               (((1ULL << num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED);
2106        size = INTEL_PMC_IDX_FIXED + num_counters_fixed;
2107
2108        if (unlikely(base >= top)) {
2109                intel_pmu_pebs_event_update_no_drain(cpuc, size);
2110                return;
2111        }
2112
2113        for (at = base; at < top; at += cpuc->pebs_record_size) {
2114                u64 pebs_status;
2115
2116                pebs_status = get_pebs_status(at) & cpuc->pebs_enabled;
2117                pebs_status &= mask;
2118
2119                for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
2120                        counts[bit]++;
2121        }
2122
2123        for_each_set_bit(bit, (unsigned long *)&mask, size) {
2124                if (counts[bit] == 0)
2125                        continue;
2126
2127                event = cpuc->events[bit];
2128                if (WARN_ON_ONCE(!event))
2129                        continue;
2130
2131                if (WARN_ON_ONCE(!event->attr.precise_ip))
2132                        continue;
2133
2134                __intel_pmu_pebs_event(event, iregs, data, base,
2135                                       top, bit, counts[bit],
2136                                       setup_pebs_adaptive_sample_data);
2137        }
2138}
2139
2140/*
2141 * BTS, PEBS probe and setup
2142 */
2143
2144void __init intel_ds_init(void)
2145{
2146        /*
2147         * No support for 32-bit formats
2148         */
2149        if (!boot_cpu_has(X86_FEATURE_DTES64))
2150                return;
2151
2152        x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
2153        x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
2154        x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
2155        if (x86_pmu.version <= 4)
2156                x86_pmu.pebs_no_isolation = 1;
2157
2158        if (x86_pmu.pebs) {
2159                char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
2160                char *pebs_qual = "";
2161                int format = x86_pmu.intel_cap.pebs_format;
2162
2163                if (format < 4)
2164                        x86_pmu.intel_cap.pebs_baseline = 0;
2165
2166                switch (format) {
2167                case 0:
2168                        pr_cont("PEBS fmt0%c, ", pebs_type);
2169                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
2170                        /*
2171                         * Using >PAGE_SIZE buffers makes the WRMSR to
2172                         * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
2173                         * mysteriously hang on Core2.
2174                         *
2175                         * As a workaround, we don't do this.
2176                         */
2177                        x86_pmu.pebs_buffer_size = PAGE_SIZE;
2178                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
2179                        break;
2180
2181                case 1:
2182                        pr_cont("PEBS fmt1%c, ", pebs_type);
2183                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
2184                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
2185                        break;
2186
2187                case 2:
2188                        pr_cont("PEBS fmt2%c, ", pebs_type);
2189                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
2190                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
2191                        break;
2192
2193                case 3:
2194                        pr_cont("PEBS fmt3%c, ", pebs_type);
2195                        x86_pmu.pebs_record_size =
2196                                                sizeof(struct pebs_record_skl);
2197                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
2198                        x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
2199                        break;
2200
2201                case 4:
2202                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
2203                        x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
2204                        if (x86_pmu.intel_cap.pebs_baseline) {
2205                                x86_pmu.large_pebs_flags |=
2206                                        PERF_SAMPLE_BRANCH_STACK |
2207                                        PERF_SAMPLE_TIME;
2208                                x86_pmu.flags |= PMU_FL_PEBS_ALL;
2209                                pebs_qual = "-baseline";
2210                                x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
2211                        } else {
2212                                /* Only basic record supported */
2213                                x86_pmu.large_pebs_flags &=
2214                                        ~(PERF_SAMPLE_ADDR |
2215                                          PERF_SAMPLE_TIME |
2216                                          PERF_SAMPLE_DATA_SRC |
2217                                          PERF_SAMPLE_TRANSACTION |
2218                                          PERF_SAMPLE_REGS_USER |
2219                                          PERF_SAMPLE_REGS_INTR);
2220                        }
2221                        pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);
2222
2223                        if (!is_hybrid() && x86_pmu.intel_cap.pebs_output_pt_available) {
2224                                pr_cont("PEBS-via-PT, ");
2225                                x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
2226                        }
2227
2228                        break;
2229
2230                default:
2231                        pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
2232                        x86_pmu.pebs = 0;
2233                }
2234        }
2235}
2236
2237void perf_restore_debug_store(void)
2238{
2239        struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
2240
2241        if (!x86_pmu.bts && !x86_pmu.pebs)
2242                return;
2243
2244        wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
2245}
2246